Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 apic updates from Ingo Molnar:
 "Janitorial changes: removal of an unused function plus __init
  annotations"

* 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic: Make arch_init_msi/htirq_domain __init
  x86/apic: Make init_legacy_irqs() __init
  x86/ioapic: Remove unused IO_APIC_irq_trigger() function
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index da5c087..c3c7055 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -819,7 +819,7 @@
    certain condition is true.  They must be used carefully to ensure
    there is no race condition.  You declare a
    <type>wait_queue_head_t</type>, and then processes which want to
-   wait for that condition declare a <type>wait_queue_t</type>
+   wait for that condition declare a <type>wait_queue_entry_t</type>
    referring to themselves, and place that in the queue.
   </para>
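
The declare-and-wait pattern described above usually goes through the wait_event_*() helpers, which take care of the wait_queue_entry_t bookkeeping internally. A minimal sketch, with illustrative names (my_wq, condition_flag) that are not part of this patch:

        #include <linux/wait.h>

        static DECLARE_WAIT_QUEUE_HEAD(my_wq);        /* the wait_queue_head_t */
        static int condition_flag;

        /* Sleeper: placed on my_wq (via an internal wait_queue_entry_t) until
         * condition_flag becomes nonzero or a signal arrives. */
        static int wait_for_flag(void)
        {
                return wait_event_interruptible(my_wq, condition_flag != 0);
        }

        /* Waker: make the condition true, then wake up anyone waiting on it. */
        static void set_flag(void)
        {
                condition_flag = 1;
                wake_up(&my_wq);
        }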
 
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 1672573..f46980c 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -28,8 +28,6 @@
 	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
-trace.txt
-	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index f60adf1..95b30fa 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -559,9 +559,7 @@
 	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
 	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
 	the above optimizations are harmless.
-	However,
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-	<tt>sparse</tt> will complain if you
+	However, <tt>sparse</tt> will complain if you
 	define <tt>gp</tt> with <tt>__rcu</tt> and then
 	access it without using
 	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
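
For illustration, a minimal reader along the lines of the remove_gp_synchronous() example could look as follows (struct foo and the function name are made up here, not taken from the document); with gp declared __rcu, sparse flags any access that bypasses rcu_access_pointer()/rcu_dereference():

        #include <linux/rcupdate.h>

        struct foo { int a; };                 /* illustrative type */
        static struct foo __rcu *gp;           /* __rcu annotation checked by sparse */

        static int read_gp_a(void)
        {
                struct foo *p;
                int ret = -1;

                rcu_read_lock();
                p = rcu_dereference(gp);       /* plain "p = gp" would draw a sparse warning */
                if (p)
                        ret = p->a;
                rcu_read_unlock();
                return ret;
        }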
@@ -1849,7 +1847,8 @@
 If the nesting is not visible to the compiler, as is the case with
 mutually recursive functions each in its own translation unit,
 stack overflow will result.
-If the nesting takes the form of loops, either the control variable
+If the nesting takes the form of loops, perhaps in the guise of tail
+recursion, either the control variable
 will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
 Nevertheless, this class of RCU implementations is one
 of the most composable constructs in existence.
@@ -1977,9 +1976,8 @@
 	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
 	substituting a simple assignment.
 	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
+	tagged with <tt>__rcu</tt>, after which sparse
+	will complain about simple-assignment accesses to that pointer.
 	Arnd Bergmann made me aware of this requirement, and also
 	supplied the needed
 	<a href="https://lwn.net/Articles/376011/">patch series</a>.
@@ -2036,7 +2034,7 @@
 	some other synchronization mechanism, for example, reference
 	counting.
 <li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via both debugfs and event tracing.
+	information is provided via event tracing.
 <li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
 	<tt>rcu_dereference()</tt> to create typical linked
 	data structures can be surprisingly error-prone.
@@ -2519,11 +2517,7 @@
 <tt>nohz_full</tt> CPU running in userspace.
 RCU must therefore track <tt>nohz_full</tt> userspace
 execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
+RCU must therefore be able to sample state at two points in
 time, and be able to determine whether or not some other CPU spent
 any time idle and/or executing in userspace.
 
@@ -2936,6 +2930,20 @@
 need not exclude CPU-hotplug operations.
 
 <p>
+SRCU also differs from other RCU flavors in that SRCU's expedited and
+non-expedited grace periods are implemented by the same mechanism.
+This means that in the current SRCU implementation, expediting a
+future grace period has the side effect of expediting all prior
+grace periods that have not yet completed.
+(But please note that this is a property of the current implementation,
+not necessarily of future implementations.)
+In addition, if SRCU has been idle for longer than the interval
+specified by the <tt>srcutree.exp_holdoff</tt> kernel boot parameter
+(25&nbsp;microseconds by default),
+and if a <tt>synchronize_srcu()</tt> invocation ends this idle period,
+that invocation will be automatically expedited.
+
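As a rough illustration of the calls being discussed (the SRCU domain and function names below are made up, not taken from the kernel):

        #include <linux/srcu.h>

        DEFINE_STATIC_SRCU(my_srcu);           /* illustrative SRCU domain */

        static void reader(void)
        {
                int idx = srcu_read_lock(&my_srcu);
                /* ... access SRCU-protected data; sleeping is permitted ... */
                srcu_read_unlock(&my_srcu, idx);
        }

        static void updater(void)
        {
                /* Waits for all pre-existing readers in this SRCU domain.  Per the
                 * text above, this call is expedited automatically if the domain
                 * has been idle for longer than srcutree.exp_holdoff. */
                synchronize_srcu(&my_srcu);
        }
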
+<p>
 As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
 a locking bottleneck present in prior kernel versions.
 Although this will allow users to put much heavier stress on
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 8779471..6beda55 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -413,11 +413,11 @@
 	read-side critical sections.  It is the responsibility of the
 	RCU update-side primitives to deal with this.
 
-17.	Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
-	__rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
-	validate your RCU code.  These can help find problems as follows:
+17.	Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+	__rcu sparse checks to validate your RCU code.	These can help
+	find problems as follows:
 
-	CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+	CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
 		structures are carried out under the proper RCU
 		read-side critical section, while holding the right
 		combination of locks, or whatever other conditions
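
As a sketch of the kind of access these checks validate (names are illustrative, not from this patch): lockdep verifies that the stated lock is really held, and sparse verifies that the __rcu pointer is only touched through the RCU accessors.

        #include <linux/rcupdate.h>
        #include <linux/spinlock.h>
        #include <linux/slab.h>

        struct foo { int a; };                 /* illustrative */
        static struct foo __rcu *gp;
        static DEFINE_SPINLOCK(gp_lock);

        static void update_gp(struct foo *newp)
        {
                struct foo *old;

                spin_lock(&gp_lock);
                /* CONFIG_PROVE_LOCKING=y makes lockdep check that gp_lock is held here. */
                old = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
                rcu_assign_pointer(gp, newp);
                spin_unlock(&gp_lock);

                synchronize_rcu();             /* wait for pre-existing readers */
                kfree(old);
        }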
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
deleted file mode 100644
index 6549012..0000000
--- a/Documentation/RCU/trace.txt
+++ /dev/null
@@ -1,535 +0,0 @@
-CONFIG_RCU_TRACE debugfs Files and Formats
-
-
-The rcutree and rcutiny implementations of RCU provide debugfs trace
-output that summarizes counters and state.  This information is useful for
-debugging RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats, first
-for rcutree and next for rcutiny.
-
-
-CONFIG_TREE_RCU and CONFIG_PREEMPT_RCU debugfs Files and Formats
-
-These implementations of RCU provide several debugfs directories under the
-top-level directory "rcu":
-
-rcu/rcu_bh
-rcu/rcu_preempt
-rcu/rcu_sched
-
-Each directory contains files for the corresponding flavor of RCU.
-Note that rcu/rcu_preempt is only present for CONFIG_PREEMPT_RCU.
-For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor,
-so that activity for both appears in rcu/rcu_sched.
-
-In addition, the following file appears in the top-level directory:
-rcu/rcutorture.  This file displays rcutorture test progress.  The output
-of "cat rcu/rcutorture" looks as follows:
-
-rcutorture test sequence: 0 (test in progress)
-rcutorture update version number: 615
-
-The first line shows the number of rcutorture tests that have completed
-since boot.  If a test is currently running, the "(test in progress)"
-string will appear as shown above.  The second line shows the number of
-update cycles that the current test has started, or zero if there is
-no test in progress.
-
-
-Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly
-also rcu/rcu_preempt) the following files will be present:
-
-rcudata:
-	Displays fields in struct rcu_data.
-rcuexp:
-	Displays statistics for expedited grace periods.
-rcugp:
-	Displays grace-period counters.
-rcuhier:
-	Displays the struct rcu_node hierarchy.
-rcu_pending:
-	Displays counts of the reasons rcu_pending() decided that RCU had
-	work to do.
-rcuboost:
-	Displays RCU boosting statistics.  Only present if
-	CONFIG_RCU_BOOST=y.
-
-The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
-
-  0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
-  1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
-  2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
-  3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
-  4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
-  5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
-  6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
-  7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
-
-This file has one line per CPU, or eight for this 8-CPU system.
-The fields are as follows:
-
-o	The number at the beginning of each line is the CPU number.
-	CPUs numbers followed by an exclamation mark are offline,
-	but have been online at least once since boot.	There will be
-	no output for CPUs that have never been online, which can be
-	a good thing in the surprisingly common case where NR_CPUS is
-	substantially larger than the number of actual CPUs.
-
-o	"c" is the count of grace periods that this CPU believes have
-	completed.  Offlined CPUs and CPUs in dynticks idle mode may lag
-	quite a ways behind, for example, CPU 4 under "rcu_sched" above,
-	which has been offline through 16 RCU grace periods.  It is not
-	unusual to see offline CPUs lagging by thousands of grace periods.
-	Note that although the grace-period number is an unsigned long,
-	it is printed out as a signed long to allow more human-friendly
-	representation near boot time.
-
-o	"g" is the count of grace periods that this CPU believes have
-	started.  Again, offlined CPUs and CPUs in dynticks idle mode
-	may lag behind.  If the "c" and "g" values are equal, this CPU
-	has already reported a quiescent state for the last RCU grace
-	period that it is aware of, otherwise, the CPU believes that it
-	owes RCU a quiescent state.
-
-o	"pq" indicates that this CPU has passed through a quiescent state
-	for the current grace period.  It is possible for "pq" to be
-	"1" and "c" different than "g", which indicates that although
-	the CPU has passed through a quiescent state, either (1) this
-	CPU has not yet reported that fact, (2) some other CPU has not
-	yet reported for this grace period, or (3) both.
-
-o	"qp" indicates that RCU still expects a quiescent state from
-	this CPU.  Offlined CPUs and CPUs in dyntick idle mode might
-	well have qp=1, which is OK: RCU is still ignoring them.
-
-o	"dt" is the current value of the dyntick counter that is incremented
-	when entering or leaving idle, either due to a context switch or
-	due to an interrupt.  This number is even if the CPU is in idle
-	from RCU's viewpoint and odd otherwise.  The number after the
-	first "/" is the interrupt nesting depth when in idle state,
-	or a large number added to the interrupt-nesting depth when
-	running a non-idle task.  Some architectures do not accurately
-	count interrupt nesting when running in non-idle kernel context,
-	which can result in interesting anomalies such as negative
-	interrupt-nesting levels.  The number after the second "/"
-	is the NMI nesting depth.
-
-o	"df" is the number of times that some other CPU has forced a
-	quiescent state on behalf of this CPU due to this CPU being in
-	idle state.
-
-o	"of" is the number of times that some other CPU has forced a
-	quiescent state on behalf of this CPU due to this CPU being
-	offline.  In a perfect world, this might never happen, but it
-	turns out that offlining and onlining a CPU can take several grace
-	periods, and so there is likely to be an extended period of time
-	when RCU believes that the CPU is online when it really is not.
-	Please note that erring in the other direction (RCU believing a
-	CPU is offline when it is really alive and kicking) is a fatal
-	error, so it makes sense to err conservatively.
-
-o	"ql" is the number of RCU callbacks currently residing on
-	this CPU.  The first number is the number of "lazy" callbacks
-	that are known to RCU to only be freeing memory, and the number
-	after the "/" is the total number of callbacks, lazy or not.
-	These counters count callbacks regardless of what phase of
-	grace-period processing that they are in (new, waiting for
-	grace period to start, waiting for grace period to end, ready
-	to invoke).
-
-o	"qs" gives an indication of the state of the callback queue
-	with four characters:
-
-	"N"	Indicates that there are callbacks queued that are not
-		ready to be handled by the next grace period, and thus
-		will be handled by the grace period following the next
-		one.
-
-	"R"	Indicates that there are callbacks queued that are
-		ready to be handled by the next grace period.
-
-	"W"	Indicates that there are callbacks queued that are
-		waiting on the current grace period.
-
-	"D"	Indicates that there are callbacks queued that have
-		already been handled by a prior grace period, and are
-		thus waiting to be invoked.  Note that callbacks in
-		the process of being invoked are not counted here.
-		Callbacks in the process of being invoked are those
-		that have been removed from the rcu_data structures
-		queues by rcu_do_batch(), but which have not yet been
-		invoked.
-
-	If there are no callbacks in a given one of the above states,
-	the corresponding character is replaced by ".".
-
-o	"b" is the batch limit for this CPU.  If more than this number
-	of RCU callbacks is ready to invoke, then the remainder will
-	be deferred.
-
-o	"ci" is the number of RCU callbacks that have been invoked for
-	this CPU.  Note that ci+nci+ql is the number of callbacks that have
-	been registered in absence of CPU-hotplug activity.
-
-o	"nci" is the number of RCU callbacks that have been offloaded from
-	this CPU.  This will always be zero unless the kernel was built
-	with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot
-	parameter was specified.
-
-o	"co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.  These orphaned callbacks have been moved
-	to an arbitrarily chosen online CPU.
-
-o	"ca" is the number of RCU callbacks that have been adopted by this
-	CPU due to other CPUs going offline.  Note that ci+co-ca+ql is
-	the number of RCU callbacks registered on this CPU.
-
-
-Kernels compiled with CONFIG_RCU_BOOST=y display the following from
-/debug/rcu/rcu_preempt/rcudata:
-
-  0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
-  1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
-  2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
-  3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
-  4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
-  5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
-  6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
-  7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
-
-This is similar to the output discussed above, but contains the following
-additional fields:
-
-o	"kt" is the per-CPU kernel-thread state.  The digit preceding
-	the first slash is zero if there is no work pending and 1
-	otherwise.  The character between the first pair of slashes is
-	as follows:
-
-	"S"	The kernel thread is stopped, in other words, all
-		CPUs corresponding to this rcu_node structure are
-		offline.
-
-	"R"	The kernel thread is running.
-
-	"W"	The kernel thread is waiting because there is no work
-		for it to do.
-
-	"O"	The kernel thread is waiting because it has been
-		forced off of its designated CPU or because its
-		->cpus_allowed mask permits it to run on other than
-		its designated CPU.
-
-	"Y"	The kernel thread is yielding to avoid hogging CPU.
-
-	"?"	Unknown value, indicates a bug.
-
-	The number after the final slash is the CPU that the kthread
-	is actually running on.
-
-	This field is displayed only for CONFIG_RCU_BOOST kernels.
-
-o	"ktl" is the low-order 16 bits (in hexadecimal) of the count of
-	the number of times that this CPU's per-CPU kthread has gone
-	through its loop servicing invoke_rcu_cpu_kthread() requests.
-
-	This field is displayed only for CONFIG_RCU_BOOST kernels.
-
-
-The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
-
-s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872
-
-These fields are as follows:
-
-o	"s" is the sequence number, with an odd number indicating that
-	an expedited grace period is in progress.
-
-o	"wd1", "wd2", and "wd3" are the number of times that an attempt
-	to start an expedited grace period found that someone else had
-	completed an expedited grace period that satisfies the attempted
-	request.  "Our work is done."
-
-o	"enq" is the number of quiescent states still outstanding.
-
-o	"sc" is the number of times that the attempt to start a
-	new expedited grace period succeeded.
-
-
-The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
-
-completed=31249  gpnum=31250  age=1  max=18
-
-These fields are taken from the rcu_state structure, and are as follows:
-
-o	"completed" is the number of grace periods that have completed.
-	It is comparable to the "c" field from rcu/rcudata in that a
-	CPU whose "c" field matches the value of "completed" is aware
-	that the corresponding RCU grace period has completed.
-
-o	"gpnum" is the number of grace periods that have started.  It is
-	similarly comparable to the "g" field from rcu/rcudata in that
-	a CPU whose "g" field matches the value of "gpnum" is aware that
-	the corresponding RCU grace period has started.
-
-	If these two fields are equal, then there is no grace period
-	in progress, in other words, RCU is idle.  On the other hand,
-	if the two fields differ (as they are above), then an RCU grace
-	period is in progress.
-
-o	"age" is the number of jiffies that the current grace period
-	has extended for, or zero if there is no grace period currently
-	in effect.
-
-o	"max" is the age in jiffies of the longest-duration grace period
-	thus far.
-
-The output of "cat rcu/rcu_preempt/rcuhier" looks as follows:
-
-c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0
-3/3 ..>. 0:7 ^0
-e/e ..>. 0:3 ^0    d/d ..>. 4:7 ^1
-
-The fields are as follows:
-
-o	"c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp.
-
-o	"g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp.
-
-o	"s" is the current state of the force_quiescent_state()
-	state machine.
-
-o	"jfq" is the number of jiffies remaining for this grace period
-	before force_quiescent_state() is invoked to help push things
-	along.	Note that CPUs in idle mode throughout the grace period
-	will not report on their own, but rather must be check by some
-	other CPU via force_quiescent_state().
-
-o	"j" is the low-order four hex digits of the jiffies counter.
-	Yes, Paul did run into a number of problems that turned out to
-	be due to the jiffies counter no longer counting.  Why do you ask?
-
-o	"nfqs" is the number of calls to force_quiescent_state() since
-	boot.
-
-o	"nfqsng" is the number of useless calls to force_quiescent_state(),
-	where there wasn't actually a grace period active.  This can
-	no longer happen due to grace-period processing being pushed
-	into a kthread.  The number in parentheses is the difference
-	between "nfqs" and "nfqsng", or the number of times that
-	force_quiescent_state() actually did some real work.
-
-o	"fqlh" is the number of calls to force_quiescent_state() that
-	exited immediately (without even being counted in nfqs above)
-	due to contention on ->fqslock.
-
-o	Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
-	structure.  Each line represents one level of the hierarchy,
-	from root to leaves.  It is best to think of the rcu_data
-	structures as forming yet another level after the leaves.
-	Note that there might be either one, two, three, or even four
-	levels of rcu_node structures, depending on the relationship
-	between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly
-	adjusted using the rcu_fanout_leaf kernel boot parameter), and
-	CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of
-	possible CPUs for the booting hardware).
-
-	o	The numbers separated by the "/" are the qsmask followed
-		by the qsmaskinit.  The qsmask will have one bit
-		set for each entity in the next lower level that has
-		not yet checked in for the current grace period ("e"
-		indicating CPUs 5, 6, and 7 in the example above).
-		The qsmaskinit will have one bit for each entity that is
-		currently expected to check in during each grace period.
-		The value of qsmaskinit is assigned to that of qsmask
-		at the beginning of each grace period.
-
-	o	The characters separated by the ">" indicate the state
-		of the blocked-tasks lists.  A "G" preceding the ">"
-		indicates that at least one task blocked in an RCU
-		read-side critical section blocks the current grace
-		period, while a "E" preceding the ">" indicates that
-		at least one task blocked in an RCU read-side critical
-		section blocks the current expedited grace period.
-		A "T" character following the ">" indicates that at
-		least one task is blocked within an RCU read-side
-		critical section, regardless of whether any current
-		grace period (expedited or normal) is inconvenienced.
-		A "." character appears if the corresponding condition
-		does not hold, so that "..>." indicates that no tasks
-		are blocked.  In contrast, "GE>T" indicates maximal
-		inconvenience from blocked tasks.  CONFIG_TREE_RCU
-		builds of the kernel will always show "..>.".
-
-	o	The numbers separated by the ":" are the range of CPUs
-		served by this struct rcu_node.  This can be helpful
-		in working out how the hierarchy is wired together.
-
-		For example, the example rcu_node structure shown above
-		has "0:7", indicating that it covers CPUs 0 through 7.
-
-	o	The number after the "^" indicates the bit in the
-		next higher level rcu_node structure that this rcu_node
-		structure corresponds to.  For example, the "d/d ..>. 4:7
-		^1" has a "1" in this position, indicating that it
-		corresponds to the "1" bit in the "3" shown in the
-		"3/3 ..>. 0:7 ^0" entry on the next level up.
-
-
-The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
-
-  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
-  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
-  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
-  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
-  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
-  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
-  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
-  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
-
-The fields are as follows:
-
-o	The leading number is the CPU number, with "!" indicating
-	an offline CPU.
-
-o	"np" is the number of times that __rcu_pending() has been invoked
-	for the corresponding flavor of RCU.
-
-o	"qsp" is the number of times that the RCU was waiting for a
-	quiescent state from this CPU.
-
-o	"rpq" is the number of times that the CPU had passed through
-	a quiescent state, but not yet reported it to RCU.
-
-o	"cbr" is the number of times that this CPU had RCU callbacks
-	that had passed through a grace period, and were thus ready
-	to be invoked.
-
-o	"cng" is the number of times that this CPU needed another
-	grace period while RCU was idle.
-
-o	"gpc" is the number of times that an old grace period had
-	completed, but this CPU was not yet aware of it.
-
-o	"gps" is the number of times that a new grace period had started,
-	but this CPU was not yet aware of it.
-
-o	"ndw" is the number of times that a wakeup of an rcuo
-	callback-offload kthread had to be deferred in order to avoid
-	deadlock.
-
-o	"nn" is the number of times that this CPU needed nothing.
-
-
-The output of "cat rcu/rcuboost" looks as follows:
-
-0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
-    balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0
-4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
-    balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0
-
-This information is output only for rcu_preempt.  Each two-line entry
-corresponds to a leaf rcu_node structure.  The fields are as follows:
-
-o	"n:m" is the CPU-number range for the corresponding two-line
-	entry.  In the sample output above, the first entry covers
-	CPUs zero through three and the second entry covers CPUs four
-	through seven.
-
-o	"tasks=TNEB" gives the state of the various segments of the
-	rnp->blocked_tasks list:
-
-	"T"	This indicates that there are some tasks that blocked
-		while running on one of the corresponding CPUs while
-		in an RCU read-side critical section.
-
-	"N"	This indicates that some of the blocked tasks are preventing
-		the current normal (non-expedited) grace period from
-		completing.
-
-	"E"	This indicates that some of the blocked tasks are preventing
-		the current expedited grace period from completing.
-
-	"B"	This indicates that some of the blocked tasks are in
-		need of RCU priority boosting.
-
-	Each character is replaced with "." if the corresponding
-	condition does not hold.
-
-o	"kt" is the state of the RCU priority-boosting kernel
-	thread associated with the corresponding rcu_node structure.
-	The state can be one of the following:
-
-	"S"	The kernel thread is stopped, in other words, all
-		CPUs corresponding to this rcu_node structure are
-		offline.
-
-	"R"	The kernel thread is running.
-
-	"W"	The kernel thread is waiting because there is no work
-		for it to do.
-
-	"Y"	The kernel thread is yielding to avoid hogging CPU.
-
-	"?"	Unknown value, indicates a bug.
-
-o	"ntb" is the number of tasks boosted.
-
-o	"neb" is the number of tasks boosted in order to complete an
-	expedited grace period.
-
-o	"nnb" is the number of tasks boosted in order to complete a
-	normal (non-expedited) grace period.  When boosting a task
-	that was blocking both an expedited and a normal grace period,
-	it is counted against the expedited total above.
-
-o	"j" is the low-order 16 bits of the jiffies counter in
-	hexadecimal.
-
-o	"bt" is the low-order 16 bits of the value that the jiffies
-	counter will have when we next start boosting, assuming that
-	the current grace period does not end beforehand.  This is
-	also in hexadecimal.
-
-o	"balk: nt" counts the number of times we didn't boost (in
-	other words, we balked) even though it was time to boost because
-	there were no blocked tasks to boost.  This situation occurs
-	when there is one blocked task on one rcu_node structure and
-	none on some other rcu_node structure.
-
-o	"egt" counts the number of times we balked because although
-	there were blocked tasks, none of them were blocking the
-	current grace period, whether expedited or otherwise.
-
-o	"bt" counts the number of times we balked because boosting
-	had already been initiated for the current grace period.
-
-o	"nb" counts the number of times we balked because there
-	was at least one task blocking the current non-expedited grace
-	period that never had blocked.  If it is already running, it
-	just won't help to boost its priority!
-
-o	"ny" counts the number of times we balked because it was
-	not yet time to start boosting.
-
-o	"nos" counts the number of times we balked for other
-	reasons, e.g., the grace period ended first.
-
-
-CONFIG_TINY_RCU debugfs Files and Formats
-
-These implementations of RCU provides a single debugfs file under the
-top-level directory RCU, namely rcu/rcudata, which displays fields in
-rcu_bh_ctrlblk and rcu_sched_ctrlblk.
-
-The output of "cat rcu/rcudata" is as follows:
-
-rcu_sched: qlen: 0
-rcu_bh: qlen: 0
-
-This is split into rcu_sched and rcu_bh sections.  The field is as
-follows:
-
-o	"qlen" is the number of RCU callbacks currently waiting either
-	for an RCU grace period or waiting to be invoked.  This is the
-	only field present for rcu_sched and rcu_bh, due to the
-	short-circuiting of grace period in those two cases.
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
index 22cb309..effe7af 100644
--- a/Documentation/acpi/acpi-lid.txt
+++ b/Documentation/acpi/acpi-lid.txt
@@ -59,20 +59,28 @@
 If the userspace hasn't been prepared to ignore the unreliable "opened"
 events and the unreliable initial state notification, Linux users can use
 the following kernel parameters to handle the possible issues:
-A. button.lid_init_state=open:
+A. button.lid_init_state=method:
+   When this option is specified, the ACPI button driver reports the
+   initial lid state using the returning value of the _LID control method
+   and whether the "opened"/"closed" events are paired fully relies on the
+   firmware implementation.
+   This option can be used to fix some platforms where the returning value
+   of the _LID control method is reliable but the initial lid state
+   notification is missing.
+   This option is the default behavior during the period the userspace
+   isn't ready to handle the buggy AML tables.
+B. button.lid_init_state=open:
    When this option is specified, the ACPI button driver always reports the
    initial lid state as "opened" and whether the "opened"/"closed" events
    are paired fully relies on the firmware implementation.
    This may fix some platforms where the returning value of the _LID
    control method is not reliable and the initial lid state notification is
    missing.
-   This option is the default behavior during the period the userspace
-   isn't ready to handle the buggy AML tables.
 
 If the userspace has been prepared to ignore the unreliable "opened" events
 and the unreliable initial state notification, Linux users should always
 use the following kernel parameter:
-B. button.lid_init_state=ignore:
+C. button.lid_init_state=ignore:
    When this option is specified, the ACPI button driver never reports the
    initial lid state and there is a compensation mechanism implemented to
    ensure that the reliable "closed" notifications can always be delivered
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 15f79c2..f59aad5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -866,6 +866,15 @@
 
 	dscc4.setup=	[NET]
 
+	dt_cpu_ftrs=	[PPC]
+			Format: {"off" | "known"}
+			Control how the dt_cpu_ftrs device-tree binding is
+			used for CPU feature discovery and setup (if it
+			exists).
+			off: Do not use it, fall back to legacy cpu table.
+			known: Do not pass through unknown features to guests
+			or userspace, only those that the kernel is aware of.
+
 	dump_apple_properties	[X86]
 			Dump name and content of EFI device properties on
 			x86 Macs.  Useful for driver authors to determine
@@ -3229,21 +3238,17 @@
 
 	rcutree.gp_cleanup_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period cleanup.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
+			RCU grace-period cleanup.
 
 	rcutree.gp_init_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period initialization.  This only has
-			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
-			is set.
+			RCU grace-period initialization.
 
 	rcutree.gp_preinit_delay=	[KNL]
 			Set the number of jiffies to delay each step of
 			RCU grace-period pre-initialization, that is,
 			the propagation of recent CPU-hotplug changes up
-			the rcu_node combining tree.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
+			the rcu_node combining tree.
 
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
@@ -3319,6 +3324,17 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcuperf.gp_async= [KNL]
+			Measure performance of asynchronous
+			grace-period primitives such as call_rcu().
+
+	rcuperf.gp_async_max= [KNL]
+			Specify the maximum number of outstanding
+			callbacks per writer thread.  When a writer
+			thread exceeds this limit, it invokes the
+			corresponding flavor of rcu_barrier() to allow
+			previously posted callbacks to drain.
+
 	rcuperf.gp_exp= [KNL]
 			Measure performance of expedited synchronous
 			grace-period primitives.
@@ -3346,17 +3362,22 @@
 	rcuperf.perf_runnable= [BOOT]
 			Start rcuperf running at boot time.
 
+	rcuperf.perf_type= [KNL]
+			Specify the RCU implementation to test.
+
 	rcuperf.shutdown= [KNL]
 			Shut the system down after performance tests
 			complete.  This is useful for hands-off automated
 			testing.
 
-	rcuperf.perf_type= [KNL]
-			Specify the RCU implementation to test.
-
 	rcuperf.verbose= [KNL]
 			Enable additional printk() statements.
 
+	rcuperf.writer_holdoff= [KNL]
+			Write-side holdoff between grace periods,
+			in microseconds.  The default of zero says
+			no holdoff.
+
 	rcutorture.cbflood_inter_holdoff= [KNL]
 			Set holdoff time (jiffies) between successive
 			callback-flood tests.
@@ -3794,6 +3815,15 @@
 	spia_pedr=
 	spia_peddr=
 
+	srcutree.counter_wrap_check [KNL]
+			Specifies how frequently to check for
+			grace-period sequence counter wrap for the
+			srcu_data structure's ->srcu_gp_seq_needed field.
+			The greater the number of bits set in this kernel
+			parameter, the less frequently counter wrap will
+			be checked for.  Note that the bottom two bits
+			are ignored.
+
 	srcutree.exp_holdoff [KNL]
 			Specifies how many nanoseconds must elapse
 			since the end of the last SRCU grace period for
@@ -3802,6 +3832,13 @@
 			expediting.  Set to zero to disable automatic
 			expediting.
 
+	stack_guard_gap=	[MM]
+			override the default stack gap protection. The value
+			is in page units and it defines how many pages prior
+			to (for stacks growing down) resp. after (for stacks
+			growing up) the main stack are reserved for no other
+			mapping. Default value is 256 pages.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 289c80f..09aa2e9 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -1,4 +1,5 @@
 .. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
+.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
 
 =======================
 CPU Performance Scaling
@@ -75,7 +76,7 @@
 interface it comes from and may not be easily represented in an abstract,
 platform-independent way.  For this reason, ``CPUFreq`` allows scaling drivers
 to bypass the governor layer and implement their own performance scaling
-algorithms.  That is done by the ``intel_pstate`` scaling driver.
+algorithms.  That is done by the |intel_pstate| scaling driver.
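
Structurally, such a driver fills in the ->setpolicy() callback of struct cpufreq_driver instead of relying on a governor. A bare skeleton, with made-up names and all other required callbacks omitted:

        #include <linux/cpufreq.h>

        static int my_setpolicy(struct cpufreq_policy *policy)
        {
                /* Choose an operating frequency/P-state here, based on
                 * policy->policy, policy->min and policy->max; no generic
                 * scaling governor is attached to this policy. */
                return 0;
        }

        static struct cpufreq_driver my_pstate_driver = {
                .name      = "my_pstate",
                .setpolicy = my_setpolicy,
                /* ->init(), ->verify() and friends omitted for brevity. */
        };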
 
 
 ``CPUFreq`` Policy Objects
@@ -174,13 +175,13 @@
 into account.  That is achieved by invoking the governor's ``->stop`` and
 ``->start()`` callbacks, in this order, for the entire policy.
 
-As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling
+As mentioned before, the |intel_pstate| scaling driver bypasses the scaling
 governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
-Consequently, if ``intel_pstate`` is used, scaling governors are not attached to
+Consequently, if |intel_pstate| is used, scaling governors are not attached to
 new policy objects.  Instead, the driver's ``->setpolicy()`` callback is invoked
 to register per-CPU utilization update callbacks for each policy.  These
 callbacks are invoked by the CPU scheduler in the same way as for scaling
-governors, but in the ``intel_pstate`` case they both determine the P-state to
+governors, but in the |intel_pstate| case they both determine the P-state to
 use and change the hardware configuration accordingly in one go from scheduler
 context.
 
@@ -257,7 +258,7 @@
 
 ``scaling_available_governors``
 	List of ``CPUFreq`` scaling governors present in the kernel that can
-	be attached to this policy or (if the ``intel_pstate`` scaling driver is
+	be attached to this policy or (if the |intel_pstate| scaling driver is
 	in use) list of scaling algorithms provided by the driver that can be
 	applied to this policy.
 
@@ -274,7 +275,7 @@
 	the CPU is actually running at (due to hardware design and other
 	limitations).
 
-	Some scaling drivers (e.g. ``intel_pstate``) attempt to provide
+	Some scaling drivers (e.g. |intel_pstate|) attempt to provide
 	information more precisely reflecting the current CPU frequency through
 	this attribute, but that still may not be the exact current CPU
 	frequency as seen by the hardware at the moment.
@@ -284,13 +285,13 @@
 
 ``scaling_governor``
 	The scaling governor currently attached to this policy or (if the
-	``intel_pstate`` scaling driver is in use) the scaling algorithm
+	|intel_pstate| scaling driver is in use) the scaling algorithm
 	provided by the driver that is currently applied to this policy.
 
 	This attribute is read-write and writing to it will cause a new scaling
 	governor to be attached to this policy or a new scaling algorithm
 	provided by the scaling driver to be applied to it (in the
-	``intel_pstate`` case), as indicated by the string written to this
+	|intel_pstate| case), as indicated by the string written to this
 	attribute (which must be one of the names listed by the
 	``scaling_available_governors`` attribute described above).
 
@@ -619,7 +620,7 @@
 the "boost" setting for the whole system.  It is not present if the underlying
 scaling driver does not support the frequency boost mechanism (or supports it,
 but provides a driver-specific interface for controlling it, like
-``intel_pstate``).
+|intel_pstate|).
 
 If the value in this file is 1, the frequency boost mechanism is enabled.  This
 means that either the hardware can be put into states in which it is able to
diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst
index c80f087..7f148f7 100644
--- a/Documentation/admin-guide/pm/index.rst
+++ b/Documentation/admin-guide/pm/index.rst
@@ -6,6 +6,7 @@
    :maxdepth: 2
 
    cpufreq
+   intel_pstate
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
new file mode 100644
index 0000000..33d7039
--- /dev/null
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -0,0 +1,755 @@
+===============================================
+``intel_pstate`` CPU Performance Scaling Driver
+===============================================
+
+::
+
+ Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+General Information
+===================
+
+``intel_pstate`` is a part of the
+:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel
+(``CPUFreq``).  It is a scaling driver for the Sandy Bridge and later
+generations of Intel processors.  Note, however, that some of those processors
+may not be supported.  [To understand ``intel_pstate`` it is necessary to know
+how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
+you have not done that yet.]
+
+For the processors supported by ``intel_pstate``, the P-state concept is broader
+than just an operating frequency or an operating performance point (see the
+`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
+information about that).  For this reason, the representation of P-states used
+by ``intel_pstate`` internally follows the hardware specification (for details
+refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
+Volume 3: System Programming Guide <SDM_>`_).  However, the ``CPUFreq`` core
+uses frequencies for identifying operating performance points of CPUs and
+frequencies are involved in the user space interface exposed by it, so
+``intel_pstate`` maps its internal representation of P-states to frequencies too
+(fortunately, that mapping is unambiguous).  At the same time, it would not be
+practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of
+available frequencies due to the possible size of it, so the driver does not do
+that.  Some functionality of the core is limited by that.
+
+Since the hardware P-state selection interface used by ``intel_pstate`` is
+available at the logical CPU level, the driver always works with individual
+CPUs.  Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy
+object corresponds to one logical CPU and ``CPUFreq`` policies are effectively
+equivalent to CPUs.  In particular, this means that they become "inactive" every
+time the corresponding CPU is taken offline and need to be re-initialized when
+it goes back online.
+
+``intel_pstate`` is not modular, so it cannot be unloaded, which means that the
+only way to pass early-configuration-time parameters to it is via the kernel
+command line.  However, its configuration can be adjusted via ``sysfs`` to a
+great extent.  In some configurations it even is possible to unregister it via
+``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
+registered (see `below <status_attr_>`_).
+
+
+Operation Modes
+===============
+
+``intel_pstate`` can operate in three different modes: in the active mode with
+or without hardware-managed P-states support and in the passive mode.  Which of
+them will be in effect depends on what kernel command line options are used and
+on the capabilities of the processor.
+
+Active Mode
+-----------
+
+This is the default operation mode of ``intel_pstate``.  If it works in this
+mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
+policies contains the string "intel_pstate".
+
+In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
+provides its own scaling algorithms for P-state selection.  Those algorithms
+can be applied to ``CPUFreq`` policies in the same way as generic scaling
+governors (that is, through the ``scaling_governor`` policy attribute in
+``sysfs``).  [Note that different P-state selection algorithms may be chosen for
+different policies, but that is not recommended.]
+
+They are not generic scaling governors, but their names are the same as the
+names of some of those governors.  Moreover, confusingly enough, they generally
+do not work in the same way as the generic governors they share the names with.
+For example, the ``powersave`` P-state selection algorithm provided by
+``intel_pstate`` is not a counterpart of the generic ``powersave`` governor
+(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors).
+
+There are two P-state selection algorithms provided by ``intel_pstate`` in the
+active mode: ``powersave`` and ``performance``.  The way they both operate
+depends on whether or not the hardware-managed P-states (HWP) feature has been
+enabled in the processor and possibly on the processor model.
+
+Which of the P-state selection algorithms is used by default depends on the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option.
+Namely, if that option is set, the ``performance`` algorithm will be used by
+default, and the other one will be used by default if it is not set.
+
+Active Mode With HWP
+~~~~~~~~~~~~~~~~~~~~
+
+If the processor supports the HWP feature, it will be enabled during the
+processor initialization and cannot be disabled after that.  It is possible
+to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the
+kernel in the command line.
+
+If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to
+select P-states by itself, but still it can give hints to the processor's
+internal P-state selection logic.  What those hints are depends on which P-state
+selection algorithm has been applied to the given policy (or to the CPU it
+corresponds to).
+
+Even though the P-state selection is carried out by the processor automatically,
+``intel_pstate`` registers utilization update callbacks with the CPU scheduler
+in this mode.  However, they are not used for running a P-state selection
+algorithm, but for periodic updates of the current CPU frequency information to
+be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``.
+
+HWP + ``performance``
+.....................
+
+In this configuration ``intel_pstate`` will write 0 to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
+internal P-state selection logic is expected to focus entirely on performance.
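
Roughly speaking (a conceptual sketch only, not the driver's actual code), the EPP case amounts to clearing the Energy-Performance Preference field, which the SDM places in bits 31:24 of the IA32_HWP_REQUEST MSR:

        #include <linux/types.h>
        #include <linux/bits.h>
        #include <asm/msr.h>

        static void epp_set_performance(void)
        {
                u64 value;

                rdmsrl(MSR_HWP_REQUEST, value);
                value &= ~GENMASK_ULL(31, 24);   /* EPP = 0: maximum performance focus */
                wrmsrl(MSR_HWP_REQUEST, value);
        }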
+
+This will override the EPP/EPB setting coming from the ``sysfs`` interface
+(see `Energy vs Performance Hints`_ below).
+
+Also, in this configuration the range of P-states available to the processor's
+internal P-state selection logic is always restricted to the upper boundary
+(that is, the maximum P-state that the driver is allowed to use).
+
+HWP + ``powersave``
+...................
+
+In this configuration ``intel_pstate`` will set the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was
+previously set to via ``sysfs`` (or whatever default value it was
+set to by the platform firmware).  This usually causes the processor's
+internal P-state selection logic to be less performance-focused.
+
+Active Mode Without HWP
+~~~~~~~~~~~~~~~~~~~~~~~
+
+This is the default operation mode for processors that do not support the HWP
+feature.  It also is used by default with the ``intel_pstate=no_hwp`` argument
+in the kernel command line.  However, in this mode ``intel_pstate`` may refuse
+to work with the given processor if it does not recognize it.  [Note that
+``intel_pstate`` will never refuse to work with any processor with the HWP
+feature enabled.]
+
+In this mode ``intel_pstate`` registers utilization update callbacks with the
+CPU scheduler in order to run a P-state selection algorithm, either
+``powersave`` or ``performance``, depending on the ``scaling_governor`` policy
+setting in ``sysfs``.  The current CPU frequency information to be made
+available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
+periodically updated by those utilization update callbacks too.
+
+``performance``
+...............
+
+Without HWP, this P-state selection algorithm is always the same regardless of
+the processor model and platform configuration.
+
+It selects the maximum P-state it is allowed to use, subject to limits set via
+``sysfs``, every time the P-state selection computations are carried out by the
+driver's utilization update callback for the given CPU (that does not happen
+more often than every 10 ms), but the hardware configuration will not be changed
+if the new P-state is the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is set.
+
+``powersave``
+.............
+
+Without HWP, this P-state selection algorithm generally depends on the
+processor model and/or the system profile setting in the ACPI tables and there
+are two variants of it.
+
+One of them is used with processors from the Atom line and (regardless of the
+processor model) on platforms with the system profile in the ACPI tables set to
+"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
+"workstation".  It is also used with processors supporting the HWP feature if
+that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
+argument in the kernel command line).  It is similar to the algorithm
+implemented by the generic ``schedutil`` scaling governor except that the
+utilization metric used by it is based on numbers coming from feedback
+registers of the CPU.  It generally selects P-states proportional to the
+current CPU utilization, so it is referred to as the "proportional" algorithm.
+
+The second variant of the ``powersave`` P-state selection algorithm, used in all
+of the other cases (generally, on processors from the Core line, so it is
+referred to as the "Core" algorithm), is based on the values read from the APERF
+and MPERF feedback registers and the previously requested target P-state.
+It does not really take CPU utilization into account explicitly, but as a rule
+it causes the CPU P-state to ramp up very quickly in response to increased
+utilization which is generally desirable in server environments.
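
Conceptually (a simplified sketch, not the algorithm actually implemented by the driver), the feedback registers are used as follows: APERF counts at the frequency the core actually runs at and MPERF at a fixed reference frequency, both only while the core is not idle, so the ratio of their deltas estimates the recently delivered performance relative to the reference:

        #include <linux/math64.h>
        #include <asm/msr.h>

        static u64 prev_aperf, prev_mperf;

        /* Returns the average delivered frequency since the last sample, as a
         * percentage of the reference (base) frequency. */
        static unsigned int perf_ratio_percent(void)
        {
                u64 aperf, mperf, da, dm;

                rdmsrl(MSR_IA32_APERF, aperf);
                rdmsrl(MSR_IA32_MPERF, mperf);
                da = aperf - prev_aperf;
                dm = mperf - prev_mperf;
                prev_aperf = aperf;
                prev_mperf = mperf;

                return dm ? div64_u64(100 * da, dm) : 0;
        }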
+
+Regardless of the variant, this algorithm is run by the driver's utilization
+update callback for the given CPU when it is invoked by the CPU scheduler, but
+not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
+particular case <Tuning Interface in debugfs_>`_).  Like in the ``performance``
+case, the hardware configuration is not touched if the new P-state turns out to
+be the same as the current one.
+
+This is the default P-state selection algorithm if the
+:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
+is not set.
+
+Passive Mode
+------------
+
+This mode is used if the ``intel_pstate=passive`` argument is passed to the
+kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
+Like in the active mode without HWP support, in this mode ``intel_pstate`` may
+refuse to work with the given processor if it does not recognize it.
+
+If the driver works in this mode, the ``scaling_driver`` policy attribute in
+``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
+Then, the driver behaves like a regular ``CPUFreq`` scaling driver.  That is,
+it is invoked by generic scaling governors when necessary to talk to the
+hardware in order to change the P-state of a CPU (in particular, the
+``schedutil`` governor can invoke it directly from scheduler context).
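
In contrast to the ->setpolicy() style used in the active mode, a driver working this way exposes frequency-setting callbacks such as ->target_index() or ->fast_switch() for the governors to call. A bare skeleton with made-up names (most required callbacks omitted):

        #include <linux/cpufreq.h>

        static unsigned int my_fast_switch(struct cpufreq_policy *policy,
                                           unsigned int target_freq)
        {
                /* Program the hardware for target_freq; this callback may be
                 * invoked directly from scheduler context by schedutil. */
                return target_freq;            /* frequency actually set */
        }

        static struct cpufreq_driver my_cpufreq_driver = {
                .name        = "my_cpufreq",
                .fast_switch = my_fast_switch,
                /* ->init(), ->verify(), ->target_index(), ... omitted. */
        };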
+
+While in this mode, ``intel_pstate`` can be used with all of the (generic)
+scaling governors listed by the ``scaling_available_governors`` policy attribute
+in ``sysfs`` (and the P-state selection algorithms described above are not
+used).  Then, it is responsible for the configuration of policy objects
+corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling
+governors attached to the policy objects) with accurate information on the
+maximum and minimum operating frequencies supported by the hardware (including
+the so-called "turbo" frequency ranges).  In other words, in the passive mode
+the entire range of available P-states is exposed by ``intel_pstate`` to the
+``CPUFreq`` core.  However, in this mode the driver does not register
+utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq``
+information comes from the ``CPUFreq`` core (and is the last frequency selected
+by the current scaling governor for the given policy).
+
+
+.. _turbo:
+
+Turbo P-states Support
+======================
+
+In the majority of cases, the entire range of P-states available to
+``intel_pstate`` can be divided into two sub-ranges that correspond to
+different types of processor behavior, above and below a boundary that
+will be referred to as the "turbo threshold" in what follows.
+
+The P-states above the turbo threshold are referred to as "turbo P-states" and
+the whole sub-range of P-states they belong to is referred to as the "turbo
+range".  These names are related to the Turbo Boost technology allowing a
+multicore processor to opportunistically increase the P-state of one or more
+cores if there is enough power to do that and if that is not going to cause the
+thermal envelope of the processor package to be exceeded.
+
+Specifically, if software sets the P-state of a CPU core within the turbo range
+(that is, above the turbo threshold), the processor is permitted to take over
+performance scaling control for that core and put it into turbo P-states of its
+choice going forward.  However, that permission is interpreted differently by
+different processor generations.  Namely, the Sandy Bridge generation of
+processors will never use any P-states above the last one set by software for
+the given core, even if it is within the turbo range, whereas all of the later
+processor generations will take it as a license to use any P-states from the
+turbo range, even above the one set by software.  In other words, on those
+processors setting any P-state from the turbo range will enable the processor
+to put the given core into all turbo P-states up to and including the maximum
+supported one as it sees fit.
+
+One important property of turbo P-states is that they are not sustainable.  More
+precisely, there is no guarantee that any CPUs will be able to stay in any of
+those states indefinitely, because the power distribution within the processor
+package may change over time or the thermal envelope it was designed for might
+be exceeded if a turbo P-state was used for too long.
+
+In turn, the P-states below the turbo threshold generally are sustainable.  In
+fact, if one of them is set by software, the processor is not expected to change
+it to a lower one unless in a thermal stress or a power limit violation
+situation (a higher P-state may still be used if it is set for another CPU in
+the same package at the same time, for example).
+
+Some processors allow multiple cores to be in turbo P-states at the same time,
+but the maximum P-state that can be set for them generally depends on the number
+of cores running concurrently.  The maximum turbo P-state that can be set for 3
+cores at the same time usually is lower than the analogous maximum P-state for
+2 cores, which in turn usually is lower than the maximum turbo P-state that can
+be set for 1 core.  The one-core maximum turbo P-state is thus the maximum
+supported one overall.
+
+The maximum supported turbo P-state, the turbo threshold (the maximum supported
+non-turbo P-state) and the minimum supported P-state are specific to the
+processor model and can be determined by reading the processor's model-specific
+registers (MSRs).  Moreover, some processors support the Configurable TDP
+(Thermal Design Power) feature and, when that feature is enabled, the turbo
+threshold effectively becomes a configurable value that can be set by the
+platform firmware.
+
+Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
+the entire range of available P-states, including the whole turbo range, to the
+``CPUFreq`` core and (in the passive mode) to generic scaling governors.  This
+generally causes turbo P-states to be set more often when ``intel_pstate`` is
+used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
+for more information).
+
+Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
+(even if the Configurable TDP feature is enabled in the processor), its
+``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
+work as expected in all cases (that is, if set to disable turbo P-states, it
+always should prevent ``intel_pstate`` from using them).
+
+
+Processor Support
+=================
+
+To handle a given processor ``intel_pstate`` requires a number of different
+pieces of information on it to be known, including:
+
+ * The minimum supported P-state.
+
+ * The maximum supported `non-turbo P-state <turbo_>`_.
+
+ * Whether or not turbo P-states are supported at all.
+
+ * The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
+   are supported).
+
+ * The scaling formula to translate the driver's internal representation
+   of P-states into frequencies and the other way around.
+
+Generally, ways to obtain that information are specific to the processor model
+or family.  Although it often is possible to obtain all of it from the processor
+itself (using model-specific registers), there are cases in which hardware
+manuals need to be consulted as well.
+
+For this reason, there is a list of supported processors in ``intel_pstate`` and
+the driver initialization will fail if the detected processor is not in that
+list, unless it supports the `HWP feature <Active Mode_>`_.  [The interface to
+obtain all of the information listed above is the same for all of the processors
+supporting the HWP feature, which is why they all are supported by
+``intel_pstate``.]
+
+
+User Space Interface in ``sysfs``
+=================================
+
+Global Attributes
+-----------------
+
+``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level.  They are located in the
+``/sys/devices/system/cpu/intel_pstate/`` directory and affect all
+CPUs.
+
+Some of them are not present if the ``intel_pstate=per_cpu_perf_limits``
+argument is passed to the kernel in the command line.
+
+``max_perf_pct``
+	Maximum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``min_perf_pct``
+	Minimum P-state the driver is allowed to set in percent of the
+	maximum supported performance level (the highest supported `turbo
+	P-state <turbo_>`_).
+
+	This attribute will not be exposed if the
+	``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
+	command line.
+
+``num_pstates``
+	Number of P-states supported by the processor (between 0 and 255
+	inclusive) including both turbo and non-turbo P-states (see
+	`Turbo P-states Support`_).
+
+	The value of this attribute is not affected by the ``no_turbo``
+	setting described `below <no_turbo_attr_>`_.
+
+	This attribute is read-only.
+
+``turbo_pct``
+	Ratio of the `turbo range <turbo_>`_ size to the size of the entire
+	range of supported P-states, in percent.
+
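+	For illustration, consider a hypothetical processor (the numbers below
+	are an assumption made for the sake of this example only) whose
+	minimum P-state is 0x08, whose maximum non-turbo P-state is 0x17 and
+	whose maximum one-core turbo P-state is 0x21 in the hardware's
+	internal representation.  It supports 0x21 - 0x08 + 1 = 26 P-states
+	in total (the ``num_pstates`` value), 10 of which are in the turbo
+	range, so ``turbo_pct`` is reported as 39 (roughly 10/26 of the whole
+	range).
+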
+	This attribute is read-only.
+
+.. _no_turbo_attr:
+
+``no_turbo``
+	If set (equal to 1), the driver is not allowed to set any turbo P-states
+	(see `Turbo P-states Support`_).  If unset (equal to 0, which is the
+	default), turbo P-states can be set by the driver.
+	[Note that ``intel_pstate`` does not support the general ``boost``
+	attribute (supported by some other scaling drivers) which is replaced
+	by this one.]
+
+	This attribute does not affect the maximum supported frequency value
+	supplied to the ``CPUFreq`` core and exposed via the policy interface,
+	but it affects the maximum possible value of per-policy P-state limits
+	(see `Interpretation of Policy Attributes`_ below for details).
+
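+	For example, turbo P-states can be disabled for all CPUs by writing
+	to this attribute (a usage sketch; root privileges are required)::
+
+	 # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
+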
+.. _status_attr:
+
+``status``
+	Operation mode of the driver: "active", "passive" or "off".
+
+	"active"
+		The driver is functional and in the `active mode
+		<Active Mode_>`_.
+
+	"passive"
+		The driver is functional and in the `passive mode
+		<Passive Mode_>`_.
+
+	"off"
+		The driver is not functional (it is not registered as a scaling
+		driver with the ``CPUFreq`` core).
+
+	This attribute can be written to in order to change the driver's
+	operation mode or to unregister it.  The string written to it must be
+	one of its possible values and, if the write is successful, it will
+	cause the driver to switch over to the operation mode represented by
+	that string - or to be unregistered in the "off" case.  [Actually,
+	switching over from the active mode to the passive mode or the other
+	way around causes the driver to be unregistered and registered again
+	with a different set of callbacks, so all of its settings (the global
+	as well as the per-policy ones) are then reset to their default
+	values, possibly depending on the target operation mode.]
+
+	That is only supported in some configurations, though (for example,
+	if the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
+	the operation mode of the driver cannot be changed), and if it is not
+	supported in the current configuration, writes to this attribute will
+	fail with an appropriate error.
+
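+	For example, the driver can be switched over to the passive mode at
+	run time (in configurations where that is supported) as follows::
+
+	 # echo passive > /sys/devices/system/cpu/intel_pstate/status
+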
+Interpretation of Policy Attributes
+-----------------------------------
+
+The interpretation of some ``CPUFreq`` policy attributes described in
+:doc:`cpufreq` is special when ``intel_pstate`` is the current scaling driver,
+and it generally depends on the driver's `operation mode <Operation Modes_>`_.
+
+First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
+``scaling_cur_freq`` attributes are produced by applying a processor-specific
+multiplier to the internal P-state representation used by ``intel_pstate``.
+Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
+attributes are capped by the frequency corresponding to the maximum P-state that
+the driver is allowed to set.
+
+If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
+not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
+and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
+Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
+``scaling_min_freq`` to go down to that value if they were above it before.
+However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
+restored after unsetting ``no_turbo``, unless these attributes have been written
+to after ``no_turbo`` was set.
+
+If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq``
+and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
+which also is the value of ``cpuinfo_max_freq`` in either case.
+
+Next, the following policy attributes have special meaning if
+``intel_pstate`` works in the `active mode <Active Mode_>`_:
+
+``scaling_available_governors``
+	List of P-state selection algorithms provided by ``intel_pstate``.
+
+``scaling_governor``
+	P-state selection algorithm provided by ``intel_pstate`` currently in
+	use with the given policy.
+
+``scaling_cur_freq``
+	Frequency of the average P-state of the CPU represented by the given
+	policy for the time interval between the last two invocations of the
+	driver's utilization update callback by the CPU scheduler for that CPU.
+
+The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
+same as for other scaling drivers.
+
+Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
+depends on the operation mode of the driver.  Namely, it is either
+"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
+`passive mode <Passive Mode_>`_).
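+
+For example, the operation mode currently in use can be checked by reading the
+``scaling_driver`` attribute of any policy (a usage sketch; the output shown
+below corresponds to the active mode)::
+
+ # cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver
+ intel_pstate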
+
+Coordination of P-State Limits
+------------------------------
+
+``intel_pstate`` allows P-state limits to be set in two ways: with the help of
+the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
+<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
+``CPUFreq`` policy attributes.  The coordination between those limits is based
+on the following rules, regardless of the current operation mode of the driver:
+
+ 1. All CPUs are affected by the global limits (that is, none of them can be
+    requested to run faster than the global maximum and none of them can be
+    requested to run slower than the global minimum).
+
+ 2. Each individual CPU is affected by its own per-policy limits (that is, it
+    cannot be requested to run faster than its own per-policy maximum and it
+    cannot be requested to run slower than its own per-policy minimum).
+
+ 3. The global and per-policy limits can be set independently.
+
+If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
+resulting effective values are written into its registers whenever the limits
+change in order to request its internal P-state selection logic to always set
+P-states within these limits.  Otherwise, the limits are taken into account by
+scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
+every time before setting a new P-state for a CPU.
+
+Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
+is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
+at all and the only way to set the limits is by using the policy attributes.
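+
+For example, the maximum P-state of all CPUs can be capped at 75% of the
+maximum supported performance level through the global interface, while one of
+the CPUs is limited further through its policy (a usage sketch; the values and
+the CPU number are only illustrative, and the frequency is in kHz)::
+
+ # echo 75 > /sys/devices/system/cpu/intel_pstate/max_perf_pct
+ # echo 2000000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq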
+
+
+Energy vs Performance Hints
+---------------------------
+
+If ``intel_pstate`` works in the `active mode with the HWP feature enabled
+<Active Mode With HWP_>`_ in the processor, additional attributes are present
+in every ``CPUFreq`` policy directory in ``sysfs``.  They are intended to allow
+user space to help ``intel_pstate`` to adjust the processor's internal P-state
+selection logic by focusing it on performance or on energy-efficiency, or
+somewhere between the two extremes:
+
+``energy_performance_preference``
+	Current value of the energy vs performance hint for the given policy
+	(or the CPU represented by it).
+
+	The hint can be changed by writing to this attribute.
+
+``energy_performance_available_preferences``
+	List of strings that can be written to the
+	``energy_performance_preference`` attribute.
+
+	They represent different energy vs performance hints and should be
+	self-explanatory, except that ``default`` represents whatever hint
+	value was set by the platform firmware.
+
+Strings written to the ``energy_performance_preference`` attribute are
+internally translated to integer values written to the processor's
+Energy-Performance Preference (EPP) knob (if supported) or its
+Energy-Performance Bias (EPB) knob.
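+
+For example, the hints available for a given CPU and the hint currently in
+effect for it can be inspected and changed through the policy directory of
+that CPU (a usage sketch; the set of strings shown and the CPU number are
+only illustrative)::
+
+ # cd /sys/devices/system/cpu/cpu0/cpufreq/
+ # cat energy_performance_available_preferences
+ default performance balance_performance balance_power power
+ # echo balance_power > energy_performance_preference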
+
+[Note that tasks may be migrated from one CPU to another by the scheduler's
+load-balancing algorithm and if different energy vs performance hints are
+set for those CPUs, that may lead to undesirable outcomes.  To avoid such
+issues it is better to set the same energy vs performance hint for all CPUs
+or to pin every task potentially sensitive to them to a specific CPU.]
+
+.. _acpi-cpufreq:
+
+``intel_pstate`` vs ``acpi-cpufreq``
+====================================
+
+On the majority of systems supported by ``intel_pstate``, the ACPI tables
+provided by the platform firmware contain ``_PSS`` objects returning information
+that can be used for CPU performance scaling (refer to the `ACPI specification`_
+for details on the ``_PSS`` objects and the format of the information returned
+by them).
+
+The information returned by the ACPI ``_PSS`` objects is used by the
+``acpi-cpufreq`` scaling driver.  On systems supported by ``intel_pstate``
+the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling
+interface, but the set of P-states it can use is limited by the ``_PSS``
+output.
+
+On those systems each ``_PSS`` object returns a list of P-states supported by
+the corresponding CPU which basically is a subset of the P-states range that can
+be used by ``intel_pstate`` on the same system, with one exception: the whole
+`turbo range <turbo_>`_ is represented by one item in it (the topmost one).  By
+convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
+than the frequency of the highest non-turbo P-state listed by it, but the
+corresponding P-state representation (following the hardware specification)
+returned for it matches the maximum supported turbo P-state (or is the
+special value 255 meaning essentially "go as high as you can get").
+
+The list of P-states returned by ``_PSS`` is reflected by the table of
+available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and
+scaling governors, and the minimum and maximum supported frequencies reported by
+it come from that list as well.  In particular, given the special representation
+of the turbo range described above, this means that the maximum supported
+frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency
+of the highest supported non-turbo P-state listed by ``_PSS`` which, of course,
+affects decisions made by the scaling governors, except for ``powersave`` and
+``performance``.
+
+For example, if a given governor attempts to select a frequency proportional to
+estimated CPU load and maps the load of 100% to the maximum supported frequency
+(possibly multiplied by a constant), then it will tend to choose P-states below
+the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because
+in that case the turbo range corresponds to a small fraction of the frequency
+band it can use (1 MHz vs 1 GHz or more).  In consequence, it will only go to
+the turbo range for the highest loads, and the other loads above 50% that might
+benefit from running at turbo frequencies will be given non-turbo P-states
+instead.
+
+One more issue related to that may appear on systems supporting the
+`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
+turbo threshold.  Namely, if that is not coordinated with the lists of P-states
+returned by ``_PSS`` properly, there may be more than one item corresponding to
+a turbo P-state in those lists and there may be a problem with avoiding the
+turbo range (if desirable or necessary).  Usually, to avoid using turbo
+P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
+by ``_PSS``, but that is not sufficient when there are other turbo P-states in
+the list returned by it.
+
+Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
+`passive mode <Passive Mode_>`_, except that the number of P-states it can set
+is limited to the ones listed by the ACPI ``_PSS`` objects.
+
+
+Kernel Command Line Options for ``intel_pstate``
+================================================
+
+Several kernel command line options can be used to pass early-configuration-time
+parameters to ``intel_pstate`` in order to enforce a specific behavior of the
+driver.  All of them have to be prefixed with ``intel_pstate=``.
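+
+For example, the following addition to the kernel command line makes
+``intel_pstate`` start in the passive mode (a usage sketch; any of the options
+below can be passed in the same way)::
+
+ intel_pstate=passive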
+
+``disable``
+	Do not register ``intel_pstate`` as the scaling driver even if the
+	processor is supported by it.
+
+``passive``
+	Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
+	start with.
+
+	This option implies the ``no_hwp`` one described below.
+
+``force``
+	Register ``intel_pstate`` as the scaling driver instead of
+	``acpi-cpufreq`` even if the latter is preferred on the given system.
+
+	This may prevent some platform features (such as thermal controls and
+	power capping) that rely on the availability of ACPI P-states
+	information from functioning as expected, so it should be used with
+	caution.
+
+	This option does not work with processors that are not supported by
+	``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling
+	driver is used instead of ``acpi-cpufreq``.
+
+``no_hwp``
+	Do not enable the `hardware-managed P-states (HWP) feature
+	<Active Mode With HWP_>`_ even if it is supported by the processor.
+
+``hwp_only``
+	Register ``intel_pstate`` as the scaling driver only if the
+	`hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
+	supported by the processor.
+
+``support_acpi_ppc``
+	Take ACPI ``_PPC`` performance limits into account.
+
+	If the preferred power management profile in the FADT (Fixed ACPI
+	Description Table) is set to "Enterprise Server" or "Performance
+	Server", the ACPI ``_PPC`` limits are taken into account by default
+	and this option has no effect.
+
+``per_cpu_perf_limits``
+	Use per-logical-CPU P-state limits (see `Coordination of P-state
+	Limits`_ for details).
+
+
+Diagnostics and Tuning
+======================
+
+Trace Events
+------------
+
+There are two static trace events that can be used for ``intel_pstate``
+diagnostics.  One of them is the ``cpu_frequency`` trace event generally used
+by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
+to ``intel_pstate``.  Both of them are triggered by ``intel_pstate`` only if
+it works in the `active mode <Active Mode_>`_.
+
+The following sequence of shell commands can be used to enable them and see
+their output (if the kernel is generally configured to support event tracing)::
+
+ # cd /sys/kernel/debug/tracing/
+ # echo 1 > events/power/pstate_sample/enable
+ # echo 1 > events/power/cpu_frequency/enable
+ # cat trace
+ gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
+ cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
+
+If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
+``cpu_frequency`` trace event will be triggered either by the ``schedutil``
+scaling governor (for the policies it is attached to), or by the ``CPUFreq``
+core (for the policies with other scaling governors).
+
+``ftrace``
+----------
+
+The ``ftrace`` interface can be used for low-level diagnostics of
+``intel_pstate``.  For example, to check how often the function to set a
+P-state is called, the ``ftrace`` filter can be set to
+:c:func:`intel_pstate_set_pstate`::
+
+ # cd /sys/kernel/debug/tracing/
+ # cat available_filter_functions | grep -i pstate
+ intel_pstate_set_pstate
+ intel_pstate_cpu_init
+ ...
+ # echo intel_pstate_set_pstate > set_ftrace_filter
+ # echo function > current_tracer
+ # cat trace | head -15
+ # tracer: function
+ #
+ # entries-in-buffer/entries-written: 80/80   #P:4
+ #
+ #                              _-----=> irqs-off
+ #                             / _----=> need-resched
+ #                            | / _---=> hardirq/softirq
+ #                            || / _--=> preempt-depth
+ #                            ||| /     delay
+ #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+ #              | |       |   ||||       |         |
+             Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
+  gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
+      gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
+           <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
+
+Tuning Interface in ``debugfs``
+-------------------------------
+
+The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
+processors in the active mode <powersave_>`_ is based on a `PID controller`_
+whose parameters were chosen to address a number of different use cases at the
+same time.  However, it still is possible to fine-tune it to a specific
+workload, and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/``
+is provided for this purpose.  [Note that the ``pstate_snb`` directory will be
+present only if the specific P-state selection algorithm matching the interface
+in it actually is in use.]
+
+The following files present in that directory can be used to modify the PID
+controller parameters at run time:
+
+| ``deadband``
+| ``d_gain_pct``
+| ``i_gain_pct``
+| ``p_gain_pct``
+| ``sample_rate_ms``
+| ``setpoint``
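+
+For example, the current value of the ``setpoint`` parameter can be read and
+then lowered, which generally makes the algorithm more willing to request
+higher P-states for the same load (a usage sketch; the values shown are only
+illustrative)::
+
+ # cd /sys/kernel/debug/pstate_snb/
+ # cat setpoint
+ 97
+ # echo 60 > setpoint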
+
+Note, however, that achieving desirable results this way generally requires
+expert-level understanding of the power vs performance tradeoff, so extra care
+is recommended when attempting to do that.
+
+
+.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
+.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
+.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
+.. _PID controller: https://en.wikipedia.org/wiki/PID_controller
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf..9490f28 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
 shares the bio_vec_list with the original bio (i.e. both point to the
 same bio_vec_list). This would typically be used for splitting i/o requests
 in lvm or md.
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index 55e43f1..fce9291 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -303,6 +303,11 @@
 	void smp_mb__before_atomic(void);
 	void smp_mb__after_atomic(void);
 
+Preceding a non-value-returning read-modify-write atomic operation with
+smp_mb__before_atomic() and following it with smp_mb__after_atomic()
+provides the same full ordering that is provided by value-returning
+read-modify-write atomic operations.
+
 For example, smp_mb__before_atomic() can be used like so::
 
 	obj->dead = 1;
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
deleted file mode 100644
index 3fdcdfd..0000000
--- a/Documentation/cpu-freq/intel-pstate.txt
+++ /dev/null
@@ -1,281 +0,0 @@
-Intel P-State driver
---------------------
-
-This driver provides an interface to control the P-State selection for the
-SandyBridge+ Intel processors.
-
-The following document explains P-States:
-http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
-As stated in the document, P-State doesn’t exactly mean a frequency. However, for
-the sake of the relationship with cpufreq, P-State and frequency are used
-interchangeably.
-
-Understanding the cpufreq core governors and policies are important before
-discussing more details about the Intel P-State driver. Based on what callbacks
-a cpufreq driver provides to the cpufreq core, it can support two types of
-drivers:
-- with target_index() callback: In this mode, the drivers using cpufreq core
-simply provide the minimum and maximum frequency limits and an additional
-interface target_index() to set the current frequency. The cpufreq subsystem
-has a number of scaling governors ("performance", "powersave", "ondemand",
-etc.). Depending on which governor is in use, cpufreq core will call for
-transitions to a specific frequency using target_index() callback.
-- setpolicy() callback: In this mode, drivers do not provide target_index()
-callback, so cpufreq core can't request a transition to a specific frequency.
-The driver provides minimum and maximum frequency limits and callbacks to set a
-policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
-The cpufreq core can request the driver to operate in any of the two policies:
-"performance" and "powersave". The driver decides which frequency to use based
-on the above policy selection considering minimum and maximum frequency limits.
-
-The Intel P-State driver falls under the latter category, which implements the
-setpolicy() callback. This driver decides what P-State to use based on the
-requested policy from the cpufreq core. If the processor is capable of
-selecting its next P-State internally, then the driver will offload this
-responsibility to the processor (aka HWP: Hardware P-States). If not, the
-driver implements algorithms to select the next P-State.
-
-Since these policies are implemented in the driver, they are not same as the
-cpufreq scaling governors implementation, even if they have the same name in
-the cpufreq sysfs (scaling_governors). For example the "performance" policy is
-similar to cpufreq’s "performance" governor, but "powersave" is completely
-different than the cpufreq "powersave" governor. The strategy here is similar
-to cpufreq "ondemand", where the requested P-State is related to the system load.
-
-Sysfs Interface
-
-In addition to the frequency-controlling interfaces provided by the cpufreq
-core, the driver provides its own sysfs files to control the P-State selection.
-These files have been added to /sys/devices/system/cpu/intel_pstate/.
-Any changes made to these files are applicable to all CPUs (even in a
-multi-package system, Refer to later section on placing "Per-CPU limits").
-
-      max_perf_pct: Limits the maximum P-State that will be requested by
-      the driver. It states it as a percentage of the available performance. The
-      available (P-State) performance may be reduced by the no_turbo
-      setting described below.
-
-      min_perf_pct: Limits the minimum P-State that will be requested by
-      the driver. It states it as a percentage of the max (non-turbo)
-      performance level.
-
-      no_turbo: Limits the driver to selecting P-State below the turbo
-      frequency range.
-
-      turbo_pct: Displays the percentage of the total performance that
-      is supported by hardware that is in the turbo range. This number
-      is independent of whether turbo has been disabled or not.
-
-      num_pstates: Displays the number of P-States that are supported
-      by hardware. This number is independent of whether turbo has
-      been disabled or not.
-
-For example, if a system has these parameters:
-	Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
-	Max non turbo ratio: 0x17
-	Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
-
-Sysfs will show :
-	max_perf_pct:100, which corresponds to 1 core ratio
-	min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
-	no_turbo:0, turbo is not disabled
-	num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
-	turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
-
-Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
-Volume 3: System Programming Guide" to understand ratios.
-
-There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
-that can be used for controlling the operation mode of the driver:
-
-      status: Three settings are possible:
-      "off"     - The driver is not in use at this time.
-      "active"  - The driver works as a P-state governor (default).
-      "passive" - The driver works as a regular cpufreq one and collaborates
-                  with the generic cpufreq governors (it sets P-states as
-                  requested by those governors).
-      The current setting is returned by reads from this attribute.  Writing one
-      of the above strings to it changes the operation mode as indicated by that
-      string, if possible.  If HW-managed P-states (HWP) are enabled, it is not
-      possible to change the driver's operation mode and attempts to write to
-      this attribute will fail.
-
-cpufreq sysfs for Intel P-State
-
-Since this driver registers with cpufreq, cpufreq sysfs is also presented.
-There are some important differences, which need to be considered.
-
-scaling_cur_freq: This displays the real frequency which was used during
-the last sample period instead of what is requested. Some other cpufreq driver,
-like acpi-cpufreq, displays what is requested (Some changes are on the
-way to fix this for acpi-cpufreq driver). The same is true for frequencies
-displayed at /proc/cpuinfo.
-
-scaling_governor: This displays current active policy. Since each CPU has a
-cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
-is not possible with Intel P-States, as there is one common policy for all
-CPUs. Here, the last requested policy will be applicable to all CPUs. It is
-suggested that one use the cpupower utility to change policy to all CPUs at the
-same time.
-
-scaling_setspeed: This attribute can never be used with Intel P-State.
-
-scaling_max_freq/scaling_min_freq: This interface can be used similarly to
-the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
-are converted to nearest possible P-State, this is prone to rounding errors.
-This method is not preferred to limit performance.
-
-affected_cpus: Not used
-related_cpus: Not used
-
-For contemporary Intel processors, the frequency is controlled by the
-processor itself and the P-State exposed to software is related to
-performance levels.  The idea that frequency can be set to a single
-frequency is fictional for Intel Core processors. Even if the scaling
-driver selects a single P-State, the actual frequency the processor
-will run at is selected by the processor itself.
-
-Per-CPU limits
-
-The kernel command line option "intel_pstate=per_cpu_perf_limits" forces
-the intel_pstate driver to use per-CPU performance limits.  When it is set,
-the sysfs control interface described above is subject to limitations.
-- The following controls are not available for both read and write
-	/sys/devices/system/cpu/intel_pstate/max_perf_pct
-	/sys/devices/system/cpu/intel_pstate/min_perf_pct
-- The following controls can be used to set performance limits, as far as the
-architecture of the processor permits:
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
-	/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
-- User can still observe turbo percent and number of P-States from
-	/sys/devices/system/cpu/intel_pstate/turbo_pct
-	/sys/devices/system/cpu/intel_pstate/num_pstates
-- User can read write system wide turbo status
-	/sys/devices/system/cpu/no_turbo
-
-Support of energy performance hints
-It is possible to provide hints to the HWP algorithms in the processor
-to be more performance centric to more energy centric. When the driver
-is using HWP, two additional cpufreq sysfs attributes are presented for
-each logical CPU.
-These attributes are:
-	- energy_performance_available_preferences
-	- energy_performance_preference
-
-To get list of supported hints:
-$ cat energy_performance_available_preferences
-    default performance balance_performance balance_power power
-
-The current preference can be read or changed via cpufreq sysfs
-attribute "energy_performance_preference". Reading from this attribute
-will display current effective setting. User can write any of the valid
-preference string to this attribute. User can always restore to power-on
-default by writing "default".
-
-Since threads can migrate to different CPUs, this is possible that the
-new CPU may have different energy performance preference than the previous
-one. To avoid such issues, either threads can be pinned to specific CPUs
-or set the same energy performance preference value to all CPUs.
-
-Tuning Intel P-State driver
-
-When the performance can be tuned using PID (Proportional Integral
-Derivative) controller, debugfs files are provided for adjusting performance.
-They are presented under:
-/sys/kernel/debug/pstate_snb/
-
-The PID tunable parameters are:
-      deadband
-      d_gain_pct
-      i_gain_pct
-      p_gain_pct
-      sample_rate_ms
-      setpoint
-
-To adjust these parameters, some understanding of driver implementation is
-necessary. There are some tweeks described here, but be very careful. Adjusting
-them requires expert level understanding of power and performance relationship.
-These limits are only useful when the "powersave" policy is active.
-
--To make the system more responsive to load changes, sample_rate_ms can
-be adjusted  (current default is 10ms).
--To make the system use higher performance, even if the load is lower, setpoint
-can be adjusted to a lower number. This will also lead to faster ramp up time
-to reach the maximum P-State.
-If there are no derivative and integral coefficients, The next P-State will be
-equal to:
-	current P-State - ((setpoint - current cpu load) * p_gain_pct)
-
-For example, if the current PID parameters are (Which are defaults for the core
-processors like SandyBridge):
-      deadband = 0
-      d_gain_pct = 0
-      i_gain_pct = 0
-      p_gain_pct = 20
-      sample_rate_ms = 10
-      setpoint = 97
-
-If the current P-State = 0x08 and current load = 100, this will result in the
-next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
-goes up by only 1. If during next sample interval the current load doesn't
-change and still 100, then P-State goes up by one again. This process will
-continue as long as the load is more than the setpoint until the maximum P-State
-is reached.
-
-For the same load at setpoint = 60, this will result in the next P-State
-= 0x08 - ((60 - 100) * 0.2) = 16
-So by changing the setpoint from 97 to 60, there is an increase of the
-next P-State from 9 to 16. So this will make processor execute at higher
-P-State for the same CPU load. If the load continues to be more than the
-setpoint during next sample intervals, then P-State will go up again till the
-maximum P-State is reached. But the ramp up time to reach the maximum P-State
-will be much faster when the setpoint is 60 compared to 97.
-
-Debugging Intel P-State driver
-
-Event tracing
-To debug P-State transition, the Linux event tracing interface can be used.
-There are two specific events, which can be enabled (Provided the kernel
-configs related to event tracing are enabled).
-
-# cd /sys/kernel/debug/tracing/
-# echo 1 > events/power/pstate_sample/enable
-# echo 1 > events/power/cpu_frequency/enable
-# cat trace
-gnome-terminal--4510  [001] ..s.  1177.680733: pstate_sample: core_busy=107
-	scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
-		freq=2474476
-cat-5235  [002] ..s.  1177.681723: cpu_frequency: state=2900000 cpu_id=2
-
-
-Using ftrace
-
-If function level tracing is required, the Linux ftrace interface can be used.
-For example if we want to check how often a function to set a P-State is
-called, we can set ftrace filter to intel_pstate_set_pstate.
-
-# cd /sys/kernel/debug/tracing/
-# cat available_filter_functions | grep -i pstate
-intel_pstate_set_pstate
-intel_pstate_cpu_init
-...
-
-# echo intel_pstate_set_pstate > set_ftrace_filter
-# echo function > current_tracer
-# cat trace | head -15
-# tracer: function
-#
-# entries-in-buffer/entries-written: 80/80   #P:4
-#
-#                              _-----=> irqs-off
-#                             / _----=> need-resched
-#                            | / _---=> hardirq/softirq
-#                            || / _--=> preempt-depth
-#                            ||| /     delay
-#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-#              | |       |   ||||       |         |
-            Xorg-3129  [000] ..s.  2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
- gnome-terminal--4510  [002] ..s.  2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
-     gnome-shell-3409  [001] ..s.  2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
-          <idle>-0     [000] ..s.  2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index ffdcc97..78aa00a 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -103,9 +103,3 @@
 
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.
-
-Checking RCU annotations
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-RCU annotations are not checked by default.  To enable RCU annotation
-checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index e9c5a1d..f465647 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -22,7 +22,8 @@
 - #clock-cells : must contain 1
 - #reset-cells : must contain 1
 
-For the PRCM CCUs on H3/A64, one more clock is needed:
+For the PRCM CCUs on H3/A64, two more clocks are needed:
+- "pll-periph": the SoC's peripheral PLL from the main CCU
 - "iosc": the SoC's internal frequency oscillator
 
 Example for generic CCU:
@@ -39,8 +40,8 @@
 r_ccu: clock@01f01400 {
 	compatible = "allwinner,sun50i-a64-r-ccu";
 	reg = <0x01f01400 0x100>;
-	clocks = <&osc24M>, <&osc32k>, <&iosc>;
-	clock-names = "hosc", "losc", "iosc";
+	clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
+	clock-names = "hosc", "losc", "iosc", "pll-periph";
 	#clock-cells = <1>;
 	#reset-cells = <1>;
 };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
index 42c3bb2..01e331a 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -41,9 +41,9 @@
 Optional properties:
 
 In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
 
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
 
 - reg: an additional register set is needed, for the GPIO Blink
   Counter on/off registers.
@@ -71,7 +71,7 @@
 		};
 
 		gpio1: gpio@18140 {
-			compatible = "marvell,armada-370-xp-gpio";
+			compatible = "marvell,armada-370-gpio";
 			reg = <0x18140 0x40>, <0x181c8 0x08>;
 			reg-names = "gpio", "pwm";
 			ngpios = <17>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
index 6db2210..025cf8c 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
@@ -36,7 +36,7 @@
                 control gpios
 
  - threshold:   allows setting the "click"-threshold in the range
-                from 20 to 80.
+                from 0 to 80.
 
  - gain:        allows setting the sensitivity in the range from 0 to
                 31. Note that lower values indicate higher
diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
index 0548569..9630ac0 100644
--- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
+++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt
@@ -16,6 +16,11 @@
 - reg:                  Base address of PMIC on Hi6220 SoC.
 - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
 - pmic-gpios:           The GPIO used by PMIC IRQ.
+- #clock-cells:		From common clock binding; shall be set to 0
+
+Optional properties:
+- clock-output-names: From common clock binding to override the
+  default output clock name
 
 Example:
 	pmic: pmic@f8000000 {
@@ -24,4 +29,5 @@
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+		#clock-cells = <0>;
 	}
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
index bbd083f..1db6e00 100644
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
@@ -31,7 +31,7 @@
 		compatible = "st,stm32-timers";
 		reg = <0x40010000 0x400>;
 		clocks = <&rcc 0 160>;
-		clock-names = "clk_int";
+		clock-names = "int";
 
 		pwm {
 			compatible = "st,stm32-pwm";
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
index e254368..9029b45 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
@@ -18,6 +18,8 @@
   "ext_clock" (External clock provided to the card).
 - post-power-on-delay-ms : Delay in ms after powering the card and
 	de-asserting the reset-gpios (if any)
+- power-off-delay-us : Delay in us after asserting the reset-gpios (if any)
+	during power off of the card.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index d6c6e41..8ec2ca2 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -34,7 +34,7 @@
       "brcm,bcm6328-switch"
       "brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch"
 
-See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required and optional properties.
 
 Examples:
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 7ef9dbb..1d4d0f4 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -26,6 +26,10 @@
 - interrupt-controller	: Indicates the switch is itself an interrupt
 			  controller. This is used for the PHY interrupts.
 #interrupt-cells = <2>	: Controller uses two cells, number and flag
+- eeprom-length		: Set to the length of an EEPROM connected to the
+			  switch. Must be set if the switch can not detect
+			  the presence and/or size of a connected EEPROM,
+			  otherwise optional.
 - mdio			: Container of PHY and devices on the switches MDIO
 			  bus.
 - mdio?		: Container of PHYs and devices on the external MDIO
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index a1e3693..6f55bdd 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -15,6 +15,10 @@
 - phy-reset-active-high : If present then the reset sequence using the GPIO
   specified in the "phy-reset-gpios" property is reversed (H=reset state,
   L=operation state).
+- phy-reset-post-delay : Post reset delay in milliseconds. If present then
+  a delay of phy-reset-post-delay milliseconds will be observed after the
+  phy-reset-gpios has been toggled. If omitted, no delay is observed. The
+  delay must be in the range of 1ms to 1000ms; other delays are invalid.
 - phy-supply : regulator that powers the Ethernet PHY.
 - phy-handle : phandle to the PHY device connected to this device.
 - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt
index 16c3a950..acfafc8 100644
--- a/Documentation/devicetree/bindings/net/smsc911x.txt
+++ b/Documentation/devicetree/bindings/net/smsc911x.txt
@@ -27,6 +27,7 @@
   of the device. On many systems this is wired high so the device goes
   out of reset at power-on, but if it is under program control, this
   optional GPIO can wake up in response to it.
+- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index 71a3c13..f01d154 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -247,7 +247,6 @@
 bias-pull-up		- pull up the pin
 bias-pull-down		- pull down the pin
 bias-pull-pin-default	- use pin-default pull state
-bi-directional		- pin supports simultaneous input/output operations
 drive-push-pull		- drive actively high and low
 drive-open-drain	- drive with open drain
 drive-open-source	- drive with open source
@@ -260,7 +259,6 @@
 power-source		- select between different power supplies
 low-power-enable	- enable low power mode
 low-power-disable	- disable low power mode
-output-enable		- enable output on pin regardless of output value
 output-low		- set the pin to output mode with low level
 output-high		- set the pin to output mode with high level
 slew-rate		- set the slew rate
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index 00bea03..fcf199b 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -10,6 +10,7 @@
   - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
   - "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs;
   - "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs;
+  - "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs;
   - "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs;
   - "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs;
   - "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs;
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index f10dd59..8444dc3 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt
@@ -316,7 +316,7 @@
         struct autofs_v5_packet {
                 int proto_version;                /* Protocol version */
                 int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 __u32 dev;
                 __u64 ino;
                 __u32 uid;
@@ -341,12 +341,12 @@
 `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
 most one packet, and any unread portion of a packet will be discarded.
 
-The `wait_queue_token` is a unique number which can identify a
+The `wait_queue_entry_token` is a unique number which can identify a
 particular request to be acknowledged.  When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
 acknowledged using one of the ioctls below and the relevant
-`wait_queue_token`.
+`wait_queue_entry_token`.
 
 Communicating with autofs: root directory ioctls
 ------------------------------------------------
@@ -358,7 +358,7 @@
 The available ioctl commands are:
 
 - **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_token" number
+    to the ioctl command is the "wait_queue_entry_token" number
     corresponding to the notification being acknowledged.
 - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
     the error code `ENOENT`.
@@ -382,14 +382,14 @@
         struct autofs_packet_expire_multi {
                 int proto_version;              /* Protocol version */
                 int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_token;
+                autofs_wqt_t wait_queue_entry_token;
                 int len;
                 char name[NAME_MAX+1];
         };
 
      is required.  This is filled in with the name of something
      that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_entry_token`
      is present in the structure, no "wait queue" is established
      and no acknowledgment is needed.
 - **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to
diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst
index 2032f0b..1ccc94b 100644
--- a/Documentation/input/devices/edt-ft5x06.rst
+++ b/Documentation/input/devices/edt-ft5x06.rst
@@ -15,7 +15,7 @@
 The driver allows configuration of the touch screen via a set of sysfs files:
 
 /sys/class/input/eventX/device/device/threshold:
-    allows setting the "click"-threshold in the range from 20 to 80.
+    allows setting the "click"-threshold in the range from 0 to 80.
 
 /sys/class/input/eventX/device/device/gain:
     allows setting the sensitivity in the range from 0 to 31. Note that
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index df31e30..2cb7dc5 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -109,13 +109,12 @@
 	on that CPU.  If a thread that expects to run on the de-jittered
 	CPU awakens, the scheduler will send an IPI that can result in
 	a subsequent SCHED_SOFTIRQ.
-2.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-	CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU
-	to be de-jittered is marked as an adaptive-ticks CPU using the
-	"nohz_full=" boot parameter.  This reduces the number of
-	scheduler-clock interrupts that the de-jittered CPU receives,
-	minimizing its chances of being selected to do the load balancing
-	work that runs in SCHED_SOFTIRQ context.
+2.	Build with CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
+	de-jittered is marked as an adaptive-ticks CPU using the
+	"nohz_full=" boot parameter.  This reduces the number of
+	scheduler-clock interrupts that the de-jittered CPU receives,
+	minimizing its chances of being selected to do the load balancing
+	work that runs in SCHED_SOFTIRQ context.
 3.	To the extent possible, keep the CPU out of the kernel when it
 	is non-idle, for example, by avoiding system calls and by
 	forcing both kernel threads and interrupts to execute elsewhere.
@@ -135,11 +134,10 @@
 RCU_SOFTIRQ:  Do at least one of the following:
 1.	Offload callbacks and keep the CPU in either dyntick-idle or
 	adaptive-ticks state by doing all of the following:
-	a.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-		CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU
-		to be de-jittered is marked as an adaptive-ticks CPU using
-		the "nohz_full=" boot parameter.  Bind the rcuo kthreads
-		to housekeeping CPUs, which can tolerate OS jitter.
+	a.	Build with CONFIG_NO_HZ_FULL=y and ensure that the CPU to
+		be de-jittered is marked as an adaptive-ticks CPU using
+		the "nohz_full=" boot parameter.  Bind the rcuo kthreads
+		to housekeeping CPUs, which can tolerate OS jitter.
 	b.	To the extent possible, keep the CPU out of the kernel
 		when it is non-idle, for example, by avoiding system
 		calls and by forcing both kernel threads and interrupts
@@ -236,11 +234,10 @@
 	is feasible only if your workload never requires RCU priority
 	boosting, for example, if you ensure frequent idle time on all
 	CPUs that might execute within the kernel.
-3.	Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y,
-	which offloads all RCU callbacks to kthreads that can be moved
-	off of CPUs susceptible to OS jitter.  This approach prevents the
-	rcuc/%u kthreads from having any work to do, so that they are
-	never awakened.
+3.	Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
+	boot parameter offloading RCU callbacks from all CPUs susceptible
+	to OS jitter.  This approach prevents the rcuc/%u kthreads from
+	having any work to do, so that they are never awakened.
 4.	Ensure that the CPU never enters the kernel, and, in particular,
 	avoid initiating any CPU hotplug operations on this CPU.  This is
 	another way of preventing any callbacks from being queued on the
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 732f10e..9d5e0f8 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -27,7 +27,7 @@
  (2) to provide a guide as to how to use the barriers that are available.
 
 Note that an architecture can provide more than the minimum requirement
-for any particular barrier, but if the architecure provides less than
+for any particular barrier, but if the architecture provides less than
 that, that architecture is incorrect.
 
 Note also that it is possible that a barrier may be a no-op for an
diff --git a/Documentation/networking/dpaa.txt b/Documentation/networking/dpaa.txt
new file mode 100644
index 0000000..76e016d
--- /dev/null
+++ b/Documentation/networking/dpaa.txt
@@ -0,0 +1,194 @@
+The QorIQ DPAA Ethernet Driver
+==============================
+
+Authors:
+Madalin Bucur <madalin.bucur@nxp.com>
+Camelia Groza <camelia.groza@nxp.com>
+
+Contents
+========
+
+	- DPAA Ethernet Overview
+	- DPAA Ethernet Supported SoCs
+	- Configuring DPAA Ethernet in your kernel
+	- DPAA Ethernet Frame Processing
+	- DPAA Ethernet Features
+	- Debugging
+
+DPAA Ethernet Overview
+======================
+
+DPAA stands for Data Path Acceleration Architecture and it is a
+set of networking acceleration IPs that are available on several
+generations of SoCs, both on PowerPC and ARM64.
+
+The Freescale DPAA architecture consists of a series of hardware blocks
+that support Ethernet connectivity. The Ethernet driver depends upon the
+following drivers in the Linux kernel:
+
+ - Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
+    drivers/iommu/fsl_*
+ - Frame Manager (FMan)
+    drivers/net/ethernet/freescale/fman
+ - Queue Manager (QMan), Buffer Manager (BMan)
+    drivers/soc/fsl/qbman
+
+A simplified view of the dpaa_eth interfaces mapped to FMan MACs:
+
+  dpaa_eth       /eth0\     ...       /ethN\
+  driver        |      |             |      |
+  -------------   ----   -----------   ----   -------------
+       -Ports  / Tx  Rx \    ...    / Tx  Rx \
+  FMan        |          |         |          |
+       -MACs  |   MAC0   |         |   MACN   |
+             /   dtsec0   \  ...  /   dtsecN   \ (or tgec)
+            /              \     /              \(or memac)
+  ---------  --------------  ---  --------------  ---------
+      FMan, FMan Port, FMan SP, FMan MURAM drivers
+  ---------------------------------------------------------
+      FMan HW blocks: MURAM, MACs, Ports, SP
+  ---------------------------------------------------------
+
+The dpaa_eth relation to the QMan, BMan and FMan:
+              ________________________________
+  dpaa_eth   /            eth0                \
+  driver    /                                  \
+  ---------   -^-   -^-   -^-   ---    ---------
+  QMan driver / \   / \   / \  \   /  | BMan    |
+             |Rx | |Rx | |Tx | |Tx |  | driver  |
+  ---------  |Dfl| |Err| |Cnf| |FQs|  |         |
+  QMan HW    |FQ | |FQ | |FQs| |   |  |         |
+             /   \ /   \ /   \  \ /   |         |
+  ---------   ---   ---   ---   -v-    ---------
+            |        FMan QMI         |         |
+            | FMan HW       FMan BMI  | BMan HW |
+              -----------------------   --------
+
+where the acronyms used above (and in the code) are:
+DPAA = Data Path Acceleration Architecture
+FMan = DPAA Frame Manager
+QMan = DPAA Queue Manager
+BMan = DPAA Buffers Manager
+QMI = QMan interface in FMan
+BMI = BMan interface in FMan
+FMan SP = FMan Storage Profiles
+MURAM = Multi-user RAM in FMan
+FQ = QMan Frame Queue
+Rx Dfl FQ = default reception FQ
+Rx Err FQ = Rx error frames FQ
+Tx Cnf FQ = Tx confirmation FQs
+Tx FQs = transmission frame queues
+dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps)
+tgec = ten gigabit Ethernet controller (10 Gbps)
+memac = multirate Ethernet MAC (10/100/1000/10000)
+
+DPAA Ethernet Supported SoCs
+============================
+
+The DPAA drivers enable the Ethernet controllers present on the following SoCs:
+
+# PPC
+P1023
+P2041
+P3041
+P4080
+P5020
+P5040
+T1023
+T1024
+T1040
+T1042
+T2080
+T4240
+B4860
+
+# ARM
+LS1043A
+LS1046A
+
+Configuring DPAA Ethernet in your kernel
+========================================
+
+To enable the DPAA Ethernet driver, the following Kconfig options are required:
+
+# common for arch/arm64 and arch/powerpc platforms
+CONFIG_FSL_DPAA=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
+CONFIG_FSL_XGMAC_MDIO=y
+
+# for arch/powerpc only
+CONFIG_FSL_PAMU=y
+
+# common options needed for the PHYs used on the RDBs
+CONFIG_VITESSE_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_AQUANTIA_PHY=y
+
+DPAA Ethernet Frame Processing
+==============================
+
+On Rx, buffers for the incoming frames are retrieved from one of the three
+existing buffer pools. The driver initializes and seeds these pools, each
+with buffers of a different size: 1KB, 2KB and 4KB.
+
+On Tx, all transmitted frames are returned to the driver through Tx
+confirmation frame queues. The driver is then responsible for freeing the
+buffers. In order to do this properly, a backpointer is added to the buffer
+before transmission that points to the skb. When the buffer returns to the
+driver on a confirmation FQ, the skb can be correctly consumed.
+
+DPAA Ethernet Features
+======================
+
+Currently the DPAA Ethernet driver enables the basic features required for
+a Linux Ethernet driver. The support for advanced features will be added
+gradually.
+
+The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
+checksum offload feature is enabled by default and cannot be controlled through
+ethtool.
+
+The driver has support for multiple prioritized Tx traffic classes. Priorities
+range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
+strict priority levels. Each traffic class contains NR_CPU TX queues. By
+default, only one traffic class is enabled and the lowest priority Tx queues
+are used. Higher priority traffic classes can be enabled with the mqprio
+qdisc. For example, the command shown after the following list enables all
+four traffic classes on an interface. skb priority levels are mapped to
+traffic classes as follows:
+
+	* priorities 0 to 3 - traffic class 0 (low priority)
+	* priorities 4 to 7 - traffic class 1 (medium-low priority)
+	* priorities 8 to 11 - traffic class 2 (medium-high priority)
+	* priorities 12 to 15 - traffic class 3 (high priority)
+
+tc qdisc add dev <int> root handle 1: \
+	 mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
+
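+An application can steer its traffic into a given class by setting the
+socket priority with the standard SO_PRIORITY socket option (an illustrative
+snippet, not part of the driver; priorities above 6 require CAP_NET_ADMIN):
+
+	#include <sys/socket.h>
+
+	/* prio 0-3 -> TC0, 4-7 -> TC1, 8-11 -> TC2, 12-15 -> TC3 */
+	static int set_tx_priority(int fd, int prio)
+	{
+		return setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+				  &prio, sizeof(prio));
+	}
+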
+Debugging
+=========
+
+The following statistics are exported for each interface through ethtool:
+
+	- interrupt count per CPU
+	- Rx packets count per CPU
+	- Tx packets count per CPU
+	- Tx confirmed packets count per CPU
+	- Tx S/G frames count per CPU
+	- Tx error count per CPU
+	- Rx error count per CPU
+	- Rx error count per type
+	- congestion related statistics:
+		- congestion status
+		- time spent in congestion
+		- number of times the device entered congestion
+		- dropped packets count per cause
+
+The driver also exports the following information in sysfs:
+
+	- the FQ IDs for each FQ type
+	/sys/devices/platform/dpaa-ethernet.0/net/<int>/fqids
+
+	- the IDs of the buffer pools in use
+	/sys/devices/platform/dpaa-ethernet.0/net/<int>/bpids
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 59f4db2..f55639d 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -122,7 +122,7 @@
 or will be computed in the stack. Capable hardware can pass the hash in
 the receive descriptor for the packet; this would usually be the same
 hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
-skb->rx_hash and can be used elsewhere in the stack as a hash of the
+skb->hash and can be used elsewhere in the stack as a hash of the
 packet’s flow.
 
 Each receive hardware queue has an associated list of CPUs to which
diff --git a/Documentation/networking/tcp.txt b/Documentation/networking/tcp.txt
index bdc4c0d..9c7139d 100644
--- a/Documentation/networking/tcp.txt
+++ b/Documentation/networking/tcp.txt
@@ -1,7 +1,7 @@
 TCP protocol
 ============
 
-Last updated: 9 February 2008
+Last updated: 3 June 2017
 
 Contents
 ========
@@ -29,18 +29,19 @@
 A congestion control mechanism can be registered through functions in
 tcp_cong.c. The functions used by the congestion control mechanism are
 registered via passing a tcp_congestion_ops struct to
-tcp_register_congestion_control. As a minimum name, ssthresh,
-cong_avoid must be valid.
+tcp_register_congestion_control. As a minimum, the congestion control
+mechanism must provide a valid name and must implement either ssthresh,
+cong_avoid and undo_cwnd hooks or the "omnipotent" cong_control hook.
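+
+A minimal registration sketch (illustrative only; it simply reuses the
+exported Reno helpers instead of implementing new behaviour):
+
+	#include <linux/module.h>
+	#include <net/tcp.h>
+
+	static struct tcp_congestion_ops tcp_example __read_mostly = {
+		.name		= "example",
+		.owner		= THIS_MODULE,
+		.ssthresh	= tcp_reno_ssthresh,
+		.cong_avoid	= tcp_reno_cong_avoid,
+		.undo_cwnd	= tcp_reno_undo_cwnd,
+	};
+
+	static int __init tcp_example_register(void)
+	{
+		return tcp_register_congestion_control(&tcp_example);
+	}
+	module_init(tcp_example_register);
+	MODULE_LICENSE("GPL");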
 
 Private data for a congestion control mechanism is stored in tp->ca_priv.
 tcp_ca(tp) returns a pointer to this space.  This is preallocated space - it
 is important to check the size of your private data will fit this space, or
-alternatively space could be allocated elsewhere and a pointer to it could
+alternatively, space could be allocated elsewhere and a pointer to it could
 be stored here.
 
 There are three kinds of congestion control algorithms currently: The
 simplest ones are derived from TCP reno (highspeed, scalable) and just
-provide an alternative the congestion window calculation. More complex
+provide an alternative congestion window calculation. More complex
 ones like BIC try to look at other events to provide better
 heuristics.  There are also round trip time based algorithms like
 Vegas and Westwood+.
@@ -49,21 +50,15 @@
 needs to maintain fairness and performance. Please review current
 research and RFC's before developing new modules.
 
-The method that is used to determine which congestion control mechanism is
-determined by the setting of the sysctl net.ipv4.tcp_congestion_control.
-The default congestion control will be the last one registered (LIFO);
-so if you built everything as modules, the default will be reno. If you
-build with the defaults from Kconfig, then CUBIC will be builtin (not a
-module) and it will end up the default.
+The default congestion control mechanism is chosen based on the
+DEFAULT_TCP_CONG Kconfig parameter. If you really want a particular default
+value then you can set it using sysctl net.ipv4.tcp_congestion_control. The
+module will be autoloaded if needed and you will get the expected protocol. If
+you ask for an unknown congestion method, then the sysctl attempt will fail.
 
-If you really want a particular default value then you will need
-to set it with the sysctl.  If you use a sysctl, the module will be autoloaded
-if needed and you will get the expected protocol. If you ask for an
-unknown congestion method, then the sysctl attempt will fail.
-
-If you remove a tcp congestion control module, then you will get the next
+If you remove a TCP congestion control module, then you will get the next
 available one. Since reno cannot be built as a module, and cannot be
-deleted, it will always be available.
+removed, it will always be available.
 
 How the new TCP output machine [nyi] works.
 ===========================================
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index cbc1b46..e89e36e 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -7,6 +7,8 @@
  0. WARNING
  1. Overview
  2. Scheduling algorithm
+   2.1 Main algorithm
+   2.2 Bandwidth reclaiming
  3. Scheduling Real-Time Tasks
    3.1 Definitions
    3.2 Schedulability Analysis for Uniprocessor Systems
@@ -44,6 +46,9 @@
 2. Scheduling algorithm
 ==================
 
+2.1 Main algorithm
+------------------
+
  SCHED_DEADLINE uses three parameters, named "runtime", "period", and
  "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
  "runtime" microseconds of execution time every "period" microseconds, and
@@ -113,6 +118,160 @@
          remaining runtime = remaining runtime + runtime
 
 
+2.2 Bandwidth reclaiming
+------------------------
+
+ Bandwidth reclaiming for deadline tasks is based on the GRUB (Greedy
+ Reclamation of Unused Bandwidth) algorithm [15, 16, 17] and it is enabled
+ when flag SCHED_FLAG_RECLAIM is set.
+
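+ A task requests reclaiming by passing this flag to sched_setattr(). An
+ illustrative snippet follows; there is no glibc wrapper, so struct
+ sched_attr and the constants used below must be supplied by the
+ application:
+
+	struct sched_attr attr = {
+		.size		= sizeof(attr),
+		.sched_policy	= SCHED_DEADLINE,	/* 6 */
+		.sched_flags	= SCHED_FLAG_RECLAIM,	/* 0x02 */
+		.sched_runtime	=  10 * 1000 * 1000,	/*  10 ms */
+		.sched_deadline	=  30 * 1000 * 1000,	/*  30 ms */
+		.sched_period	= 100 * 1000 * 1000,	/* 100 ms */
+	};
+
+	syscall(__NR_sched_setattr, 0, &attr, 0);	/* 0 == this thread */
+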
+ The following diagram illustrates the state names for tasks handled by GRUB:
+
+                             ------------
+                 (d)        |   Active   |
+              ------------->|            |
+              |             | Contending |
+              |              ------------
+              |                A      |
+          ----------           |      |
+         |          |          |      |
+         | Inactive |          |(b)   | (a)
+         |          |          |      |
+          ----------           |      |
+              A                |      V
+              |              ------------
+              |             |   Active   |
+              --------------|     Non    |
+                 (c)        | Contending |
+                             ------------
+
+ A task can be in one of the following states:
+
+  - ActiveContending: if it is ready for execution (or executing);
+
+  - ActiveNonContending: if it just blocked and has not yet surpassed the 0-lag
+    time;
+
+  - Inactive: if it is blocked and has surpassed the 0-lag time.
+
+ State transitions:
+
+  (a) When a task blocks, it does not become immediately inactive since its
+      bandwidth cannot be immediately reclaimed without breaking the
+      real-time guarantees. It therefore enters a transitional state called
+      ActiveNonContending. The scheduler arms the "inactive timer" to fire at
+      the 0-lag time, when the task's bandwidth can be reclaimed without
+      breaking the real-time guarantees.
+
+      The 0-lag time for a task entering the ActiveNonContending state is
+      computed as
+
+                        (runtime * dl_period)
+             deadline - ---------------------
+                             dl_runtime
+
+      where runtime is the remaining runtime, while dl_runtime and dl_period
+      are the reservation parameters.
+
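+      For instance, in the two-task example shown later in this section,
+      task T1 blocks at t = 2 with remaining runtime 2, dl_runtime = 4 and
+      dl_period = 8; its absolute deadline is 8, so its 0-lag time is
+      8 - (2 * 8) / 4 = 4.
+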
+  (b) If the task wakes up before the inactive timer fires, the task re-enters
+      the ActiveContending state and the "inactive timer" is canceled.
+      In addition, if the task wakes up on a different runqueue, then
+      the task's utilization must be removed from the previous runqueue's active
+      utilization and must be added to the new runqueue's active utilization.
+      In order to avoid races between a task waking up on a runqueue and the
+      "inactive timer" running on a different CPU, the "dl_non_contending"
+      flag is used to indicate that a task is not on a runqueue but is active
+      (so, the flag is set when the task blocks and is cleared when the
+      "inactive timer" fires or when the task wakes up).
+
+  (c) When the "inactive timer" fires, the task enters the Inactive state and
+      its utilization is removed from the runqueue's active utilization.
+
+  (d) When an inactive task wakes up, it enters the ActiveContending state and
+      its utilization is added to the active utilization of the runqueue where
+      it has been enqueued.
+
+ For each runqueue, the GRUB algorithm keeps track of two different bandwidths:
+
+  - Active bandwidth (running_bw): this is the sum of the bandwidths of all
+    tasks in active state (i.e., ActiveContending or ActiveNonContending);
+
+  - Total bandwidth (this_bw): this is the sum of the bandwidths of all tasks
+    "belonging" to the runqueue, including the tasks in Inactive state.
+
+
+ The algorithm reclaims the bandwidth of the tasks in Inactive state.
+ It does so by decrementing the runtime of the executing task Ti at a pace equal
+ to
+
+           dq = -max{ Ui, (1 - Uinact) } dt
+
+ where Uinact is the inactive utilization, computed as (this_bw - running_bw),
+ and Ui is the bandwidth of task Ti.
+
+
+ Let's now see a trivial example of two deadline tasks with runtime equal
+ to 4 and period equal to 8 (i.e., bandwidth equal to 0.5):
+
+     A            Task T1
+     |
+     |                               |
+     |                               |
+     |--------                       |----
+     |       |                       V
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+     A            Task T2
+     |
+     |                               |
+     |                               |
+     |       ------------------------|
+     |       |                       V
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+     A            running_bw
+     |
+   1 -----------------               ------
+     |               |               |
+  0.5-               -----------------
+     |                               |
+     |---|---|---|---|---|---|---|---|--------->t
+     0   1   2   3   4   5   6   7   8
+
+
+  - Time t = 0:
+
+    Both tasks are ready for execution and therefore in ActiveContending state.
+    Suppose Task T1 is the first task to start execution.
+    Since there are no inactive tasks, its runtime is decreased as dq = -1 dt.
+
+  - Time t = 2:
+
+    Suppose that task T1 blocks.
+    Task T1 therefore enters the ActiveNonContending state. Since its remaining
+    runtime is equal to 2, its 0-lag time is equal to t = 4.
+    Task T2 starts execution, with runtime still decreased as dq = -1 dt since
+    there are no inactive tasks.
+
+  - Time t = 4:
+
+    This is the 0-lag time for Task T1. Since it has not woken up in the
+    meantime, it enters the Inactive state. Its bandwidth is removed from
+    running_bw.
+    Task T2 continues its execution. However, its runtime is now decreased as
+    dq = - 0.5 dt because Uinact = 0.5.
+    Task T2 therefore reclaims the bandwidth unused by Task T1.
+
+  - Time t = 8:
+
+    Task T1 wakes up. It enters the ActiveContending state again, and the
+    running_bw is incremented.
+
+
 3. Scheduling Real-Time Tasks
 =============================
 
@@ -330,6 +489,15 @@
   14 - J. Erickson, U. Devi and S. Baruah. Improved tardiness bounds for
        Global EDF. Proceedings of the 22nd Euromicro Conference on
        Real-Time Systems, 2010.
+  15 - G. Lipari, S. Baruah, Greedy reclamation of unused bandwidth in
+       constant-bandwidth servers, 12th IEEE Euromicro Conference on Real-Time
+       Systems, 2000.
+  16 - L. Abeni, J. Lelli, C. Scordino, L. Palopoli, Greedy CPU reclaiming for
+       SCHED DEADLINE. In Proceedings of the Real-Time Linux Workshop (RTLWS),
+       Dusseldorf, Germany, 2014.
+  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
+       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
+       Computing, 2016.
 
 
 4. Bandwidth management
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
index 5338673..773d2bf 100644
--- a/Documentation/sound/hd-audio/models.rst
+++ b/Documentation/sound/hd-audio/models.rst
@@ -16,6 +16,8 @@
     6-jack in back, 2-jack in front
 6stack-digout
     6-jack with a SPDIF out
+6stack-automute
+    6-jack with headphone jack detection
 
 ALC260
 ======
@@ -62,6 +64,8 @@
     Enables docking station I/O for some Lenovos
 hp-gpio-led
     GPIO LED support on HP laptops
+hp-dock-gpio-mic1-led
+    HP dock with mic LED support
 dell-headset-multi
     Headset jack, which can also be used as mic-in
 dell-headset-dock
@@ -72,6 +76,12 @@
     Combo jack sensing on ALC283
 tpt440-dock
     Pin configs for Lenovo Thinkpad Dock support
+tpt440
+    Lenovo Thinkpad T440s setup
+tpt460
+    Lenovo Thinkpad T460/560 setup
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC66x/67x/892
 ==============
@@ -97,6 +107,8 @@
     Inverted internal mic workaround
 dell-headset-multi
     Headset jack, which can also be used as mic-in
+dual-codecs
+    Lenovo laptops with dual codecs
 
 ALC680
 ======
@@ -114,6 +126,8 @@
     Inverted internal mic workaround
 no-primary-hp
     VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
+dual-codecs
+    ALC1220 dual codecs for Gaming mobos
 
 ALC861/660
 ==========
@@ -206,65 +220,47 @@
 
 Conexant 5045
 =============
-laptop-hpsense
-    Laptop with HP sense (old model laptop)
-laptop-micsense
-    Laptop with Mic sense (old model fujitsu)
-laptop-hpmicsense
-    Laptop with HP and Mic senses
-benq
-    Benq R55E
-laptop-hp530
-    HP 530 laptop
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
+toshiba-p105
+    Toshiba P105 quirk
+hp-530
+    HP 530 quirk
 
 Conexant 5047
 =============
-laptop
-    Basic Laptop config 
-laptop-hp
-    Laptop config for some HP models (subdevice 30A5)
-laptop-eapd
-    Laptop config with EAPD support
-test
-    for testing/debugging purpose, almost all controls can be
-    adjusted.  Appearing only when compiled with $CONFIG_SND_DEBUG=y
+cap-mix-amp
+    Fix max input level on mixer widget
 
 Conexant 5051
 =============
-laptop
-    Basic Laptop config (default)
-hp
-    HP Spartan laptop
-hp-dv6736
-    HP dv6736
-hp-f700
-    HP Compaq Presario F700
-ideapad
-    Lenovo IdeaPad laptop
-toshiba
-    Toshiba Satellite M300
+lenovo-x200
+    Lenovo X200 quirk
 
 Conexant 5066
 =============
-laptop
-    Basic Laptop config (default)
-hp-laptop
-    HP laptops, e g G60
-asus
-    Asus K52JU, Lenovo G560
-dell-laptop
-    Dell laptops
-dell-vostro
-    Dell Vostro
-olpc-xo-1_5
-    OLPC XO 1.5
-ideapad
-    Lenovo IdeaPad U150
+stereo-dmic
+    Workaround for inverted stereo digital mic
+gpio1
+    Enable GPIO1 pin
+headphone-mic-pin
+    Enable headphone mic NID 0x18 without detection
+tp410
+    Thinkpad T400 & co quirks
 thinkpad
-    Lenovo Thinkpad
+    Thinkpad mute/mic LED quirk
+lemote-a1004
+    Lemote A1004 quirk
+lemote-a1205
+    Lemote A1205 quirk
+olpc-xo
+    OLPC XO quirk
+mute-led-eapd
+    Mute LED control via EAPD
+hp-dock
+    HP dock support
+mute-led-gpio
+    Mute LED control via GPIO
 
 STAC9200
 ========
@@ -444,6 +440,8 @@
     Dell desktops/laptops
 alienware
     Alienware M17x
+asus-mobo
+    Pin configs for ASUS mobo with 5.1/SPDIF out
 auto
     BIOS setup (default)
 
@@ -477,6 +475,8 @@
     Pin fixup for HP Envy TS bass speaker (NID 0x10)
 hp-bnb13-eq
     Hardware equalizer setup for HP laptops
+hp-envy-ts-bass
+    HP Envy TS bass support
 auto
     BIOS setup (default)
 
@@ -496,10 +496,22 @@
 
 Cirrus Logic CS4206/4207
 ========================
+mbp53
+    MacBook Pro 5,3
 mbp55
     MacBook Pro 5,5
 imac27
     IMac 27 Inch
+imac27_122
+    iMac 12,2
+apple
+    Generic Apple quirk
+mbp101
+    MacBookPro 10,1
+mbp81
+    MacBookPro 8,1
+mba42
+    MacBookAir 4,2
 auto
     BIOS setup (default)
 
@@ -509,6 +521,10 @@
     MacBook Air 6,1 and 6,2
 gpio0
     Enable GPIO 0 amp
+mbp11
+    MacBookPro 11,2
+macmini
+    MacMini 7,1
 auto
     BIOS setup (default)
 
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index 6eaf576..2dcaf9a 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -194,32 +194,9 @@
 
 Another approach is to offload RCU callback processing to "rcuo" kthreads
 using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
-offload may be selected via several methods:
-
-1.	One of three mutually exclusive Kconfig options specify a
-	build-time default for the CPUs to offload:
-
-	a.	The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in
-		no CPUs being offloaded.
-
-	b.	The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes
-		CPU 0 to be offloaded.
-
-	c.	The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all
-		CPUs to be offloaded.  Note that the callbacks will be
-		offloaded to "rcuo" kthreads, and that those kthreads
-		will in fact run on some CPU.  However, this approach
-		gives fine-grained control on exactly which CPUs the
-		callbacks run on, along with their scheduling priority
-		(including the default of SCHED_OTHER), and it further
-		allows this control to be varied dynamically at runtime.
-
-2.	The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated
-	list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1,
-	3, 4, and 5.  The specified CPUs will be offloaded in addition to
-	any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or
-	CONFIG_RCU_NOCB_CPU_ALL=y.  This means that the "rcu_nocbs=" boot
-	parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y.
+offload may be selected using the "rcu_nocbs=" kernel boot parameter,
+which takes a comma-separated list of CPUs and CPU ranges, for example,
+"1,3-5" selects CPUs 1, 3, 4, and 5.
 
 The offloaded CPUs will never queue RCU callbacks, and therefore RCU
 never prevents offloaded CPUs from entering either dyntick-idle mode
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 94a987b..fff8ff6 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1609,7 +1609,7 @@
   <idle>-0       3dN.2   14us : sched_avg_update <-__cpu_load_update
   <idle>-0       3dN.2   14us : _raw_spin_unlock <-cpu_load_update_nohz
   <idle>-0       3dN.2   14us : sub_preempt_count <-_raw_spin_unlock
-  <idle>-0       3dN.1   15us : calc_load_exit_idle <-tick_nohz_idle_exit
+  <idle>-0       3dN.1   15us : calc_load_nohz_stop <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_cancel <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : hrtimer_try_to_cancel <-hrtimer_cancel
diff --git a/MAINTAINERS b/MAINTAINERS
index 9e98464..d357695e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1172,7 +1172,7 @@
 
 ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
-M:	Ryan Mallon <rmallon@gmail.com>
+M:	Alexander Sverdlin <alexander.sverdlin@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-ep93xx/
@@ -1489,13 +1489,15 @@
 M:	Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-F:	arch/arm/mach-mvebu/
-F:	drivers/rtc/rtc-armada38x.c
 F:	arch/arm/boot/dts/armada*
 F:	arch/arm/boot/dts/kirkwood*
+F:	arch/arm/configs/mvebu_*_defconfig
+F:	arch/arm/mach-mvebu/
 F:	arch/arm64/boot/dts/marvell/armada*
 F:	drivers/cpufreq/mvebu-cpufreq.c
-F:	arch/arm/configs/mvebu_*_defconfig
+F:	drivers/irqchip/irq-armada-370-xp.c
+F:	drivers/irqchip/irq-mvebu-*
+F:	drivers/rtc/rtc-armada38x.c
 
 ARM/Marvell Berlin SoC support
 M:	Jisheng Zhang <jszhang@marvell.com>
@@ -1721,7 +1723,6 @@
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:	Kukjin Kim <kgene@kernel.org>
 M:	Krzysztof Kozlowski <krzk@kernel.org>
-R:	Javier Martinez Canillas <javier@osg.samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 Q:	https://patchwork.kernel.org/project/linux-samsung-soc/list/
@@ -1829,7 +1830,6 @@
 ARM/STI ARCHITECTURE
 M:	Patrice Chotard <patrice.chotard@st.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	kernel@stlinux.com
 W:	http://www.stlinux.com
 S:	Maintained
 F:	arch/arm/mach-sti/
@@ -2322,6 +2322,15 @@
 F:	drivers/input/touchscreen/atmel_mxt_ts.c
 F:	include/linux/platform_data/atmel_mxt_ts.h
 
+ATOMIC INFRASTRUCTURE
+M:	Will Deacon <will.deacon@arm.com>
+M:	Peter Zijlstra <peterz@infradead.org>
+R:	Boqun Feng <boqun.feng@gmail.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	arch/*/include/asm/atomic*.h
+F:	include/*/atomic*.h
+
 ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
 M:	Bradley Grove <linuxdrivers@attotech.com>
 L:	linux-scsi@vger.kernel.org
@@ -2964,7 +2973,7 @@
 
 C6X ARCHITECTURE
 M:	Mark Salter <msalter@redhat.com>
-M:	Aurelien Jacquiot <a-jacquiot@ti.com>
+M:	Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
 L:	linux-c6x-dev@linux-c6x.org
 W:	http://www.linux-c6x.org/wiki/index.php/Main_Page
 S:	Maintained
@@ -5622,7 +5631,7 @@
 
 GENWQE (IBM Generic Workqueue Card)
 M:	Frank Haverkamp <haver@linux.vnet.ibm.com>
-M:	Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+M:	Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
 S:	Supported
 F:	drivers/misc/genwqe/
 
@@ -5667,7 +5676,6 @@
 
 GPIO SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
-M:	Alexandre Courbot <gnurou@gmail.com>
 L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
 S:	Maintained
@@ -7143,7 +7151,7 @@
 F:	drivers/media/platform/rcar_jpu.c
 
 JSM Neo PCI based serial card
-M:	Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+M:	Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
 L:	linux-serial@vger.kernel.org
 S:	Maintained
 F:	drivers/tty/serial/jsm/
@@ -7556,7 +7564,7 @@
 F:	drivers/ata/sata_promise.*
 
 LIBLOCKDEP
-M:	Sasha Levin <sasha.levin@oracle.com>
+M:	Sasha Levin <alexander.levin@verizon.com>
 S:	Maintained
 F:	tools/lib/lockdep/
 
@@ -7707,7 +7715,7 @@
 
 LIVE PATCHING
 M:	Josh Poimboeuf <jpoimboe@redhat.com>
-M:	Jessica Yu <jeyu@redhat.com>
+M:	Jessica Yu <jeyu@kernel.org>
 M:	Jiri Kosina <jikos@kernel.org>
 M:	Miroslav Benes <mbenes@suse.cz>
 R:	Petr Mladek <pmladek@suse.com>
@@ -8508,7 +8516,7 @@
 F:	drivers/media/radio/radio-miropcm20*
 
 MELLANOX MLX4 core VPI driver
-M:	Yishai Hadas <yishaih@mellanox.com>
+M:	Tariq Toukan <tariqt@mellanox.com>
 L:	netdev@vger.kernel.org
 L:	linux-rdma@vger.kernel.org
 W:	http://www.mellanox.com
@@ -8516,7 +8524,6 @@
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx4/
 F:	include/linux/mlx4/
-F:	include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX4 IB driver
 M:	Yishai Hadas <yishaih@mellanox.com>
@@ -8526,6 +8533,7 @@
 S:	Supported
 F:	drivers/infiniband/hw/mlx4/
 F:	include/linux/mlx4/
+F:	include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX5 core VPI driver
 M:	Saeed Mahameed <saeedm@mellanox.com>
@@ -8538,7 +8546,6 @@
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx5/core/
 F:	include/linux/mlx5/
-F:	include/uapi/rdma/mlx5-abi.h
 
 MELLANOX MLX5 IB driver
 M:	Matan Barak <matanb@mellanox.com>
@@ -8549,6 +8556,7 @@
 S:	Supported
 F:	drivers/infiniband/hw/mlx5/
 F:	include/linux/mlx5/
+F:	include/uapi/rdma/mlx5-abi.h
 
 MELEXIS MLX90614 DRIVER
 M:	Crt Mori <cmo@melexis.com>
@@ -8588,7 +8596,7 @@
 F:	drivers/media/dvb-frontends/mn88473*
 
 MODULE SUPPORT
-M:	Jessica Yu <jeyu@redhat.com>
+M:	Jessica Yu <jeyu@kernel.org>
 M:	Rusty Russell <rusty@rustcorp.com.au>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
 S:	Maintained
@@ -10450,7 +10458,7 @@
 
 PXA RTC DRIVER
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
-L:	rtc-linux@googlegroups.com
+L:	linux-rtc@vger.kernel.org
 S:	Maintained
 
 QAT DRIVER
@@ -10757,7 +10765,7 @@
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:	Alessandro Zummo <a.zummo@towertech.it>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
-L:	rtc-linux@googlegroups.com
+L:	linux-rtc@vger.kernel.org
 Q:	http://patchwork.ozlabs.org/project/rtc-linux/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
 S:	Maintained
@@ -11268,7 +11276,6 @@
 
 STI CEC DRIVER
 M:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
-L:	kernel@stlinux.com
 S:	Maintained
 F:	drivers/staging/media/st-cec/
 F:	Documentation/devicetree/bindings/media/stih-cec.txt
@@ -11778,6 +11785,7 @@
 S:	Supported
 F:	arch/arm/mach-davinci/
 F:	drivers/i2c/busses/i2c-davinci.c
+F:	arch/arm/boot/dts/da850*
 
 TI DAVINCI SERIES MEDIA DRIVER
 M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
@@ -13463,6 +13471,17 @@
 T:	git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 S:	Maintained
 
+UUID HELPERS
+M:	Christoph Hellwig <hch@lst.de>
+R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:	linux-kernel@vger.kernel.org
+T:	git git://git.infradead.org/users/hch/uuid.git
+F:	lib/uuid.c
+F:	lib/test_uuid.c
+F:	include/linux/uuid.h
+F:	include/uapi/linux/uuid.h
+S:	Maintained
+
 UVESAFB DRIVER
 M:	Michal Januszewski <spock@gentoo.org>
 L:	linux-fbdev@vger.kernel.org
@@ -13861,7 +13880,7 @@
 F:	drivers/net/wireless/wl3501*
 
 WOLFSON MICROELECTRONICS DRIVERS
-L:	patches@opensource.wolfsonmicro.com
+L:	patches@opensource.cirrus.com
 T:	git https://github.com/CirrusLogic/linux-drivers.git
 W:	https://github.com/CirrusLogic/linux-drivers/wiki
 S:	Supported
diff --git a/Makefile b/Makefile
index 63e10bd..283c623 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1437,7 +1437,7 @@
 	@echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
 	@echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
+	@echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
 	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
 	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
diff --git a/arch/Kconfig b/arch/Kconfig
index 6c00e5b..f76b214 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -867,4 +867,13 @@
 config ARCH_WANT_RELAX_ORDER
 	bool
 
+config REFCOUNT_FULL
+	bool "Perform full reference count validation at the expense of speed"
+	help
+	  Enabling this switches the refcounting infrastructure from a fast
+	  unchecked atomic_t implementation to a fully state checked
+	  implementation, which can be (slightly) slower but provides protections
+	  against various use-after-free conditions that can be used in
+	  security flaw exploits.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242d..4104a08 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
 #define TSK_K_BLINK(tsk)	TSK_K_REG(tsk, 4)
 #define TSK_K_FP(tsk)		TSK_K_REG(tsk, 0)
 
-#define thread_saved_pc(tsk)	TSK_K_BLINK(tsk)
-
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
 			 unsigned long usp);
 
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 3e25e8d..2e13683 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f..bf3285b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
 	config VMSPLIT_3G_OPT
+		depends on !ARM_LPAE
 		bool "3G/1G user/kernel split (for full 1G low memory)"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
@@ -2061,6 +2062,23 @@
 	  is only useful for kernels that may run on systems that have
 	  UEFI firmware.
 
+config DMI
+	bool "Enable support for SMBIOS (DMI) tables"
+	depends on EFI
+	default y
+	help
+	  This enables SMBIOS/DMI feature for systems.
+
+	  This option is only useful on systems that have UEFI firmware.
+	  However, even with this option, the resultant kernel should
+	  continue to boot on existing non-UEFI platforms.
+
+	  NOTE: This does *NOT* enable or encourage the use of DMI quirks,
+	  i.e., the practice of identifying the platform via DMI to
+	  decide whether certain workarounds for buggy hardware and/or
+	  firmware need to be enabled. This would require the DMI subsystem
+	  to be enabled much earlier than we do on ARM, which is non-trivial.
+
 endmenu
 
 menu "CPU Power Management"
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 9d5dc4f..a17ca8d 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,14 +17,13 @@
 		@ there.
 		.inst	'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
 #else
-		mov	r0, r0
+ AR_CLASS(	mov	r0, r0		)
+  M_CLASS(	nop.w			)
 #endif
 		.endm
 
 		.macro	__EFI_HEADER
 #ifdef CONFIG_EFI_STUB
-		b	__efi_start
-
 		.set	start_offset, __efi_start - start
 		.org	start + 0x3c
 		@
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 7c711ba..8a75687 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -130,19 +130,22 @@
 		.rept	7
 		__nop
 		.endr
-   ARM(		mov	r0, r0		)
-   ARM(		b	1f		)
- THUMB(		badr	r12, 1f		)
- THUMB(		bx	r12		)
+#ifndef CONFIG_THUMB2_KERNEL
+		mov	r0, r0
+#else
+ AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
+  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
+		.thumb
+#endif
+		W(b)	1f
 
 		.word	_magic_sig	@ Magic numbers to help the loader
 		.word	_magic_start	@ absolute load/run zImage address
 		.word	_magic_end	@ zImage end address
 		.word	0x04030201	@ endianness flag
 
- THUMB(		.thumb			)
-1:		__EFI_HEADER
-
+		__EFI_HEADER
+1:
  ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
  AR_CLASS(	mrs	r9, cpsr	)
 #ifdef CONFIG_ARM_VIRT_EXT
diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index c5d2589..fc864a8 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts
@@ -220,7 +220,7 @@
 
 	mmc1_pins: pinmux_mmc1_pins {
 		pinctrl-single,pins = <
-			AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7)		/* spi0_cs1.gpio0_6 */
+			AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7)		/* uart0_rtsn.gpio1_9 */
 		>;
 	};
 
@@ -280,10 +280,6 @@
 			AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7)	/* nKbdReset - gpmc_ad13.gpio1_13 */
 			AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7)	/* nDispReset - gpmc_ad14.gpio1_14 */
 			AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7)	/* USB1_enPower - gpmc_a1.gpio1_17 */
-			/* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */
-			AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */
-			AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */
-			AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7)	/* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */
 			/* PDI Bus - Battery system */
 			AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7)	/* nBattReset  gpmc_a0.gpio1_16 */
 			AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7)	/* BattPDIData gpmc_ad15.gpio1_15 */
@@ -384,7 +380,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc1_pins>;
 	bus-width = <4>;
-	cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
+	cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&vmmcsd_fixed>;
 };
 
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 561f27d..9444a9a 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -3,6 +3,11 @@
 #include <dt-bindings/clock/bcm2835-aux.h>
 #include <dt-bindings/gpio/gpio.h>
 
+/* firmware-provided startup stubs live here, where the secondary CPUs are
+ * spinning.
+ */
+/memreserve/ 0x00000000 0x00001000;
+
 /* This include file covers the common peripherals and configuration between
  * bcm2835 and bcm2836 implementations, leaving the CPU configuration to
  * bcm2835.dtsi and bcm2836.dtsi.
diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dts b/arch/arm/boot/dts/imx6ul-14x14-evk.dts
index f18e1f1..d2be8aa 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dts
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dts
@@ -120,10 +120,16 @@
 
 		ethphy0: ethernet-phy@2 {
 			reg = <2>;
+			micrel,led-mode = <1>;
+			clocks = <&clks IMX6UL_CLK_ENET_REF>;
+			clock-names = "rmii-ref";
 		};
 
 		ethphy1: ethernet-phy@1 {
 			reg = <1>;
+			micrel,led-mode = <1>;
+			clocks = <&clks IMX6UL_CLK_ENET2_REF>;
+			clock-names = "rmii-ref";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
index b6f2682..66f615a 100644
--- a/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/keystone-k2l-netcp.dtsi
@@ -137,8 +137,8 @@
 	/* NetCP address range */
 	ranges = <0 0x26000000 0x1000000>;
 
-	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>, <&clkosr>;
-	clock-names = "pa_clk", "ethss_clk", "cpts", "osr_clk";
+	clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
+	clock-names = "pa_clk", "ethss_clk", "cpts";
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 0>,
diff --git a/arch/arm/boot/dts/keystone-k2l.dtsi b/arch/arm/boot/dts/keystone-k2l.dtsi
index b58e7eb..1486504 100644
--- a/arch/arm/boot/dts/keystone-k2l.dtsi
+++ b/arch/arm/boot/dts/keystone-k2l.dtsi
@@ -232,6 +232,14 @@
 			};
 		};
 
+		osr: sram@70000000 {
+			compatible = "mmio-sram";
+			reg = <0x70000000 0x10000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			clocks = <&clkosr>;
+		};
+
 		dspgpio0: keystone_dsp_gpio@02620240 {
 			compatible = "ti,keystone-dsp-gpio";
 			gpio-controller;
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index 1aeeacb..d4f600d 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -558,10 +558,11 @@
 		};
 
 		r_ccu: clock@1f01400 {
-			compatible = "allwinner,sun50i-a64-r-ccu";
+			compatible = "allwinner,sun8i-h3-r-ccu";
 			reg = <0x01f01400 0x100>;
-			clocks = <&osc24M>, <&osc32k>, <&iosc>;
-			clock-names = "hosc", "losc", "iosc";
+			clocks = <&osc24M>, <&osc32k>, <&iosc>,
+				 <&ccu 9>;
+			clock-names = "hosc", "losc", "iosc", "pll-periph";
 			#clock-cells = <1>;
 			#reset-cells = <1>;
 		};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index 33a8eb2..06e2331 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -1,4 +1,4 @@
-#include <versatile-ab.dts>
+#include "versatile-ab.dts"
 
 / {
 	model = "ARM Versatile PB";
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index cf06247..2b913f1 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
 	return ret;
 }
 
-typedef void (*phys_reset_t)(unsigned long);
+typedef typeof(cpu_reset) phys_reset_t;
 
 void mcpm_cpu_power_down(void)
 {
@@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void)
 	 * on the CPU.
 	 */
 	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
-	phys_reset(__pa_symbol(mcpm_entry_point));
+	phys_reset(__pa_symbol(mcpm_entry_point), false);
 
 	/* should never get here */
 	BUG();
@@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg)
 	__mcpm_cpu_down(cpu, cluster);
 
 	phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
-	phys_reset(__pa_symbol(mcpm_entry_point));
+	phys_reset(__pa_symbol(mcpm_entry_point), false);
 	BUG();
 }
 
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 36ec9c8..3234fe9 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -19,7 +19,8 @@ struct dev_archdata {
 #ifdef CONFIG_XEN
 	const struct dma_map_ops *dev_dma_ops;
 #endif
-	bool dma_coherent;
+	unsigned int dma_coherent:1;
+	unsigned int dma_ops_setup:1;
 };
 
 struct omap_device;
diff --git a/arch/arm/include/asm/dmi.h b/arch/arm/include/asm/dmi.h
new file mode 100644
index 0000000..df2d2ff
--- /dev/null
+++ b/arch/arm/include/asm/dmi.h
@@ -0,0 +1,19 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#define dmi_early_remap(x, l)		memremap(x, l, MEMREMAP_WB)
+#define dmi_early_unmap(x, l)		memunmap(x)
+#define dmi_remap(x, l)			memremap(x, l, MEMREMAP_WB)
+#define dmi_unmap(x)			memunmap(x)
+#define dmi_alloc(l)			kzalloc(l, GFP_KERNEL)
+
+#endif
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 302240c..a0d726a 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t;
 #define pgprot_noncached(prot)	(prot)
 #define pgprot_writecombine(prot) (prot)
 #define pgprot_dmacoherent(prot) (prot)
+#define pgprot_device(prot)	(prot)
 
 
 /*
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a95..4e80bf7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
 	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
 
-		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+		if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
 			cacheid = 0;
 		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 572a8df..c9a0a52 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
  */
 static void ipi_cpu_stop(unsigned int cpu)
 {
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (system_state <= SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 570ed4a..5386528 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -104,7 +104,6 @@
 	@  - Write permission implies XN: disabled
 	@  - Instruction cache: enabled
 	@  - Data/Unified cache: enabled
-	@  - Memory alignment checks: enabled
 	@  - MMU: enabled (this code must be run from an identity mapping)
 	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
 	ldr	r2, =HSCTLR_MASK
@@ -112,8 +111,8 @@
 	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
 	ldr	r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
 	and	r1, r1, r2
- ARM(	ldr	r2, =(HSCTLR_M | HSCTLR_A)			)
- THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+ ARM(	ldr	r2, =(HSCTLR_M)					)
+ THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_TE)			)
 	orr	r1, r1, r2
 	orr	r0, r0, r1
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 841e924..cbd959b 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -1,6 +1,7 @@
 menuconfig ARCH_AT91
 	bool "Atmel SoCs"
 	depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7
+	select ARM_CPU_SUSPEND if PM
 	select COMMON_CLK_AT91
 	select GPIOLIB
 	select PINCTRL
diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c
index efb8035..b5cc05d 100644
--- a/arch/arm/mach-davinci/pm.c
+++ b/arch/arm/mach-davinci/pm.c
@@ -153,7 +153,8 @@ int __init davinci_pm_init(void)
 	davinci_sram_suspend = sram_alloc(davinci_cpu_suspend_sz, NULL);
 	if (!davinci_sram_suspend) {
 		pr_err("PM: cannot allocate SRAM memory\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto no_sram_mem;
 	}
 
 	davinci_sram_push(davinci_sram_suspend, davinci_cpu_suspend,
@@ -161,6 +162,10 @@ int __init davinci_pm_init(void)
 
 	suspend_set_ops(&davinci_pm_ops);
 
+	return 0;
+
+no_sram_mem:
+	iounmap(pm_config.ddrpsc_reg_base);
 no_ddrpsc_mem:
 	iounmap(pm_config.ddrpll_reg_base);
 no_ddrpll_mem:
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c742dfd..bd83c53 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2311,7 +2311,14 @@ int arm_iommu_attach_device(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
 
-static void __arm_iommu_detach_device(struct device *dev)
+/**
+ * arm_iommu_detach_device
+ * @dev: valid struct device pointer
+ *
+ * Detaches the provided device from a previously attached map.
+ * This voids the dma operations (dma_map_ops pointer)
+ */
+void arm_iommu_detach_device(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping;
 
@@ -2324,22 +2331,10 @@ static void __arm_iommu_detach_device(struct device *dev)
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
 	to_dma_iommu_mapping(dev) = NULL;
+	set_dma_ops(dev, NULL);
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
 }
-
-/**
- * arm_iommu_detach_device
- * @dev: valid struct device pointer
- *
- * Detaches the provided device from a previously attached map.
- * This voids the dma operations (dma_map_ops pointer)
- */
-void arm_iommu_detach_device(struct device *dev)
-{
-	__arm_iommu_detach_device(dev);
-	set_dma_ops(dev, NULL);
-}
 EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
 
 static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
@@ -2379,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 	if (!mapping)
 		return;
 
-	__arm_iommu_detach_device(dev);
+	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }
 
@@ -2430,9 +2425,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		dev->dma_ops = xen_dma_ops;
 	}
 #endif
+	dev->archdata.dma_ops_setup = true;
 }
 
 void arch_teardown_dma_ops(struct device *dev)
 {
+	if (!dev->archdata.dma_ops_setup)
+		return;
+
 	arm_teardown_iommu_dma_ops(dev);
 }
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 2239fde..f0701d8 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 31af3cb5..e46a6a4 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void)
 
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
+	if (!memblock_limit)
+		memblock_limit = arm_lowmem_limit;
+
 	/*
 	 * Round the memblock limit down to a pmd size.  This
 	 * helps to ensure that we will allocate memory from the
 	 * last full pmd, which should be mapped.
 	 */
-	if (memblock_limit)
-		memblock_limit = round_down(memblock_limit, PMD_SIZE);
-	if (!memblock_limit)
-		memblock_limit = arm_lowmem_limit;
+	memblock_limit = round_down(memblock_limit, PMD_SIZE);
 
 	if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
 		if (memblock_end_of_DRAM() > arm_lowmem_limit) {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3dcd7ec..b2024db 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1084,10 +1084,6 @@
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
-config KEYS_COMPAT
-	def_bool y
-	depends on COMPAT && KEYS
-
 endmenu
 
 menu "Power management options"
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index c7f669f..166c9ef 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -406,8 +406,9 @@
 		r_ccu: clock@1f01400 {
 			compatible = "allwinner,sun50i-a64-r-ccu";
 			reg = <0x01f01400 0x100>;
-			clocks = <&osc24M>, <&osc32k>, <&iosc>;
-			clock-names = "hosc", "losc", "iosc";
+			clocks = <&osc24M>, <&osc32k>, <&iosc>,
+				 <&ccu 11>;
+			clock-names = "hosc", "losc", "iosc", "pll-periph";
 			#clock-cells = <1>;
 			#reset-cells = <1>;
 		};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
index 4d314a2..732e2e0 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
@@ -40,7 +40,7 @@
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "sunxi-h3-h5.dtsi"
+#include <arm/sunxi-h3-h5.dtsi>
 
 / {
 	cpus {
diff --git a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi b/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi
deleted file mode 120000
index 036f01d..0000000
--- a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts/sunxi-h3-h5.dtsi
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index 75bce2d..49f6a62 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -81,6 +81,45 @@
 		};
 	};
 
+	reg_sys_5v: regulator@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "SYS_5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_vdd_3v3: regulator@1 {
+		compatible = "regulator-fixed";
+		regulator-name = "VDD_3V3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
+	};
+
+	reg_5v_hub: regulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "5V_HUB";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		gpio = <&gpio0 7 0>;
+		regulator-always-on;
+		vin-supply = <&reg_sys_5v>;
+	};
+
+	wl1835_pwrseq: wl1835-pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		/* WLAN_EN GPIO */
+		reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
+		clocks = <&pmic>;
+		clock-names = "ext_clock";
+		power-off-delay-us = <10>;
+	};
+
 	soc {
 		spi0: spi@f7106000 {
 			status = "ok";
@@ -256,11 +295,31 @@
 
 		/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
 
-		dwmmc_2: dwmmc2@f723f000 {
-			ti,non-removable;
+		dwmmc_0: dwmmc0@f723d000 {
+			cap-mmc-highspeed;
 			non-removable;
-			/* WL_EN */
-			vmmc-supply = <&wlan_en_reg>;
+			bus-width = <0x8>;
+			vmmc-supply = <&ldo19>;
+		};
+
+		dwmmc_1: dwmmc1@f723e000 {
+			card-detect-delay = <200>;
+			cap-sd-highspeed;
+			sd-uhs-sdr12;
+			sd-uhs-sdr25;
+			sd-uhs-sdr50;
+			vqmmc-supply = <&ldo7>;
+			vmmc-supply = <&ldo10>;
+			bus-width = <0x4>;
+			disable-wp;
+			cd-gpios = <&gpio1 0 1>;
+		};
+
+		dwmmc_2: dwmmc2@f723f000 {
+			bus-width = <0x4>;
+			non-removable;
+			vmmc-supply = <&reg_vdd_3v3>;
+			mmc-pwrseq = <&wl1835_pwrseq>;
 
 			#address-cells = <0x1>;
 			#size-cells = <0x0>;
@@ -272,18 +331,6 @@
 				interrupts = <3 IRQ_TYPE_EDGE_RISING>;
 			};
 		};
-
-		wlan_en_reg: regulator@1 {
-			compatible = "regulator-fixed";
-			regulator-name = "wlan-en-regulator";
-			regulator-min-microvolt = <1800000>;
-			regulator-max-microvolt = <1800000>;
-			/* WLAN_EN GPIO */
-			gpio = <&gpio0 5 0>;
-			/* WLAN card specific delay */
-			startup-delay-us = <70000>;
-			enable-active-high;
-		};
 	};
 
 	leds {
@@ -330,6 +377,7 @@
 	pmic: pmic@f8000000 {
 		compatible = "hisilicon,hi655x-pmic";
 		reg = <0x0 0xf8000000 0x0 0x1000>;
+		#clock-cells = <0>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
 		pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 1e5129b..5013e4b 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -725,20 +725,10 @@
 			status = "disabled";
 		};
 
-		fixed_5v_hub: regulator@0 {
-			compatible = "regulator-fixed";
-			regulator-name = "fixed_5v_hub";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			regulator-boot-on;
-			gpio = <&gpio0 7 0>;
-			regulator-always-on;
-		};
-
 		usb_phy: usbphy {
 			compatible = "hisilicon,hi6220-usb-phy";
 			#phy-cells = <0>;
-			phy-supply = <&fixed_5v_hub>;
+			phy-supply = <&reg_5v_hub>;
 			hisilicon,peripheral-syscon = <&sys_ctrl>;
 		};
 
@@ -766,17 +756,12 @@
 
 		dwmmc_0: dwmmc0@f723d000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			cap-mmc-highspeed;
-			non-removable;
 			reg = <0x0 0xf723d000 0x0 0x1000>;
 			interrupts = <0x0 0x48 0x4>;
 			clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
 			reset-names = "reset";
-			bus-width = <0x8>;
-			vmmc-supply = <&ldo19>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
 				     &emmc_cfg_func &emmc_rst_cfg_func>;
@@ -784,13 +769,7 @@
 
 		dwmmc_1: dwmmc1@f723e000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
-			card-detect-delay = <200>;
 			hisilicon,peripheral-syscon = <&ao_ctrl>;
-			cap-sd-highspeed;
-			sd-uhs-sdr12;
-			sd-uhs-sdr25;
-			sd-uhs-sdr50;
 			reg = <0x0 0xf723e000 0x0 0x1000>;
 			interrupts = <0x0 0x49 0x4>;
 			#address-cells = <0x1>;
@@ -799,11 +778,6 @@
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
 			reset-names = "reset";
-			vqmmc-supply = <&ldo7>;
-			vmmc-supply = <&ldo10>;
-			bus-width = <0x4>;
-			disable-wp;
-			cd-gpios = <&gpio1 0 1>;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
 			pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
@@ -811,15 +785,12 @@
 
 		dwmmc_2: dwmmc2@f723f000 {
 			compatible = "hisilicon,hi6220-dw-mshc";
-			num-slots = <0x1>;
 			reg = <0x0 0xf723f000 0x0 0x1000>;
 			interrupts = <0x0 0x4a 0x4>;
 			clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
 			clock-names = "ciu", "biu";
 			resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
 			reset-names = "reset";
-			bus-width = <0x4>;
-			broken-cd;
 			pinctrl-names = "default", "idle";
 			pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
 			pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index ac8df52..b4bc42e 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -231,8 +231,7 @@
 			cpm_crypto: crypto@800000 {
 				compatible = "inside-secure,safexcel-eip197";
 				reg = <0x800000 0x200000>;
-				interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
-				| IRQ_TYPE_LEVEL_HIGH)>,
+				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 7740a75..6e20588 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -221,8 +221,7 @@
 			cps_crypto: crypto@800000 {
 				compatible = "inside-secure,safexcel-eip197";
 				reg = <0x800000 0x200000>;
-				interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
-				| IRQ_TYPE_LEVEL_HIGH)>,
+				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 65cdd87..97c123e 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -68,6 +68,7 @@
 CONFIG_PCIE_ARMADA_8K=y
 CONFIG_PCI_AARDVARK=y
 CONFIG_PCIE_RCAR=y
+CONFIG_PCIE_ROCKCHIP=m
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
 CONFIG_ARM64_VA_BITS_48=y
@@ -208,6 +209,8 @@
 CONFIG_WL18XX=m
 CONFIG_WLCORE_SDIO=m
 CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_ADC=m
+CONFIG_KEYBOARD_CROS_EC=y
 CONFIG_KEYBOARD_GPIO=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_PM8941_PWRKEY=y
@@ -263,6 +266,7 @@
 CONFIG_SPI_ORION=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_QUP=y
+CONFIG_SPI_ROCKCHIP=y
 CONFIG_SPI_S3C64XX=y
 CONFIG_SPI_SPIDEV=m
 CONFIG_SPMI=y
@@ -292,6 +296,7 @@
 CONFIG_CPU_THERMAL=y
 CONFIG_THERMAL_EMULATION=y
 CONFIG_EXYNOS_THERMAL=y
+CONFIG_ROCKCHIP_THERMAL=m
 CONFIG_WATCHDOG=y
 CONFIG_S3C2410_WATCHDOG=y
 CONFIG_MESON_GXBB_WATCHDOG=m
@@ -300,12 +305,14 @@
 CONFIG_BCM2835_WDT=y
 CONFIG_MFD_CROS_EC=y
 CONFIG_MFD_CROS_EC_I2C=y
+CONFIG_MFD_CROS_EC_SPI=y
 CONFIG_MFD_EXYNOS_LPASS=m
 CONFIG_MFD_HI655X_PMIC=y
 CONFIG_MFD_MAX77620=y
 CONFIG_MFD_SPMI_PMIC=y
 CONFIG_MFD_RK808=y
 CONFIG_MFD_SEC_CORE=y
+CONFIG_REGULATOR_FAN53555=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_GPIO=y
 CONFIG_REGULATOR_HI655X=y
@@ -473,8 +480,10 @@
 CONFIG_EXTCON_USB_GPIO=y
 CONFIG_IIO=y
 CONFIG_EXYNOS_ADC=y
+CONFIG_ROCKCHIP_SARADC=m
 CONFIG_PWM=y
 CONFIG_PWM_BCM2835=m
+CONFIG_PWM_CROS_EC=m
 CONFIG_PWM_MESON=m
 CONFIG_PWM_ROCKCHIP=y
 CONFIG_PWM_SAMSUNG=y
@@ -484,6 +493,7 @@
 CONFIG_PHY_SUN4I_USB=y
 CONFIG_PHY_ROCKCHIP_INNO_USB2=y
 CONFIG_PHY_ROCKCHIP_EMMC=y
+CONFIG_PHY_ROCKCHIP_PCIE=m
 CONFIG_PHY_XGENE=y
 CONFIG_PHY_TEGRA_XUSB=y
 CONFIG_ARM_SCPI_PROTOCOL=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 0e99978..59cca1d 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -23,9 +23,9 @@
 #define ACPI_MADT_GICC_LENGTH	\
 	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
 
-#define BAD_MADT_GICC_ENTRY(entry, end)						\
-	(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
-	 (entry)->header.length != ACPI_MADT_GICC_LENGTH)
+#define BAD_MADT_GICC_ENTRY(entry, end)					\
+	(!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH ||	\
+	(unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
 
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 15c142c..b4d13d9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -286,6 +286,10 @@
 #define SCTLR_ELx_A	(1 << 1)
 #define SCTLR_ELx_M	1
 
+#define SCTLR_EL2_RES1	((1 << 4)  | (1 << 5)  | (1 << 11) | (1 << 16) | \
+			 (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
+			 (1 << 29))
+
 #define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
 			 SCTLR_ELx_SA | SCTLR_ELx_I)
 
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 5d17f37..82cd075 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,7 +11,6 @@
  *
  */
 
-#include <linux/dmi.h>
 #include <linux/efi.h>
 #include <linux/init.h>
 
@@ -117,20 +116,6 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
 				   set_permissions, md);
 }
 
-static int __init arm64_dmi_init(void)
-{
-	/*
-	 * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
-	 * be called early because dmi_id_init(), which is an arch_initcall
-	 * itself, depends on dmi_scan_machine() having been called already.
-	 */
-	dmi_scan_machine();
-	if (dmi_available)
-		dmi_set_dump_stack_arch_desc();
-	return 0;
-}
-core_initcall(arm64_dmi_init);
-
 /*
  * UpdateCapsule() depends on the system being shutdown via
  * ResetSystem().
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 4f0e3eb..c7e3e63 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 		return NULL;
 
 	root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
-	if (!root_ops)
+	if (!root_ops) {
+		kfree(ri);
 		return NULL;
+	}
 
 	ri->cfg = pci_acpi_setup_ecam_mapping(root);
 	if (!ri->cfg) {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6e0e16a..3211198 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -961,8 +961,7 @@ void smp_send_stop(void)
 		cpumask_copy(&mask, cpu_online_mask);
 		cpumask_clear_cpu(smp_processor_id(), &mask);
 
-		if (system_state == SYSTEM_BOOTING ||
-		    system_state == SYSTEM_RUNNING)
+		if (system_state <= SYSTEM_RUNNING)
 			pr_crit("SMP: stopping secondary CPUs\n");
 		smp_cross_call(&mask, IPI_CPU_STOP);
 	}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31..d0cb007 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
 		vdso_data->raw_time_sec		= tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec	= tk->raw_time.tv_nsec;
+		vdso_data->raw_time_nsec	= (tk->raw_time.tv_nsec <<
+						   tk->tkr_raw.shift) +
+						  tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
 		vdso_data->xtime_clock_nsec	= tk->tkr_mono.xtime_nsec;
-		/* tkr_raw.xtime_nsec == 0 */
 		vdso_data->cs_mono_mult		= tk->tkr_mono.mult;
 		vdso_data->cs_raw_mult		= tk->tkr_raw.mult;
 		/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b467..76320e9 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@
 	seqcnt_check fail=monotonic_raw
 
 	/* All computations are done with left-shifted nsecs. */
-	lsl	x14, x14, x12
 	get_nsec_per_sec res=x9
 	lsl	x9, x9, x12
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 839425c..3f96155 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -106,10 +106,13 @@
 	tlbi	alle2
 	dsb	sy
 
-	mrs	x4, sctlr_el2
-	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
-	ldr	x5, =SCTLR_ELx_FLAGS
-	orr	x4, x4, x5
+	/*
+	 * Preserve all the RES1 bits while setting the default flags,
+	 * as well as the EE bit on BE. Drop the A flag since the compiler
+	 * is allowed to generate unaligned accesses.
+	 */
+	ldr	x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
+CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	msr	sctlr_el2, x4
 	isb
 
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 79f37e3..6260b69 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
 		 * The vgic_set_vmcr() will convert to ICH_VMCR layout.
 		 */
-		vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
-		vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
+		vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
+		vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
 		vgic_set_vmcr(vcpu, &vmcr);
 	} else {
 		val = 0;
@@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		 * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
 		 * Extract it directly using ICC_CTLR_EL1 reg definitions.
 		 */
-		val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
-		val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
+		val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
+		val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
 
 		p->regval = val;
 	}
@@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		p->regval = 0;
 
 	vgic_get_vmcr(vcpu, &vmcr);
-	if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
+	if (!vmcr.cbpr) {
 		if (p->is_write) {
 			vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
 				     ICC_BPR1_EL1_SHIFT;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 71f9305..c870d6f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -36,6 +36,7 @@ int bpf_jit_enable __read_mostly;
 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
@@ -57,6 +58,7 @@ static const int bpf2a64[] = {
 	/* temporary registers for internal BPF JIT */
 	[TMP_REG_1] = A64_R(10),
 	[TMP_REG_2] = A64_R(11),
+	[TMP_REG_3] = A64_R(12),
 	/* tail_call_cnt */
 	[TCALL_CNT] = A64_R(26),
 	/* temporary register for blinding constants */
@@ -319,6 +321,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	const u8 src = bpf2a64[insn->src_reg];
 	const u8 tmp = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
+	const u8 tmp3 = bpf2a64[TMP_REG_3];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const int i = insn - ctx->prog->insnsi;
@@ -689,10 +692,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
 		emit(A64_LDXR(isdw, tmp2, tmp), ctx);
 		emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-		emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx);
+		emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
 		jmp_offset = -3;
 		check_imm19(jmp_offset);
-		emit(A64_CBNZ(0, tmp2, jmp_offset), ctx);
+		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
 		break;
 
 	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af9..dbdbb8a 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)	(tsk->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)							\
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da..7c87b5b 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define release_segments(mm)		do { } while (0)
 
 /*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
  * saved kernel SP and DP of a blocked thread.
  */
 #ifdef _BIG_ENDIAN
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30..a2cdb15 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
 	while(1) /* waiting for RETRIBUTION! */ ;
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->irp;
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8f..fe87b38 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -85,14 +85,6 @@ hard_reset_now(void)
 }
 
 /*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return task_pt_regs(t)->erp;
-}
-
-/*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
  * is scheduled.
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815d..bc2729e 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /* Free all resources held by a thread. */
 static inline void release_thread(struct task_struct *dead_task)
 {
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9c..e4d08d7 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
 #define release_segments(mm)		do { } while (0)
 #define forget_segments()		do { } while (0)
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h
index a89bdde..139093f 100644
--- a/arch/frv/include/asm/timex.h
+++ b/arch/frv/include/asm/timex.h
@@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
 #define vxtime_lock()		do {} while (0)
 #define vxtime_unlock()		do {} while (0)
 
+/* This attribute is used in include/linux/jiffies.h alongside
+ * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
+ * for frv does not contain another section specification.
+ */
+#define __jiffy_arch_data	__attribute__((__section__(".data")))
+
 #endif
 
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92a..a957b37 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(tsk->thread.pc))
-		return ((unsigned long *)tsk->thread.fp)[2];
-	else
-		return tsk->thread.pc;
-}
-
 int elf_check_arch(const struct elf32_hdr *hdr)
 {
 	unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
index da82c25..46aa289 100644
--- a/arch/frv/mm/elf-fdpic.c
+++ b/arch/frv/mm/elf-fdpic.c
@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto success;
 	}
 
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7..afa5314 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5b..d1ddcab 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a8254..ce67940 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
 /*  task_struct, defined elsewhere, is the "process descriptor" */
 struct task_struct;
 
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /*
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715ba..656050c 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
 }
 
 /*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return 0;
-}
-
-/*
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c
index ec90afd..c599eb1 100644
--- a/arch/hexagon/mm/uaccess.c
+++ b/arch/hexagon/mm/uaccess.c
@@ -37,15 +37,14 @@ __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count)
 	long uncleared;
 
 	while (count > PAGE_SIZE) {
-		uncleared = __copy_to_user_hexagon(dest, &empty_zero_page,
-						PAGE_SIZE);
+		uncleared = raw_copy_to_user(dest, &empty_zero_page, PAGE_SIZE);
 		if (uncleared)
 			return count - (PAGE_SIZE - uncleared);
 		count -= PAGE_SIZE;
 		dest += PAGE_SIZE;
 	}
 	if (count)
-		count = __copy_to_user_hexagon(dest, &empty_zero_page, count);
+		count = raw_copy_to_user(dest, &empty_zero_page, count);
 
 	return count;
 }
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d6..ab982f0 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-	struct unw_frame_info info;
-	unsigned long ip;
-
-	unw_init_from_blocked_task(&info, t);
-	if (unw_unwind(&info) < 0)
-		return 0;
-	unw_get_ip(&info, &ip);
-	return ip;
-}
-
-/*
  * Get the current instruction/program counter value.
  */
 #define current_text_addr() \
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367..657874e 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
 
-extern unsigned long thread_saved_pc(struct task_struct *);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)  do { } while (0)
 #define release_segments(mm)  do { } while (0)
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfe..8cd7e03 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
 
 #include <linux/err.h>
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return tsk->thread.lr;
-}
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e8..94c3603 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c94..7df92f8 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(sw->retpc))
-		return ((unsigned long *)sw->a6)[1];
-	else
-		return sw->retpc;
-}
-
 void arch_cpu_idle(void)
 {
 #if defined(MACH_ATARI_ONLY)
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index 232a12b..2dbbb7c 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -567,8 +567,7 @@ static void stop_this_cpu(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
+	if (system_state <= SYSTEM_RUNNING) {
 		spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196..330d556 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 # define KSTK_EIP(tsk)	(0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk)	\
-	((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817..6527ec2 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct cpu_context *ctx =
-		&(((struct thread_info *)(tsk->stack))->cpu_context);
-
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(ctx->r15))
-		return (unsigned long)ctx->r15;
-	else
-		return ctx->r14;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 2728a9a..145b5ce 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -128,19 +128,19 @@
 			-DADDR_BITS=$(ADDR_BITS) \
 			-DADDR_CELLS=$(itb_addr_cells)
 
-$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
 
-$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
 
-$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
 
-$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
 
-$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
 	$(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
 
 quiet_cmd_itb-image = ITB     $@
diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h
index d34536e..279b6d1 100644
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -35,7 +35,12 @@ extern pte_t *pkmap_page_table;
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define LAST_PKMAP 512
+#else
 #define LAST_PKMAP 1024
+#endif
+
 #define LAST_PKMAP_MASK (LAST_PKMAP-1)
 #define PKMAP_NR(virt)	((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr)	(PKMAP_BASE + ((nr) << PAGE_SHIFT))
diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h
index 291846d..ad1a999 100644
--- a/arch/mips/include/asm/kprobes.h
+++ b/arch/mips/include/asm/kprobes.h
@@ -43,7 +43,8 @@ typedef union mips_instruction kprobe_opcode_t;
 
 #define flush_insn_slot(p)						\
 do {									\
-	flush_icache_range((unsigned long)p->addr,			\
+	if (p->addr)							\
+		flush_icache_range((unsigned long)p->addr,		\
 			   (unsigned long)p->addr +			\
 			   (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));	\
 } while (0)
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 6f94bed..74afe8c 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -19,6 +19,10 @@
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
+#ifdef CONFIG_HIGHMEM
+#include <asm/highmem.h>
+#endif
+
 extern int temp_tlb_entry;
 
 /*
@@ -62,7 +66,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
 
 #define VMALLOC_START	  MAP_BASE
 
-#define PKMAP_BASE		(0xfe000000UL)
+#define PKMAP_END	((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1))
+#define PKMAP_BASE	(PKMAP_END - PAGE_SIZE * LAST_PKMAP)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index b11facd..f702a45 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -804,8 +804,10 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
 			break;
 		}
 		/* Compact branch: BNEZC || JIALC */
-		if (insn.i_format.rs)
+		if (!insn.i_format.rs) {
+			/* JIALC: set $31/ra */
 			regs->regs[31] = epc + 4;
+		}
 		regs->cp0_epc += 8;
 		break;
 #endif
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 8d83fc2..38a3029 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -11,6 +11,7 @@
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/compiler.h>
+#include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -119,6 +120,7 @@
 	andi	t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
 	beqz	t0, work_notifysig
 work_resched:
+	TRACE_IRQS_OFF
 	jal	schedule
 
 	local_irq_disable		# make sure need_resched and
@@ -155,6 +157,7 @@
 	beqz	t0, work_pending	# trace bit set?
 	local_irq_enable		# could let syscall_trace_leave()
 					# call schedule() instead
+	TRACE_IRQS_ON
 	move	a0, sp
 	jal	syscall_trace_leave
 	b	resume_userspace
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 30a3b75..9d9b8fba 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -38,20 +38,6 @@ void arch_ftrace_update_code(int command)
 
 #endif
 
-/*
- * Check if the address is in kernel space
- *
- * Clone core_kernel_text() from kernel/extable.c, but doesn't call
- * init_kernel_text() for Ftrace doesn't trace functions in init sections.
- */
-static inline int in_kernel_space(unsigned long ip)
-{
-	if (ip >= (unsigned long)_stext &&
-	    ip <= (unsigned long)_etext)
-		return 1;
-	return 0;
-}
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
@@ -198,7 +184,7 @@ int ftrace_make_nop(struct module *mod,
 	 * If ip is in kernel space, no long call, otherwise, long call is
 	 * needed.
 	 */
-	new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+	new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F;
 #ifdef CONFIG_64BIT
 	return ftrace_modify_code(ip, new);
 #else
@@ -218,12 +204,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	unsigned int new;
 	unsigned long ip = rec->ip;
 
-	new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
+	new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
 
 #ifdef CONFIG_64BIT
 	return ftrace_modify_code(ip, new);
 #else
-	return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ?
+	return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ?
 						INSN_NOP : insn_la_mcount[1]);
 #endif
 }
@@ -289,7 +275,7 @@ unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
 	 * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
 	 * kernel, move after the instruction "move ra, at"(offset is 16)
 	 */
-	ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
+	ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24);
 
 	/*
 	 * search the text until finding the non-store instruction or "s{d,w}
@@ -394,7 +380,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 	 * entries configured through the tracing/set_graph_function interface.
 	 */
 
-	insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+	insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
 	trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
 
 	/* Only trace if the calling function expects to */
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index cf05220..d1bb506 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -106,8 +106,8 @@
 	beq		t0, t1, dtb_found
 #endif
 	li		t1, -2
-	beq		a0, t1, dtb_found
 	move		t2, a1
+	beq		a0, t1, dtb_found
 
 	li		t2, 0
 dtb_found:
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 313a88b..f3e301f 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1597,7 +1597,6 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 		break;
 	case CPU_P5600:
 	case CPU_P6600:
-	case CPU_I6400:
 		/* 8-bit event numbers */
 		raw_id = config & 0x1ff;
 		base_id = raw_id & 0xff;
@@ -1610,6 +1609,11 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
 		raw_event.range = P;
 #endif
 		break;
+	case CPU_I6400:
+		/* 8-bit event numbers */
+		base_id = config & 0xff;
+		raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
+		break;
 	case CPU_1004K:
 		if (IS_BOTH_COUNTERS_1004K_EVENT(base_id))
 			raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 5f928c3..d994160 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT);
  * state. Actually per-core rather than per-CPU.
  */
 static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
-static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
 
 /* Indicates online CPUs coupled with the current CPU */
 static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
@@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu)
 {
 	enum cps_pm_state state;
 	unsigned core = cpu_data[cpu].core;
-	unsigned dlinesz = cpu_data[cpu].dcache.linesz;
 	void *entry_fn, *core_rc;
 
 	for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
@@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu)
 	}
 
 	if (!per_cpu(ready_count, core)) {
-		core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+		core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
 		if (!core_rc) {
 			pr_err("Failed allocate core %u ready_count\n", core);
 			return -ENOMEM;
 		}
-		per_cpu(ready_count_alloc, core) = core_rc;
-
-		/* Ensure ready_count is aligned to a cacheline boundary */
-		core_rc += dlinesz - 1;
-		core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
 		per_cpu(ready_count, core) = core_rc;
 	}
 
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 918d4c7..5351e1f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs, *regs = current_pt_regs();
 	unsigned long childksp;
-	p->set_child_tid = p->clear_child_tid = NULL;
 
 	childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9681b58..38dfa27 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 {
 	struct pt_regs regs;
 	mm_segment_t old_fs = get_fs();
+
+	regs.cp0_status = KSU_KERNEL;
 	if (sp) {
 		regs.regs[29] = (unsigned long)sp;
 		regs.regs[31] = 0;
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336d..7cd9216 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
 			  bool user, bool kernel)
 {
-	int idx_user, idx_kernel;
+	/*
+	 * Initialize idx_user and idx_kernel to work around a bogus
+	 * maybe-uninitialized warning when using GCC 6.
+	 */
+	int idx_user = 0, idx_kernel = 0;
 	unsigned long flags, old_entryhi;
 
 	local_irq_save(flags);
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 4a2d03c7..caa62f2 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 		return ieee754dp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
 		DPDNORMZ;
-	/* QNAN is handled separately below */
+	/* QNAN and ZERO cases are handled separately below */
 	}
 
 	switch (CLPAIR(xc, yc)) {
@@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	}
 	assert(rm & (DP_HIDDEN_BIT << 3));
 
+	if (zc == IEEE754_CLASS_ZERO)
+		return ieee754dp_format(rs, re, rm);
+
 	/* And now the addition */
 	assert(zm & DP_HIDDEN_BIT);
 
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index a8cd8b4..c91d5e5 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 		return ieee754sp_nanxcpt(z);
 	case IEEE754_CLASS_DNORM:
 		SPDNORMZ;
-	/* QNAN is handled separately below */
+	/* QNAN and ZERO cases are handled separately below */
 	}
 
 	switch (CLPAIR(xc, yc)) {
@@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 	}
 	assert(rm & (SP_HIDDEN_BIT << 3));
 
+	if (zc == IEEE754_CLASS_ZERO)
+		return ieee754sp_format(rs, re, rm);
+
 	/* And now the addition */
 
 	assert(zm & SP_HIDDEN_BIT);
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index fe8df14..e08598c 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev,
  * systems and only the R10000 and R12000 are used in such systems, the
  * SGI IP28 Indigo² rsp. SGI IP32 aka O2.
  */
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-	return !plat_device_is_coherent(dev) &&
-	       (boot_cpu_type() == CPU_R10000 ||
-		boot_cpu_type() == CPU_R12000 ||
-		boot_cpu_type() == CPU_BMIPS5000);
+	if (plat_device_is_coherent(dev))
+		return false;
+
+	switch (boot_cpu_type()) {
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_BMIPS5000:
+		return true;
+
+	default:
+		/*
+		 * Presence of MAARs suggests that the CPU supports
+		 * speculatively prefetching data, and therefore requires
+		 * the post-DMA flush/invalidate.
+		 */
+		return cpu_has_maar;
+	}
 }
 
 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 64dd8bd..28adeab 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index adc6911..b19a3c5 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -51,15 +51,15 @@ void __init pagetable_init(void)
 	/*
 	 * Fixed mappings:
 	 */
-	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+	fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
 
 #ifdef CONFIG_HIGHMEM
 	/*
 	 * Permanent kmaps:
 	 */
 	vaddr = PKMAP_BASE;
-	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+	fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
 	pgd = swapper_pg_dir + __pgd_offset(vaddr);
 	pud = pud_offset(pgd, vaddr);
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17ab..3ae4791 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa426..89e8027 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -40,14 +40,6 @@
 #include "internal.h"
 
 /*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
  * power off function, if any
  */
 void (*pm_power_off)(void);
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d..4944e2e 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)	((tsk)->thread.kregs->ea)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c..396d8f3 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index f8da545..f9b7700 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return (unsigned long)user_regs(t->stack)->pc;
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 }
@@ -167,8 +162,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	top_of_kernel_stack = sp;
 
-	p->set_child_tid = p->clear_child_tid = NULL;
-
 	/* Locate userspace context on stack... */
 	sp -= STACK_FRAME_OVERHEAD;	/* redzone */
 	sp -= sizeof(struct pt_regs);
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a3661ee..4c6694b 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -163,12 +163,7 @@ struct thread_struct {
 	.flags		= 0 \
 	}
 
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
 /*
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b..b64d7d2 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-	return t->thread.regs.kpc;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index e528863..378a754 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	unsigned long task_size = TASK_SIZE;
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		else
 			addr = PAGE_ALIGN(addr);
 
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_color_align, last_mmap;
@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
 		else
 			addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			goto found_addr;
 	}
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f7c8f997..bf4391d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -380,22 +380,6 @@
 
 menu "Kernel options"
 
-config PPC_DT_CPU_FTRS
-	bool "Device-tree based CPU feature discovery & setup"
-	depends on PPC_BOOK3S_64
-	default n
-	help
-	  This enables code to use a new device tree binding for describing CPU
-	  compatibility and features. Saying Y here will attempt to use the new
-	  binding if the firmware provides it. Currently only the skiboot
-	  firmware provides this binding.
-	  If you're not sure say Y.
-
-config PPC_CPUFEATURES_ENABLE_UNKNOWN
-	bool "cpufeatures pass through unknown features to guest/userspace"
-	depends on PPC_DT_CPU_FTRS
-	default y
-
 config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
@@ -1215,11 +1199,6 @@
 
 source "security/Kconfig"
 
-config KEYS_COMPAT
-	bool
-	depends on COMPAT && KEYS
-	default y
-
 source "crypto/Kconfig"
 
 config PPC_LIB_RHEAP
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index b4b5e6b..0c4e470 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
 #define H_PTE_INDEX_SIZE  9
 #define H_PMD_INDEX_SIZE  7
 #define H_PUD_INDEX_SIZE  9
-#define H_PGD_INDEX_SIZE  12
+#define H_PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f2c562a..0151af6 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -104,7 +104,7 @@
 		"1:	"PPC_TLNEI"	%4,0\n"			\
 		_EMIT_BUG_ENTRY					\
 		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_TAINT(TAINT_WARN)),		\
+		  "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
 		  "i" (sizeof(struct bug_entry)),		\
 		  "r" (__ret_warn_on));				\
 	}							\
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index c2d5095..d02ad93 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -214,7 +214,6 @@ enum {
 #define CPU_FTR_DAWR			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
 #define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_SUBCORE			LONG_ASM_CONST(0x2000000000000000)
 #define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
 
 #ifndef __ASSEMBLY__
@@ -463,7 +462,7 @@ enum {
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
+	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
 #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f..8814a72 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a2123f2..1189d04 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_128TB (0x0000800000000000UL)
 #define TASK_SIZE_512TB (0x0002000000000000UL)
 
-#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64	TASK_SIZE_512TB
+#define TASK_SIZE_USER64		TASK_SIZE_512TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_128TB
 #else
-#define TASK_SIZE_USER64	TASK_SIZE_64TB
+#define TASK_SIZE_USER64		TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_64TB
 #endif
 
 /*
@@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
  * space during mmap's.
  */
 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
 
 #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@@ -143,21 +148,15 @@ void release_thread(struct task_struct *);
  * with 128TB and conditionally enable upto 512TB
  */
 #ifdef CONFIG_PPC_BOOK3S_64
-#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ? \
-				 TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ?			\
+				 TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
 #else
 #define DEFAULT_MAP_WINDOW	TASK_SIZE
 #endif
 
 #ifdef __powerpc64__
 
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Limit stack to 128TB */
-#define STACK_TOP_USER64 TASK_SIZE_128TB
-#else
-#define STACK_TOP_USER64 TASK_SIZE_USER64
-#endif
-
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
@@ -379,12 +378,6 @@ struct thread_struct {
 }
 #endif
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.regs)
 
 unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8b3b46b..3297715 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
 extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 
+static inline int early_cpu_to_node(int cpu)
+{
+	int nid;
+
+	nid = numa_cpu_lookup_table[cpu];
+
+	/*
+	 * Fall back to node 0 if nid is unset (it should be set, barring bugs).
+	 * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+	 */
+	return (nid < 0) ? 0 : nid;
+}
 #else
 
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
 static inline void dump_numa_cpu_topology(void) {}
 
 static inline int sysfs_add_device_to_node(struct device *dev, int nid)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8..41e88d3 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -267,13 +267,7 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index c8a822a..c23ff43 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -94,11 +94,13 @@ struct xive_q {
  * store at 0 and some ESBs support doing a trigger via a
  * separate trigger page.
  */
-#define XIVE_ESB_GET		0x800
-#define XIVE_ESB_SET_PQ_00	0xc00
-#define XIVE_ESB_SET_PQ_01	0xd00
-#define XIVE_ESB_SET_PQ_10	0xe00
-#define XIVE_ESB_SET_PQ_11	0xf00
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
 
 #define XIVE_ESB_VAL_P		0x2
 #define XIVE_ESB_VAL_Q		0x1
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 3e7ce86..4d87714 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -46,6 +46,8 @@
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
 #define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
 #define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
+#define PPC_FEATURE2_DARN		0x00200000 /* darn random number insn */
+#define PPC_FEATURE2_SCV		0x00100000 /* scv syscall */
 
 /*
  * IMPORTANT!
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 9b3e88b..6f84983 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void);
 #define COMMON_USER_POWER9	COMMON_USER_POWER8
 #define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
 				 PPC_FEATURE2_ARCH_3_00 | \
-				 PPC_FEATURE2_HAS_IEEE128)
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN )
 
 #ifdef CONFIG_PPC_BOOK3E_64
 #define COMMON_USER_BOOKE	(COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index fcc7588..4c7656d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -8,6 +8,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/jump_label.h>
+#include <linux/libfdt.h>
 #include <linux/memblock.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
@@ -642,7 +643,6 @@ static struct dt_cpu_feature_match __initdata
 	{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
-	{"subcore", feat_enable, CPU_FTR_SUBCORE},
 	{"no-execute", feat_enable, 0},
 	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
 	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
@@ -671,12 +671,24 @@ static struct dt_cpu_feature_match __initdata
 	{"wait-v3", feat_enable, 0},
 };
 
-/* XXX: how to configure this? Default + boot time? */
-#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
-#define CPU_FEATURE_ENABLE_UNKNOWN 1
-#else
-#define CPU_FEATURE_ENABLE_UNKNOWN 0
-#endif
+static bool __initdata using_dt_cpu_ftrs;
+static bool __initdata enable_unknown = true;
+
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+	if (!str)
+		return 0;
+
+	if (!strcmp(str, "off"))
+		using_dt_cpu_ftrs = false;
+	else if (!strcmp(str, "known"))
+		enable_unknown = false;
+	else
+		return 1;
+
+	return 0;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
 
 static void __init cpufeatures_setup_start(u32 isa)
 {
@@ -707,7 +719,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
 		}
 	}
 
-	if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
+	if (!known && enable_unknown) {
 		if (!feat_try_enable_unknown(f)) {
 			pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
 				f->name);
@@ -756,6 +768,26 @@ static void __init cpufeatures_setup_finished(void)
 		cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
 }
 
+static int __init disabled_on_cmdline(void)
+{
+	unsigned long root, chosen;
+	const char *p;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return false;
+
+	p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+	if (!p)
+		return false;
+
+	if (strstr(p, "dt_cpu_ftrs=off"))
+		return true;
+
+	return false;
+}
+
 static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
 					int depth, void *data)
 {
@@ -766,8 +798,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
 	return 0;
 }
 
-static bool __initdata using_dt_cpu_ftrs = false;
-
 bool __init dt_cpu_ftrs_in_use(void)
 {
 	return using_dt_cpu_ftrs;
@@ -775,6 +805,8 @@ bool __init dt_cpu_ftrs_in_use(void)
 
 bool __init dt_cpu_ftrs_init(void *fdt)
 {
+	using_dt_cpu_ftrs = false;
+
 	/* Setup and verify the FDT, if it fails we just bail */
 	if (!early_init_dt_verify(fdt))
 		return false;
@@ -782,6 +814,9 @@ bool __init dt_cpu_ftrs_init(void *fdt)
 	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
 		return false;
 
+	if (disabled_on_cmdline())
+		return false;
+
 	cpufeatures_setup_cpu();
 
 	using_dt_cpu_ftrs = true;
@@ -1027,5 +1062,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 
 void __init dt_cpu_ftrs_scan(void)
 {
+	if (!using_dt_cpu_ftrs)
+		return;
+
 	of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b8..b886795 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.  r0,r4,DSISR_DABRMATCH@h
-	bne-    handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc43435..01addfb 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return  (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index baae104..2ad725e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1666,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
+	current->thread.load_fp = 0;
 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
 	current->thread.fp_save_area = NULL;
 #ifdef CONFIG_ALTIVEC
@@ -1674,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.vr_save_area = NULL;
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
+	current->thread.load_vec = 0;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_SPE
 	memset(current->thread.evr, 0, sizeof(current->thread.evr));
@@ -1685,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.tm_tfhar = 0;
 	current->thread.tm_texasr = 0;
 	current->thread.tm_tfiar = 0;
+	current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 }
 EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 40c4887..f830562 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -161,7 +161,9 @@ static struct ibm_pa_feature {
 	{ .pabyte = 0,  .pabit = 3, .cpu_features  = CPU_FTR_CTRL },
 	{ .pabyte = 0,  .pabit = 6, .cpu_features  = CPU_FTR_NOEXECUTE },
 	{ .pabyte = 1,  .pabit = 2, .mmu_features  = MMU_FTR_CI_LARGE_PAGE },
+#ifdef CONFIG_PPC_RADIX_MMU
 	{ .pabyte = 40, .pabit = 0, .mmu_features  = MMU_FTR_TYPE_RADIX },
+#endif
 	{ .pabyte = 1,  .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
 	{ .pabyte = 5,  .pabit = 0, .cpu_features  = CPU_FTR_REAL_LE,
 				    .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 71dcda9..857129a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_PPC_MM_SLICES
 #ifdef CONFIG_PPC64
-	init_mm.context.addr_limit = TASK_SIZE_128TB;
+	init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 #else
 #error	"context.addr_limit not initialized."
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f35ff9d..4640f6d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
@@ -661,7 +686,7 @@ void __init emergency_stack_init(void)
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-	return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
 				    __pa(MAX_DMA_ADDRESS));
 }
 
@@ -672,7 +697,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-	if (cpu_to_node(from) == cpu_to_node(to))
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
 		return LOCAL_DISTANCE;
 	else
 		return REMOTE_DISTANCE;
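
A note on the cpu_to_node() to early_cpu_to_node() switch above: when the percpu first-chunk allocator runs, the per-cpu numa_node variable that cpu_to_node() reads has not been set up yet (it is itself a per-cpu variable), so every CPU would report node 0. The early variant reads the lookup table filled in from the device tree instead. Roughly, as a sketch rather than the exact header definition:

	/* Sketch: node lookup that is safe before per-cpu areas exist. */
	static inline int early_cpu_to_node(int cpu)
	{
		int nid = numa_cpu_lookup_table[cpu];	/* filled from the device tree */

		/* Entries that were never set are negative; fall back to node 0
		 * so NODE_DATA(early_cpu_to_node(cpu)) is always usable. */
		return (nid < 0) ? 0 : nid;
	}
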
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index df2a416..1069f74 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -97,7 +97,7 @@ int smp_generic_cpu_bootable(unsigned int nr)
 	/* Special case - we inhibit secondary thread startup
 	 * during boot if the user requests it.
 	 */
-	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
 		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
 			return 0;
 		if (smt_enabled_at_boot
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a9..c98e90b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr   r8
@@ -95,18 +99,44 @@
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
 #endif
 
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is a livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
+#endif
+
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)
@@ -119,7 +149,10 @@
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
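
Written out in C, the exit decision that the cmpd/cmpdi and the final bne- implement looks like the sketch below; branch_to() stands in for the mtctr r15 / bctr sequence, and old_nip/new_nip for r14/r15:

	/* Sketch of ftrace_caller's exit path after the changes above. */
	static void ftrace_caller_exit_sketch(unsigned long old_nip, unsigned long new_nip)
	{
		if (new_nip == old_nip || is_current_kprobe_addr(old_nip - 4)) {
			/* NIP untouched, or a kprobe/jprobe owns this site:
			 * just continue at the (possibly rewritten) NIP. */
			branch_to(new_nip);
		} else {
			/* NIP rewritten by something other than kprobes: livepatch. */
			livepatch_handler();
		}
	}
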
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4f..8d1a365 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+		/*
+		 * POWER9 DD1 has an erratum where writing TBU40 causes
+		 * the timebase to lose ticks.  So we don't let the
+		 * timebase offset be changed on P9 DD1.  (It is
+		 * initialized to zero.)
+		 */
+		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+			break;
 		/* round up to multiple of 2^24 */
 		vcpu->arch.vcore->tb_offset =
 			ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 	int srcu_idx;
+	unsigned long ebb_regs[3] = {};	/* shut up GCC */
+	unsigned long user_tar = 0;
+	unsigned int user_vrsave;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return -EINVAL;
 	}
 
+	/*
+	 * Don't allow entry with a suspended transaction, because
+	 * the guest entry/exit code will lose it.
+	 * If the guest has TM enabled, save away their TM-related SPRs
+	 * (they will get restored by the TM unavailable interrupt).
+	 */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+	    (current->thread.regs->msr & MSR_TM)) {
+		if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			return -EINVAL;
+		}
+		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+		current->thread.regs->msr &= ~MSR_TM;
+	}
+#endif
+
 	kvmppc_core_prepare_to_enter(vcpu);
 
 	/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	flush_all_to_thread(current);
 
+	/* Save userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		ebb_regs[0] = mfspr(SPRN_EBBHR);
+		ebb_regs[1] = mfspr(SPRN_EBBRR);
+		ebb_regs[2] = mfspr(SPRN_BESCR);
+		user_tar = mfspr(SPRN_TAR);
+	}
+	user_vrsave = mfspr(SPRN_VRSAVE);
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	} while (is_kvmppc_resume_guest(r));
 
+	/* Restore userspace EBB and other register values */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_EBBHR, ebb_regs[0]);
+		mtspr(SPRN_EBBRR, ebb_regs[1]);
+		mtspr(SPRN_BESCR, ebb_regs[2]);
+		mtspr(SPRN_TAR, user_tar);
+		mtspr(SPRN_FSCR, current->thread.fscr);
+	}
+	mtspr(SPRN_VRSAVE, user_vrsave);
+
  out:
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a2..404deb5 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
 	 */
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_KVM(r5)
+	ld	r9, KVM_HOST_LPCR(r6)
+	andis.	r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-	mtspr	SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+	bne	32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
+32:	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76..4888dd4 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)			\
+BEGIN_FTR_SECTION;				\
+	extsw	reg, reg;			\
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS			144
+#define STACK_SLOT_TRAP		(SFS-4)
+#define STACK_SLOT_TID		(SFS-16)
+#define STACK_SLOT_PSSCR	(SFS-24)
+#define STACK_SLOT_PID		(SFS-32)
+#define STACK_SLOT_IAMR		(SFS-40)
+#define STACK_SLOT_CIABR	(SFS-48)
+#define STACK_SLOT_DAWR		(SFS-56)
+#define STACK_SLOT_DAWRX	(SFS-64)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
+	/* HDEC value came from DEC in the first place, it will fit */
 	mfspr	r3, SPRN_HDEC
 	mtspr	SPRN_DEC, r3
 	/*
@@ -295,8 +314,9 @@
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
+	EXTEND_HDEC(r0)
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-	cmpwi	r0, 0
+	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
 
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@
 	bl	kvmhv_accumulate_time
 #endif
 13:	mr	r3, r12
-	stw	r12, 112-4(r1)
+	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, 112-4(r1)
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host
 
 /*
@@ -390,8 +410,8 @@
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	lis	r6, 0x7fff
-	ori	r6, r6, 0xffff
+	LOAD_REG_ADDR(r6, decrementer_max)
+	ld	r6, 0(r6)
 	mtspr	SPRN_HDEC, r6
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID		(112-16)
-#define STACK_SLOT_PSSCR	(112-24)
-#define STACK_SLOT_PID		(112-32)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -565,7 +580,7 @@
 	 */
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -112(r1)
+	stdu	r1, -SFS(r1)
 
 	/* Save R1 in the PACA */
 	std	r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@
 	mfspr	r5, SPRN_TIDR
 	mfspr	r6, SPRN_PSSCR
 	mfspr	r7, SPRN_PID
+	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_TID(r1)
 	std	r6, STACK_SLOT_PSSCR(r1)
 	std	r7, STACK_SLOT_PID(r1)
+	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_CIABR
+	mfspr	r6, SPRN_DAWR
+	mfspr	r7, SPRN_DAWRX
+	std	r5, STACK_SLOT_CIABR(r1)
+	std	r6, STACK_SLOT_DAWR(r1)
+	std	r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -968,7 +993,8 @@
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	cmpwi	r3, 512		/* 1 microsecond */
+	EXTEND_HDEC(r3)
+	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
 #ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@
 	 * set by the guest could disrupt the host.
 	 */
 	li	r0, 0
-	mtspr	SPRN_IAMR, r0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
 BEGIN_FTR_SECTION
+	mtspr	SPRN_IAMR, r0
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
@@ -1525,6 +1550,7 @@
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
+	mtspr	SPRN_UAMOR, r6
 
 	/* Switch DSCR back to host value */
 	mfspr	r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@
 
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_CIABR(r1)
+	ld	r6, STACK_SLOT_DAWR(r1)
+	ld	r7, STACK_SLOT_DAWRX(r1)
+	mtspr	SPRN_CIABR, r5
+	mtspr	SPRN_DAWR, r6
+	mtspr	SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_TID(r1)
 	ld	r6, STACK_SLOT_PSSCR(r1)
 	ld	r7, STACK_SLOT_PID(r1)
+	ld	r8, STACK_SLOT_IAMR(r1)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
 	mtspr	SPRN_PID, r7
+	mtspr	SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
 
-	ld	r0, 112+PPC_LR_STKOFF(r1)
-	addi	r1, r1, 112
+	ld	r0, SFS+PPC_LR_STKOFF(r1)
+	addi	r1, r1, SFS
 	mtlr	r0
 	blr
 
@@ -2366,12 +2402,13 @@
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-	cmpw	r3, r4
+	extsw	r3, r3
+	EXTEND_HDEC(r4)
+	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
 67:
 	/* save expiry time of guest decrementer */
-	extsw	r3, r3
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
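
The EXTEND_HDEC macro defined near the top of this file is what makes the cmpwi-to-cmpdi conversions above safe on both CPU generations. In C terms it is roughly the following; extend_hdec() is an illustrative name, the feature test is the existing one:

	/* Sketch: widen an HDEC sample to a signed 64-bit value. */
	static inline s64 extend_hdec(u64 hdec)
	{
		if (cpu_has_feature(CPU_FTR_ARCH_300))
			return (s64)hdec;	/* POWER9: HDEC is wider than 32 bits */
		return (s64)(s32)hdec;		/* older CPUs: sign-extend from bit 31 */
	}
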
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 023a311..4636ca6 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		__x_writeq(0, __x_eoi_page(xd));
+		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		opal_int_eoi(hw_irq);
 	} else {
@@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 		 * properly.
 		 */
 		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			__x_readq(__x_eoi_page(xd));
+			__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
 		else {
 			eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
 
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 6575b9a..a12e863 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	/*
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9dbd2a73..0ee6be4 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
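
These vma->vm_start to vm_start_gap(vma) substitutions (and the matching ones in slice.c below and in the s390 and sh mmap hunks later in this diff) all enforce the same rule: a fixed mapping hint must not land in the guard gap kept below a stack that grows downwards. A simplified sketch of the helper they call, where stack_guard_gap is the existing global tunable:

	/* Sketch of vm_start_gap(): lowest address a new mapping may reach. */
	static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
	{
		unsigned long vm_start = vma->vm_start;

		if (vma->vm_flags & VM_GROWSDOWN) {
			vm_start -= stack_guard_gap;	/* reserve the guard gap */
			if (vm_start > vma->vm_start)	/* clamp on underflow */
				vm_start = 0;
		}
		return vm_start;
	}
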
 
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index c6dca2a..a3edf81 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * mm->context.addr_limit. Default to max task size so that we copy the
 	 * default values to paca which will help us to handle slb miss early.
 	 */
-	mm->context.addr_limit = TASK_SIZE_128TB;
+	mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 966b9fc..45f6740 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fd..09ceea6 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi  = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 018f8e9..bb28e1a 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= ISA207_TEST_ADDER,
+	.test_adder		= P9_DD1_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
@@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= P9_DD1_TEST_ADDER,
+	.test_adder		= ISA207_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 33244e3..4fd64d3 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -59,6 +59,17 @@
 
 	  In case of doubt, say Y
 
+config PPC_DT_CPU_FTRS
+	bool "Device-tree based CPU feature discovery & setup"
+	depends on PPC_BOOK3S_64
+	default y
+	help
+	  This enables code to use a new device tree binding for describing CPU
+	  compatibility and features. Saying Y here will attempt to use the new
+	  binding if the firmware provides it. Currently only the skiboot
+	  firmware provides this binding.
+	  If you're not sure say Y.
+
 config UDBG_RTAS_CONSOLE
 	bool "RTAS based debug console"
 	depends on PPC_RTAS
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 96c2b8a..0c45cdb 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 	    (REGION_ID(ea) != USER_REGION_ID)) {
 
 		spin_unlock(&spu->register_lock);
-		ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
+		ret = hash_page(ea,
+				_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
+				0x300, dsisr);
 		spin_lock(&spu->register_lock);
 
 		if (!ret) {
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index e5a891a..84b7ac9 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
 	if (!dump_skip(cprm, skip))
 		goto Eio;
+
+	rc = 0;
 out:
 	free_page((unsigned long)buf);
 	return rc;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 067defe..b5d960d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
 	if (WARN_ON(!gpdev))
 		return NULL;
 
-	if (WARN_ON(!gpdev->dev.of_node))
+	/* Not all PCI devices have device-tree nodes */
+	if (!gpdev->dev.of_node)
 		return NULL;
 
 	/* Get assoicated PCI device */
@@ -448,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -464,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -485,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -524,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 			if (va)
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid);
+					mmio_invalidate_va(npu, address, pid,
+							flush);
 			else
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid);
+					mmio_invalidate_pid(npu, pid, flush);
 
 			/*
 			 * The NPU hardware forwards the shootdown to all GPUs
@@ -543,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -570,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -580,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -589,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -599,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final address == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -650,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -714,7 +751,7 @@ static void pnv_npu2_release_context(struct kref *kref)
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
 			struct pci_dev *gpdev)
 {
-	struct pnv_phb *nphb, *phb;
+	struct pnv_phb *nphb;
 	struct npu *npu;
 	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
 	struct device_node *nvlink_dn;
@@ -728,13 +765,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 
 	nphb = pci_bus_to_host(npdev->bus)->private_data;
 	npu = &nphb->npu;
-	phb = pci_bus_to_host(gpdev->bus)->private_data;
 	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return;
 	npu_context->npdev[npu->index][nvlink_index] = NULL;
-	opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id,
+	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
 	kref_put(&npu_context->kref, pnv_npu2_release_context);
 }
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 0babef1..8c61192 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,13 @@ static DEVICE_ATTR(subcores_per_core, 0644,
 
 static int subcore_init(void)
 {
-	if (!cpu_has_feature(CPU_FTR_SUBCORE))
+	unsigned pvr_ver;
+
+	pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+	if (pvr_ver != PVR_POWER8 &&
+	    pvr_ver != PVR_POWER8E &&
+	    pvr_ver != PVR_POWER8NVL)
 		return 0;
 
 	/*
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e104c71..1fb162b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -124,6 +124,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
 		lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
+		lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
 		lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
 	}
 
@@ -147,6 +148,7 @@ static void dlpar_update_drconf_property(struct device_node *dn,
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
 		lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+		lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
 		lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
 	}
 
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ef470b4..6afddae2 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 
 static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc);
+	struct u8_gpio_chip *u8_gc =
+		container_of(mm_gc, struct u8_gpio_chip, mm_gc);
 
 	u8_gc->data = in_8(mm_gc->regs);
 }
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 9138250..8f5e303 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -297,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		out_be64(xd->eoi_mmio, 0);
+		out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		/*
 		 * The FW told us to call it. This happens for some
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e161faf..6967add 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -363,9 +363,6 @@
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
-config KEYS_COMPAT
-	def_bool y if COMPAT && KEYS
-
 config SMP
 	def_bool y
 	prompt "Symmetric multi-processing support"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index a5039fa..2820722 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -30,6 +30,7 @@
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -90,6 +94,8 @@
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -359,6 +365,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -367,16 +374,19 @@
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -442,6 +452,8 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -452,7 +464,6 @@
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -471,6 +482,7 @@
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -487,12 +499,18 @@
 CONFIG_XFS_RT=y
 CONFIG_XFS_DEBUG=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
@@ -558,6 +576,7 @@
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA_TEST=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
 CONFIG_DEBUG_OBJECTS_FREE=y
@@ -580,7 +599,6 @@
 CONFIG_WQ_WATCHDOG=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
 CONFIG_PROVE_LOCKING=y
@@ -595,6 +613,7 @@
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
 CONFIG_FAIL_PAGE_ALLOC=y
@@ -616,13 +635,12 @@
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_LKDTM=m
 CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_SORT=y
 CONFIG_KPROBES_SANITY_TEST=y
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_STRING_HELPERS=y
-CONFIG_TEST_KSTRTOX=y
 CONFIG_DMA_API_DEBUG=y
 CONFIG_TEST_BPF=m
 CONFIG_BUG_ON_DATA_CORRUPTION=y
@@ -630,6 +648,7 @@
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -640,7 +659,9 @@
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_USER=m
+CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
@@ -648,6 +669,7 @@
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -657,8 +679,10 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -674,6 +698,7 @@
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -685,6 +710,7 @@
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
@@ -692,6 +718,7 @@
 CONFIG_X509_CERTIFICATE_PARSER=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
 CONFIG_CORDIC=m
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 83970b5..3c6b781 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -31,6 +31,7 @@
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -46,7 +47,10 @@
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -88,6 +92,8 @@
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -356,6 +362,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -364,16 +371,18 @@
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -439,6 +448,8 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -449,7 +460,6 @@
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -468,6 +478,7 @@
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -483,11 +494,15 @@
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -553,7 +568,6 @@
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -576,6 +590,7 @@
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -599,6 +614,7 @@
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -611,6 +627,7 @@
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -626,16 +643,19 @@
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index fbc6542..653d72b 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -31,6 +31,7 @@
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -86,6 +90,8 @@
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -354,6 +360,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -362,16 +369,18 @@
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -437,6 +446,8 @@
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -447,7 +458,6 @@
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -466,6 +476,7 @@
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -481,11 +492,15 @@
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -551,7 +566,6 @@
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -574,6 +588,7 @@
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -597,6 +612,7 @@
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -609,6 +625,7 @@
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -624,6 +641,7 @@
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -635,6 +653,7 @@
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index e23d97c1..afa46a7 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -12,8 +12,10 @@
 CONFIG_NR_CPUS=2
 # CONFIG_HOTPLUG_CPU is not set
 CONFIG_HZ_100=y
+# CONFIG_ARCH_RANDOM is not set
 # CONFIG_COMPACTION is not set
 # CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
 # CONFIG_CHECK_STACK is not set
 # CONFIG_CHSC_SCH is not set
 # CONFIG_SCM_BUS is not set
@@ -36,11 +38,11 @@
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_ZFCP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
 CONFIG_RAW_DRIVER=y
 # CONFIG_SCLP_ASYNC is not set
 # CONFIG_HMC_DRV is not set
@@ -54,9 +56,9 @@
 # CONFIG_INOTIFY_USER is not set
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 97189db..20244a3 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -28,6 +28,7 @@
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -108,7 +109,6 @@
 CONFIG_SCSI_VIRTIO=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_MD_MULTIPATH=m
 CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=m
@@ -131,6 +131,7 @@
 CONFIG_VIRTIO_NET=y
 # CONFIG_NET_VENDOR_ALACRITECH is not set
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 CONFIG_DEVKMEM=y
@@ -162,7 +163,6 @@
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
@@ -172,14 +172,12 @@
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
@@ -190,7 +188,6 @@
 CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
@@ -230,6 +227,7 @@
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
 CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 6702630..144809a 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/blkdev.h>
 
 struct arqb {
 	u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
 	int (*probe) (struct scm_device *scmdev);
 	int (*remove) (struct scm_device *scmdev);
 	void (*notify) (struct scm_device *scmdev, enum scm_event event);
-	void (*handler) (struct scm_device *scmdev, void *data, int error);
+	void (*handler) (struct scm_device *scmdev, void *data,
+			blk_status_t error);
 };
 
 int scm_driver_register(struct scm_driver *scmdrv);
 void scm_driver_unregister(struct scm_driver *scmdrv);
 
 int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
 
 #endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 426614a..65d07ac 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -541,7 +541,6 @@ struct kvm_s390_float_interrupt {
 	struct mutex ais_lock;
 	u8 simm;
 	u8 nimm;
-	int ais_enabled;
 };
 
 struct kvm_hw_wp_info_arch {
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395f..aeac013 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
 /* Free guarded storage control block for current */
 void exit_thread_gs(void);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index e784bed..2b498e5 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -109,7 +109,7 @@ struct sysinfo_2_2_2 {
 	unsigned short cpus_shared;
 	char reserved_4[3];
 	unsigned char vsne;
-	uuid_be uuid;
+	uuid_t uuid;
 	char reserved_5[160];
 	char ext_name[256];
 };
@@ -134,7 +134,7 @@ struct sysinfo_3_2_2 {
 		char reserved_1[3];
 		unsigned char evmne;
 		unsigned int reserved_2;
-		uuid_be uuid;
+		uuid_t uuid;
 	} vm[8];
 	char reserved_3[1504];
 	char ext_names[8][256];
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index e408d9c..6315037 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -231,12 +231,17 @@
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 .Lsie_done:
 # some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
+# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
+# Other instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So let's use 3 nops as a landing pad for all possible rewinds.
 # See also .Lcleanup_sie
-.Lrewind_pad:
-	nop	0
+.Lrewind_pad6:
+	nopr	7
+.Lrewind_pad4:
+	nopr	7
+.Lrewind_pad2:
+	nopr	7
 	.globl sie_exit
 sie_exit:
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
@@ -249,7 +254,9 @@
 	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
 	j	sie_exit
 
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad2,.Lsie_fault)
 	EX_TABLE(sie_exit,.Lsie_fault)
 EXPORT_SYMBOL(sie64a)
 EXPORT_SYMBOL(sie_exit)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe..8e622bb5 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-		diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		if (MACHINE_IS_LPAR)
-			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-		else
-			diag308(DIAG308_LOAD_CLEAR, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d715..bb32b86 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct stack_frame *sf, *low, *high;
-
-	if (!tsk || !task_stack_page(tsk))
-		return 0;
-	low = task_stack_page(tsk);
-	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) tsk->thread.ksp;
-	if (sf <= low || sf > high)
-		return 0;
-	sf = (struct stack_frame *) sf->back_chain;
-	if (sf <= low || sf > high)
-		return 0;
-	return sf->gprs[8];
-}
-
 extern void kernel_thread_starter(void);
 
 /*
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index eefcb54..fb869b10 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl,
 
 static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
 {
-	if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+	if (uuid_is_null(&info->vm[i].uuid))
 		return;
 	seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
 }
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d..3b297fa 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 	ptr = asce.origin * 4096;
 	if (asce.r) {
 		*fake = 1;
+		ptr = 0;
 		asce.dt = ASCE_TYPE_REGION1;
 	}
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
-		if (vaddr.rfx01 > asce.tl && !asce.r)
+		if (vaddr.rfx01 > asce.tl && !*fake)
 			return PGM_REGION_FIRST_TRANS;
 		break;
 	case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region1_table_entry rfte;
 
 		if (*fake) {
-			/* offset in 16EB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+			ptr += (unsigned long) vaddr.rfx << 53;
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
@@ -1036,8 +1036,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region2_table_entry rste;
 
 		if (*fake) {
-			/* offset in 8PB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+			ptr += (unsigned long) vaddr.rsx << 42;
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
@@ -1064,8 +1063,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union region3_table_entry rtte;
 
 		if (*fake) {
-			/* offset in 4TB guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+			ptr += (unsigned long) vaddr.rtx << 31;
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
@@ -1101,8 +1099,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		union segment_table_entry ste;
 
 		if (*fake) {
-			/* offset in 2G guest memory block */
-			ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+			ptr += (unsigned long) vaddr.sx << 20;
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
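
The four shift fixes above follow one pattern: with a real-space designation the shadow code fabricates ("fake") table entries, and the guest-absolute offset folded into the fake origin must use the index of the level currently being shadowed, not the next level's. For reference, a sketch of the per-level offsets with the vaddr field names used in this function (the helper itself is illustrative):

	/* Sketch: guest-absolute offset encoded in a fake entry at each level. */
	static unsigned long fake_table_offset(int level, union vaddress vaddr)
	{
		switch (level) {
		case 1: return (unsigned long) vaddr.rfx << 53;	/* 8 PB per region-first entry */
		case 2: return (unsigned long) vaddr.rsx << 42;	/* 4 TB per region-second entry */
		case 3: return (unsigned long) vaddr.rtx << 31;	/* 2 GB per region-third entry */
		case 4: return (unsigned long) vaddr.sx << 20;	/* 1 MB per segment entry */
		default: return 0;
		}
	}
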
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index caf15c8..2d120fe 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2160,7 +2160,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
 	struct kvm_s390_ais_req req;
 	int ret = 0;
 
-	if (!fi->ais_enabled)
+	if (!test_kvm_facility(kvm, 72))
 		return -ENOTSUPP;
 
 	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
@@ -2204,7 +2204,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
 	};
 	int ret = 0;
 
-	if (!fi->ais_enabled || !adapter->suppressible)
+	if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
 		return kvm_s390_inject_vm(kvm, &s390int);
 
 	mutex_lock(&fi->ais_lock);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 689ac48..f28e2e7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -558,7 +558,6 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		} else {
 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
 			set_kvm_facility(kvm->arch.model.fac_list, 72);
-			kvm->arch.float_int.ais_enabled = 1;
 			r = 0;
 		}
 		mutex_unlock(&kvm->lock);
@@ -1533,7 +1532,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_init(&kvm->arch.float_int.ais_lock);
 	kvm->arch.float_int.simm = 0;
 	kvm->arch.float_int.nimm = 0;
-	kvm->arch.float_int.ais_enabled = 0;
 	spin_lock_init(&kvm->arch.float_int.lock);
 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b017dae..b854b1d 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
@@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
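
This hunk, and the matching ones in the other architectures further down, makes the address-hint check respect the stack guard gap: a hinted range may not reach into the gap kept below a downward-growing stack. A sketch of what the helper evaluates, assuming the vm_start_gap() definition in include/linux/mm.h (not shown in this excerpt):

static unsigned long vm_start_gap_sketch(struct vm_area_struct *vma)
{
	unsigned long vm_start = vma->vm_start;

	if (vma->vm_flags & VM_GROWSDOWN) {
		vm_start -= stack_guard_gap;
		if (vm_start > vma->vm_start)	/* clamp on underflow */
			vm_start = 0;
	}
	return vm_start;
}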
 
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d..2992745 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
  */
 extern void (*cpu_wait)(void);
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
 extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a..6e20241 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
 	return 1;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_pt_regs(tsk)->cp0_epc;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index 08e7af0..6a1a129 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 58243b0..5639c9f 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,9 +192,9 @@
 	int "Maximum number of CPUs"
 	depends on SMP
 	range 2 32 if SPARC32
-	range 2 1024 if SPARC64
+	range 2 4096 if SPARC64
 	default 32 if SPARC32
-	default 64 if SPARC64
+	default 4096 if SPARC64
 
 source kernel/Kconfig.hz
 
@@ -295,9 +295,13 @@
 	depends on SPARC64 && SMP
 
 config NODES_SHIFT
-	int
-	default "4"
+	int "Maximum NUMA Nodes (as a power of 2)"
+	range 4 5 if SPARC64
+	default "5"
 	depends on NEED_MULTIPLE_NODES
+	help
+	  Specify the maximum number of NUMA Nodes available on the target
+	  system.  Increases memory reserved to accommodate various tables.
 
 # Some NUMA nodes have memory ranges that span
 # other nodes.  Even though a pfn is valid and
@@ -573,9 +577,6 @@
 	depends on COMPAT && SYSVIPC
 	default y
 
-config KEYS_COMPAT
-	def_bool y if COMPAT && KEYS
-
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index f7de0db..83b36a5 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -52,7 +52,7 @@
 #define CTX_NR_MASK		TAG_CONTEXT_BITS
 #define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
 
-#define CTX_FIRST_VERSION	((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_FIRST_VERSION	BIT(CTX_VERSION_SHIFT)
 #define CTX_VALID(__ctx)	\
 	 (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
 #define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index 22fede6..2cddcda 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -19,13 +19,8 @@ extern spinlock_t ctx_alloc_lock;
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
+DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
 void get_new_mmu_context(struct mm_struct *mm);
-#ifdef CONFIG_SMP
-void smp_new_mmu_context_version(void);
-#else
-#define smp_new_mmu_context_version() do { } while (0)
-#endif
-
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
@@ -76,8 +71,9 @@ void __flush_tlb_mm(unsigned long, unsigned long);
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long ctx_valid, flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
+	per_cpu(per_cpu_secondary_mm, cpu) = mm;
 	if (unlikely(mm == &init_mm))
 		return;
 
@@ -123,7 +119,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 	 * for the first time, we must flush that context out of the
 	 * local TLB.
 	 */
-	cpu = smp_processor_id();
 	if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		cpumask_set_cpu(cpu, mm_cpumask(mm));
 		__flush_tlb_mm(CTX_HWBITS(mm->context),
@@ -133,26 +128,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
-
-/* Activate a new MM instance for the current task. */
-static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
-{
-	unsigned long flags;
-	int cpu;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-	if (!CTX_VALID(mm->context))
-		get_new_mmu_context(mm);
-	cpu = smp_processor_id();
-	if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_set_cpu(cpu, mm_cpumask(mm));
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	tsb_context_switch(mm);
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-}
-
+#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC64_MMU_CONTEXT_H) */
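
switch_mm() now publishes the owning mm in per_cpu_secondary_mm before doing anything else, and activate_mm() simply reuses it. That per-CPU pointer is what the new wrap logic in arch/sparc/mm/init_64.c (later in this diff) walks when the context version rolls over. A compressed sketch of the ordering that matters, omitting the locking and TLB flushing the real switch_mm() keeps (helper name made up):

static inline void switch_mm_sketch(struct mm_struct *mm)
{
	/* publish first, so mmu_context_wrap() can see this CPU's mm ... */
	per_cpu(per_cpu_secondary_mm, smp_processor_id()) = mm;

	/* ... then (re)validate and load its context */
	if (!CTX_VALID(mm->context))
		get_new_mmu_context(mm);	/* re-checks under ctx_alloc_lock */
	load_secondary_context(mm);
}
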
diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h
index 26693703..522b43d 100644
--- a/arch/sparc/include/asm/pil.h
+++ b/arch/sparc/include/asm/pil.h
@@ -20,7 +20,6 @@
 #define PIL_SMP_CALL_FUNC	1
 #define PIL_SMP_RECEIVE_SIGNAL	2
 #define PIL_SMP_CAPTURE		3
-#define PIL_SMP_CTX_NEW_VERSION	4
 #define PIL_DEVICE_IRQ		5
 #define PIL_SMP_CALL_FUNC_SNGL	6
 #define PIL_DEFERRED_PCR_WORK	7
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159..b395e56 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
 	.current_ds = KERNEL_DS, \
 }
 
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
 /* Do necessary setup to start up a newly executed thread. */
 static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 				    unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee90..f04dc5a 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
 #include <linux/types.h>
 #include <asm/fpumacro.h>
 
-/* Return saved PC of a blocked thread. */
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
 
 /* On Uniprocessor, even in RMO processes see TSO semantics */
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 8174f6c..9dca7a8 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -327,6 +327,7 @@ struct vio_dev {
 	int			compat_len;
 
 	u64			dev_no;
+	u64			id;
 
 	unsigned long		channel_id;
 
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index b542cc7..f87265a 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp)
 		pbuf.req.handle = cp->handle;
 		pbuf.req.major = 1;
 		pbuf.req.minor = 0;
-		strcpy(pbuf.req.svc_id, cp->service_id);
+		strcpy(pbuf.id_buf, cp->service_id);
 
 		err = __ds_send(lp, &pbuf, msg_len);
 		if (err > 0)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 4d0248a..99dd133 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
 {
 #ifdef CONFIG_SMP
 	unsigned long page;
+	void *mondo, *p;
 
-	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
+	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
+
+	/* Make sure the mondo block is 64-byte aligned */
+	p = kzalloc(127, GFP_KERNEL);
+	if (!p) {
+		prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
+		prom_halt();
+	}
+	mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
+	tb->cpu_mondo_block_pa = __pa(mondo);
 
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page) {
-		prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+		prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
 		prom_halt();
 	}
 
-	tb->cpu_mondo_block_pa = __pa(page);
-	tb->cpu_list_pa = __pa(page + 64);
+	tb->cpu_list_pa = __pa(page);
 #endif
 }
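
The hypervisor requires the CPU mondo block to be 64-byte aligned, but it no longer shares a page with the CPU list, so it is carved out of a small allocation and rounded up by hand. The rounding is safe because a 127-byte buffer always contains an aligned 64-byte block. A self-contained sketch of the arithmetic:

/* Round p up to the next multiple of 64.  Worst case kzalloc() returns p with
 * p % 64 == 1: the aligned block then occupies bytes 63..126 of the 127-byte
 * buffer, so it still fits. */
static void *align_to_64(void *p)
{
	return (void *)(((unsigned long)p + 63) & ~0x3fUL);
}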
 
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index c9804551..6ae1e77 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 /* smp_64.c */
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
 
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e..9245f93 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 }
 
 /*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return task_thread_info(tsk)->kpc;
-}
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(struct task_struct *tsk)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc49..b96104d 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
 
 #endif
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct thread_info *ti = task_thread_info(tsk);
-	unsigned long ret = 0xdeadbeefUL;
-	
-	if (ti && ti->ksp) {
-		unsigned long *sp;
-		sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-		if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-		    sp[14]) {
-			unsigned long *fp;
-			fp = (unsigned long *)(sp[14] + STACK_BIAS);
-			if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-				ret = fp[15];
-		}
-	}
-	return ret;
-}
-
 /* Free current thread data structures etc.. */
 void exit_thread(struct task_struct *tsk)
 {
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b3bc0ac..fdf3104 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	preempt_enable();
 }
 
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
-{
-	struct mm_struct *mm;
-	unsigned long flags;
-
-	clear_softint(1 << irq);
-
-	/* See if we need to allocate a new TLB context because
-	 * the version of the one we are using is now out of date.
-	 */
-	mm = current->active_mm;
-	if (unlikely(!mm || (mm == &init_mm)))
-		return;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-
-	if (unlikely(!CTX_VALID(mm->context)))
-		get_new_mmu_context(mm);
-
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context),
-		       SECONDARY_CONTEXT);
-}
-
-void smp_new_mmu_context_version(void)
-{
-	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
-}
-
 #ifdef CONFIG_KGDB
 void kgdb_roundup_cpus(unsigned long flags)
 {
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ef4520e..043544d 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 10689cf..07c0df9 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -455,13 +455,16 @@
 	.type	copy_tsb,#function
 copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
 			 * %o2=new_tsb_base, %o3=new_tsb_size
+			 * %o4=page_size_shift
 			 */
 	sethi		%uhi(TSB_PASS_BITS), %g7
 	srlx		%o3, 4, %o3
-	add		%o0, %o1, %g1	/* end of old tsb */
+	add		%o0, %o1, %o1	/* end of old tsb */
 	sllx		%g7, 32, %g7
 	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */
 
+	mov		%o4, %g1	/* page_size_shift */
+
 661:	prefetcha	[%o0] ASI_N, #one_read
 	.section	.tsb_phys_patch, "ax"
 	.word		661b
@@ -486,9 +489,9 @@
 	/* This can definitely be computed faster... */
 	srlx		%o0, 4, %o5	/* Build index */
 	and		%o5, 511, %o5	/* Mask index */
-	sllx		%o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
+	sllx		%o5, %g1, %o5	/* Put into vaddr position */
 	or		%o4, %o5, %o4	/* Full VADDR. */
-	srlx		%o4, PAGE_SHIFT, %o4 /* Shift down to create index */
+	srlx		%o4, %g1, %o4	/* Shift down to create index */
 	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
 	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
 	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
@@ -496,7 +499,7 @@
 	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */
 
 80:	add		%o0, 16, %o0
-	cmp		%o0, %g1
+	cmp		%o0, %o1
 	bne,pt		%xcc, 90b
 	 nop
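
copy_tsb() rehashes every valid entry into the new table, and the hash is the virtual address shifted down by the page size; hard-coding PAGE_SHIFT corrupted huge-page TSBs when they were grown. The shift now arrives in %o4 and is parked in %g1 (so %g1 no longer holds the end-of-old-TSB pointer, which moves to %o1). A sketch of the index computation the assembly performs (helper name made up):

/* nentries = new_tsb_size / 16 is a power of two, so nentries - 1 is the
 * mask the code builds in %o3. */
static unsigned long tsb_hash(unsigned long vaddr, unsigned long page_size_shift,
			      unsigned long nentries)
{
	return (vaddr >> page_size_shift) & (nentries - 1);
}

The caller change in arch/sparc/mm/tsb.c further down passes PAGE_SHIFT or REAL_HPAGE_SHIFT depending on which TSB is being resized.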
 
diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S
index 7bd8f65..efe93ab 100644
--- a/arch/sparc/kernel/ttable_64.S
+++ b/arch/sparc/kernel/ttable_64.S
@@ -50,7 +50,7 @@
 tl0_irq1:	TRAP_IRQ(smp_call_function_client, 1)
 tl0_irq2:	TRAP_IRQ(smp_receive_signal_client, 2)
 tl0_irq3:	TRAP_IRQ(smp_penguin_jailcell, 3)
-tl0_irq4:	TRAP_IRQ(smp_new_mmu_context_version_client, 4)
+tl0_irq4:	BTRAP(0x44)
 #else
 tl0_irq1:	BTRAP(0x41)
 tl0_irq2:	BTRAP(0x42)
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
index f6bb857..075d389 100644
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
 	if (!id) {
 		dev_set_name(&vdev->dev, "%s", bus_id_name);
 		vdev->dev_no = ~(u64)0;
+		vdev->id = ~(u64)0;
 	} else if (!cfg_handle) {
 		dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id);
 		vdev->dev_no = *id;
+		vdev->id = ~(u64)0;
 	} else {
 		dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name,
 			     *cfg_handle, *id);
 		vdev->dev_no = *cfg_handle;
+		vdev->id = *id;
 	}
 
 	vdev->dev.parent = parent;
@@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node)
 	(void) vio_create_one(hp, node, &root_vdev->dev);
 }
 
+struct vio_md_node_query {
+	const char *type;
+	u64 dev_no;
+	u64 id;
+};
+
 static int vio_md_node_match(struct device *dev, void *arg)
 {
+	struct vio_md_node_query *query = (struct vio_md_node_query *) arg;
 	struct vio_dev *vdev = to_vio_dev(dev);
 
-	if (vdev->mp == (u64) arg)
-		return 1;
+	if (vdev->dev_no != query->dev_no)
+		return 0;
+	if (vdev->id != query->id)
+		return 0;
+	if (strcmp(vdev->type, query->type))
+		return 0;
 
-	return 0;
+	return 1;
 }
 
 static void vio_remove(struct mdesc_handle *hp, u64 node)
 {
+	const char *type;
+	const u64 *id, *cfg_handle;
+	u64 a;
+	struct vio_md_node_query query;
 	struct device *dev;
 
-	dev = device_find_child(&root_vdev->dev, (void *) node,
+	type = mdesc_get_property(hp, node, "device-type", NULL);
+	if (!type) {
+		type = mdesc_get_property(hp, node, "name", NULL);
+		if (!type)
+			type = mdesc_node_name(hp, node);
+	}
+
+	query.type = type;
+
+	id = mdesc_get_property(hp, node, "id", NULL);
+	cfg_handle = NULL;
+	mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 target;
+
+		target = mdesc_arc_target(hp, a);
+		cfg_handle = mdesc_get_property(hp, target,
+						"cfg-handle", NULL);
+		if (cfg_handle)
+			break;
+	}
+
+	if (!id) {
+		query.dev_no = ~(u64)0;
+		query.id = ~(u64)0;
+	} else if (!cfg_handle) {
+		query.dev_no = *id;
+		query.id = ~(u64)0;
+	} else {
+		query.dev_no = *cfg_handle;
+		query.id = *id;
+	}
+
+	dev = device_find_child(&root_vdev->dev, &query,
 				vio_md_node_match);
 	if (dev) {
 		printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
 
 		device_unregister(dev);
 		put_device(dev);
+	} else {
+		if (!id)
+			printk(KERN_ERR "VIO: Removed unknown %s node.\n",
+			       type);
+		else if (!cfg_handle)
+			printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n",
+			       type, *id);
+		else
+			printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n",
+			       type, *cfg_handle, *id);
 	}
 }
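
Removal can no longer match children by the machine-description node handle, because the node may already be gone or reused by the time the remove event is processed; vio_remove() therefore rebuilds the same identity that vio_create_one() encoded into the device: its type plus the cfg-handle/id pair. The correspondence, as set up in vio_create_one() above, is roughly:

	no "id" property:                 name "<type>"             dev_no = ~0            id = ~0
	"id" only:                        name "<type>-<id>"        dev_no = <id>          id = ~0
	"id" plus back-arc "cfg-handle":  name "<type>-<cfg>-<id>"  dev_no = <cfg-handle>  id = <id>
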
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 69912d2..07c03e7 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,6 +15,7 @@
 lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
+lib-$(CONFIG_SPARC64) += multi3.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
new file mode 100644
index 0000000..d6b6c97
--- /dev/null
+++ b/arch/sparc/lib/multi3.S
@@ -0,0 +1,35 @@
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.align	4
+ENTRY(__multi3) /* %o0 = u, %o1 = v */
+	mov	%o1, %g1
+	srl	%o3, 0, %g4
+	mulx	%g4, %g1, %o1
+	srlx	%g1, 0x20, %g3
+	mulx	%g3, %g4, %g5
+	sllx	%g5, 0x20, %o5
+	srl	%g1, 0, %g4
+	sub	%o1, %o5, %o5
+	srlx	%o5, 0x20, %o5
+	addcc	%g5, %o5, %g5
+	srlx	%o3, 0x20, %o5
+	mulx	%g4, %o5, %g4
+	mulx	%g3, %o5, %o5
+	sethi	%hi(0x80000000), %g3
+	addcc	%g5, %g4, %g5
+	srlx	%g5, 0x20, %g5
+	add	%g3, %g3, %g3
+	movcc	%xcc, %g0, %g3
+	addcc	%o5, %g5, %o5
+	sllx	%g4, 0x20, %g4
+	add	%o1, %g4, %o1
+	add	%o5, %g3, %g2
+	mulx	%g1, %o2, %g1
+	add	%g1, %g2, %g1
+	mulx	%o0, %o3, %o0
+	retl
+	 add	%g1, %o0, %o0
+ENDPROC(__multi3)
+EXPORT_SYMBOL(__multi3)
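
Newer compilers can emit calls to the libgcc helper __multi3 for 128-bit multiplies, and the sparc64 kernel does not link against libgcc, so the helper is provided in-tree. It returns the low 128 bits of u * v, with each 128-bit value passed and returned as a register pair. A portable, self-contained sketch of the same schoolbook decomposition the assembly implements:

typedef unsigned long long u64;
typedef unsigned int u32;

struct u128 { u64 hi, lo; };

static u64 umul64_hi(u64 a, u64 b)	/* high 64 bits of a 64x64 product */
{
	u64 a_lo = (u32)a, a_hi = a >> 32;
	u64 b_lo = (u32)b, b_hi = b >> 32;
	u64 t  = a_hi * b_lo + ((a_lo * b_lo) >> 32);
	u64 t2 = a_lo * b_hi + (u32)t;

	return a_hi * b_hi + (t >> 32) + (t2 >> 32);
}

static struct u128 multi3(struct u128 u, struct u128 v)
{
	struct u128 r;

	r.lo = u.lo * v.lo;	/* low 64 bits of the full product */
	r.hi = umul64_hi(u.lo, v.lo) + u.lo * v.hi + u.hi * v.lo;
	return r;
}
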
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 7c29d38..88855e3 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 0cda653..3c40ebd 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string)
 	}
 
 	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
-		pr_warn("hugepagesz=%llu not supported by MMU.\n",
+		hugetlb_bad_size();
+		pr_err("hugepagesz=%llu not supported by MMU.\n",
 			hugepage_size);
 		goto out;
 	}
@@ -706,10 +707,58 @@ EXPORT_SYMBOL(__flush_dcache_range);
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
-unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
+unsigned long tlb_context_cache = CTX_FIRST_VERSION;
 #define MAX_CTX_NR	(1UL << CTX_NR_BITS)
 #define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
+
+static void mmu_context_wrap(void)
+{
+	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+	unsigned long new_ver, new_ctx, old_ctx;
+	struct mm_struct *mm;
+	int cpu;
+
+	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+	/* Reserve kernel context */
+	set_bit(0, mmu_context_bmap);
+
+	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+	if (unlikely(new_ver == 0))
+		new_ver = CTX_FIRST_VERSION;
+	tlb_context_cache = new_ver;
+
+	/*
+	 * Make sure that any new mm added into per_cpu_secondary_mm is going
+	 * to go through the get_new_mmu_context() path.
+	 */
+	mb();
+
+	/*
+	 * Update the version to the current one on all CPUs that had a valid
+	 * secondary context.
+	 */
+	for_each_online_cpu(cpu) {
+		/*
+		 * If a new mm is stored after we took this mm from the array,
+		 * it will go into get_new_mmu_context() path, because we
+		 * already bumped the version in tlb_context_cache.
+		 */
+		mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+		if (unlikely(!mm || mm == &init_mm))
+			continue;
+
+		old_ctx = mm->context.sparc64_ctx_val;
+		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+			mm->context.sparc64_ctx_val = new_ctx;
+		}
+	}
+}
 
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.
@@ -725,48 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm)
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
-	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
+retry:
+	/* wrap might have happened, test again if our context became valid */
+	if (unlikely(CTX_VALID(mm->context)))
+		goto out;
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
-	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
-			int i;
-			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
-				CTX_FIRST_VERSION;
-			if (new_ctx == 1)
-				new_ctx = CTX_FIRST_VERSION;
-
-			/* Don't call memset, for 16 entries that's just
-			 * plain silly...
-			 */
-			mmu_context_bmap[0] = 3;
-			mmu_context_bmap[1] = 0;
-			mmu_context_bmap[2] = 0;
-			mmu_context_bmap[3] = 0;
-			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
-				mmu_context_bmap[i + 0] = 0;
-				mmu_context_bmap[i + 1] = 0;
-				mmu_context_bmap[i + 2] = 0;
-				mmu_context_bmap[i + 3] = 0;
-			}
-			new_version = 1;
-			goto out;
+			mmu_context_wrap();
+			goto retry;
 		}
 	}
+	if (mm->context.sparc64_ctx_val)
+		cpumask_clear(mm_cpumask(mm));
 	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
 	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
-out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
+out:
 	spin_unlock(&ctx_alloc_lock);
-
-	if (unlikely(new_version))
-		smp_new_mmu_context_version();
 }
 
 static int numa_enabled = 1;
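
The wrap path no longer broadcasts a cross-call: the version field in tlb_context_cache is bumped under ctx_alloc_lock, mms currently loaded on some CPU (those published in per_cpu_secondary_mm) are re-stamped on the spot, and every other mm simply fails CTX_VALID() at its next switch_mm() and lands back in get_new_mmu_context(), which now retries after a wrap. A self-contained sketch of the version/number split this relies on (field widths assumed; the real constants live in asm/mmu_64.h):

#define NR_BITS		13			/* assumed context-number width */
#define NR_MASK		((1UL << NR_BITS) - 1)
#define VERSION_MASK	(~NR_MASK)
#define FIRST_VERSION	(1UL << NR_BITS)

/* Same test as CTX_VALID(): an mm's context is usable only while its version
 * bits match the allocator's current version. */
static int ctx_valid(unsigned long mm_ctx, unsigned long cache)
{
	return !((mm_ctx ^ cache) & VERSION_MASK);
}

After mmu_context_wrap() adds FIRST_VERSION to the cache, every mm still carrying the old version fails this test and is forced through get_new_mmu_context().
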
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index bedf08b..0d4b998 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -496,7 +496,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
 		extern void copy_tsb(unsigned long old_tsb_base,
 				     unsigned long old_tsb_size,
 				     unsigned long new_tsb_base,
-				     unsigned long new_tsb_size);
+				     unsigned long new_tsb_size,
+				     unsigned long page_size_shift);
 		unsigned long old_tsb_base = (unsigned long) old_tsb;
 		unsigned long new_tsb_base = (unsigned long) new_tsb;
 
@@ -504,7 +505,9 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
 			old_tsb_base = __pa(old_tsb_base);
 			new_tsb_base = __pa(new_tsb_base);
 		}
-		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
+		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
+			tsb_index == MM_TSB_BASE ?
+			PAGE_SHIFT : REAL_HPAGE_SHIFT);
 	}
 
 	mm->context.tsb_block[tsb_index].tsb = new_tsb;
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index 5d2fd6c..fcf4d27 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -971,11 +971,6 @@
 	wr		%g0, (1 << PIL_SMP_CAPTURE), %set_softint
 	retry
 
-	.globl		xcall_new_mmu_context_version
-xcall_new_mmu_context_version:
-	wr		%g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
-	retry
-
 #ifdef CONFIG_KGDB
 	.globl		xcall_kgdb_capture
 xcall_kgdb_capture:
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968..f71e520 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
 
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* Return initial ksp value for given task. */
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 1a70e6c..94709ab 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -24,8 +24,7 @@
  * has an opportunity to return -EFAULT to the user if needed.
  * The 64-bit routines just return a "long long" with the value,
  * since they are only used from kernel space and don't expect to fault.
- * Support for 16-bit ops is included in the framework but we don't provide
- * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ * Support for 16-bit ops is included in the framework but we don't provide any.
  *
  * Note that the caller is advised to issue a suitable L1 or L2
  * prefetch on the address being manipulated to avoid extra stalls.
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index cb10153..03e5cc4 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 8541027..b55fe9b 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 			blk_end_request(
 				(*irq_req_buffer)[count]->req,
-				0,
+				BLK_STS_OK,
 				(*irq_req_buffer)[count]->length
 			);
 			kfree((*irq_req_buffer)[count]);
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd..f6d1a3f 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 static inline void mm_copy_segments(struct mm_struct *from_mm,
 				    struct mm_struct *new_mm)
 {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd0..7b56401 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
 	__attribute__((__section__(".data..init_irqstack"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return os_process_pc(userspace_pid[0]);
-}
-
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd18994..0efb4c9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -360,7 +360,7 @@
 	  Management" code will be disabled if you say Y here.
 
 	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
-	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+	  <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
 	  <http://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
@@ -2776,10 +2776,6 @@
 config SYSVIPC_COMPAT
 	def_bool y
 	depends on SYSVIPC
-
-config KEYS_COMPAT
-	def_bool y
-	depends on KEYS
 endif
 
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5851411..bf240b9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -159,7 +159,7 @@
 	# If '-Os' is enabled, disable it and print a warning.
         ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
           undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
-	  $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE.  Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
+          $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE.  Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
         endif
 
     endif
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 44163e8..2c860ad 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -94,7 +94,7 @@
 quiet_cmd_check_data_rel = DATAREL $@
 define cmd_check_data_rel
 	for obj in $(filter %.o,$^); do \
-		readelf -S $$obj | grep -qF .rel.local && { \
+		${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
 			echo "error: $$obj has data relocations!" >&2; \
 			exit 1; \
 		} || true; \
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 54c24f0..56a7e92 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -564,9 +564,6 @@ void choose_random_location(unsigned long input,
 {
 	unsigned long random_addr, min_addr;
 
-	/* By default, keep output position unchanged. */
-	*virt_addr = *output;
-
 	if (cmdline_find_option_bool("nokaslr")) {
 		warn("KASLR disabled: 'nokaslr' on cmdline.");
 		return;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f0..00241c8 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 				  unsigned long output_len)
 {
 	const unsigned long kernel_total_size = VO__end - VO__text;
-	unsigned long virt_addr = (unsigned long)output;
+	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 
 	/* Retain x86 boot parameters pointer passed from startup_32/64. */
 	boot_params = rmode;
@@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
+	if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+		error("Destination virtual address is beyond the kernel mapping area");
 #else
 	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
@@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifndef CONFIG_RELOCATABLE
 	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
 		error("Destination address does not match LOAD_PHYSICAL_ADDR");
-	if ((unsigned long)output != virt_addr)
+	if (virt_addr != LOAD_PHYSICAL_ADDR)
 		error("Destination virtual address changed when not relocatable");
 #endif
 
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355e..766a521 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
 					  unsigned long output_size,
 					  unsigned long *virt_addr)
 {
-	/* No change from existing output location. */
-	*virt_addr = *output;
 }
 #endif
 
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 34b3fa2..9e32d40 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
index 2f87563..2e14acc 100644
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
index 41089e7..45b4fca 100644
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 50bc269..48ef7bb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,6 +252,23 @@
 END(__switch_to_asm)
 
 /*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because it's an
+ * asmlinkage function, so its argument has to be pushed on the stack.  This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+	FRAME_BEGIN
+
+	pushl	%eax
+	call	schedule_tail
+	popl	%eax
+
+	FRAME_END
+	ret
+ENDPROC(schedule_tail_wrapper)
+/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -259,24 +276,15 @@
  * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	FRAME_BEGIN		/* help unwinder find end of stack */
-
-	/*
-	 * schedule_tail() is asmlinkage so we have to put its 'prev' argument
-	 * on the stack.
-	 */
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
+	call	schedule_tail_wrapper
 
 	testl	%ebx, %ebx
 	jnz	1f		/* kernel threads are uncommon */
 
 2:
 	/* When we fork, we trace the syscall return in the child, too. */
-	leal	FRAME_OFFSET(%esp), %eax
+	movl    %esp, %eax
 	call    syscall_return_slowpath
-	FRAME_END
 	jmp     restore_all
 
 	/* kernel thread */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 607d72c..4a4c083 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,7 +36,6 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
-#include <asm/frame.h>
 #include <linux/err.h>
 
 .code64
@@ -406,19 +405,17 @@
  * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	FRAME_BEGIN			/* help unwinder find end of stack */
 	movq	%rax, %rdi
-	call	schedule_tail		/* rdi: 'prev' task parameter */
+	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	testq	%rbx, %rbx		/* from kernel_thread? */
-	jnz	1f			/* kernel threads are uncommon */
+	testq	%rbx, %rbx			/* from kernel_thread? */
+	jnz	1f				/* kernel threads are uncommon */
 
 2:
-	leaq	FRAME_OFFSET(%rsp),%rdi	/* pt_regs pointer */
+	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
-	FRAME_END
 	jmp	restore_regs_and_iret
 
 1:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 580b60f..628b8c5 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1750,6 +1750,8 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
 	return ret;
 }
 
+static struct attribute_group x86_pmu_attr_group;
+
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -1813,6 +1815,14 @@ static int __init init_hw_perf_events(void)
 			x86_pmu_events_group.attrs = tmp;
 	}
 
+	if (x86_pmu.attrs) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
+		if (!WARN_ON(!tmp))
+			x86_pmu_attr_group.attrs = tmp;
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
@@ -2255,7 +2265,7 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event *event,
 			       struct perf_event_mmap_page *userpg, u64 now)
 {
-	struct cyc2ns_data *data;
+	struct cyc2ns_data data;
 	u64 offset;
 
 	userpg->cap_user_time = 0;
@@ -2267,17 +2277,17 @@ void arch_perf_update_userpage(struct perf_event *event,
 	if (!using_native_sched_clock() || !sched_clock_stable())
 		return;
 
-	data = cyc2ns_read_begin();
+	cyc2ns_read_begin(&data);
 
-	offset = data->cyc2ns_offset + __sched_clock_offset;
+	offset = data.cyc2ns_offset + __sched_clock_offset;
 
 	/*
 	 * Internal timekeeping for enabled/running/stopped times
 	 * is always in the local_clock domain.
 	 */
 	userpg->cap_user_time = 1;
-	userpg->time_mult = data->cyc2ns_mul;
-	userpg->time_shift = data->cyc2ns_shift;
+	userpg->time_mult = data.cyc2ns_mul;
+	userpg->time_shift = data.cyc2ns_shift;
 	userpg->time_offset = offset - now;
 
 	/*
@@ -2289,7 +2299,7 @@ void arch_perf_update_userpage(struct perf_event *event,
 		userpg->time_zero = offset;
 	}
 
-	cyc2ns_read_end(data);
+	cyc2ns_read_end();
 }
 
 void
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4..31acf2a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
  [ C(DTLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_INST_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x608,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_INST_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x649,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		[ C(RESULT_MISS)   ] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0,
@@ -3160,6 +3160,19 @@ static int intel_pmu_cpu_prepare(int cpu)
 	return -ENOMEM;
 }
 
+static void flip_smm_bit(void *data)
+{
+	unsigned long set = *(unsigned long *)data;
+
+	if (set > 0) {
+		msr_set_bit(MSR_IA32_DEBUGCTLMSR,
+			    DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+	} else {
+		msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
+			      DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+	}
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -3174,6 +3187,8 @@ static void intel_pmu_cpu_starting(int cpu)
 
 	cpuc->lbr_sel = NULL;
 
+	flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+
 	if (!cpuc->shared_regs)
 		return;
 
@@ -3595,6 +3610,52 @@ static struct attribute *hsw_events_attrs[] = {
 	NULL
 };
 
+static ssize_t freeze_on_smi_show(struct device *cdev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
+}
+
+static DEFINE_MUTEX(freeze_on_smi_mutex);
+
+static ssize_t freeze_on_smi_store(struct device *cdev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned long val;
+	ssize_t ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	mutex_lock(&freeze_on_smi_mutex);
+
+	if (x86_pmu.attr_freeze_on_smi == val)
+		goto done;
+
+	x86_pmu.attr_freeze_on_smi = val;
+
+	get_online_cpus();
+	on_each_cpu(flip_smm_bit, &val, 1);
+	put_online_cpus();
+done:
+	mutex_unlock(&freeze_on_smi_mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(freeze_on_smi);
+
+static struct attribute *intel_pmu_attrs[] = {
+	&dev_attr_freeze_on_smi.attr,
+	NULL,
+};
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -3641,6 +3702,8 @@ __init int intel_pmu_init(void)
 
 	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
 
+
+	x86_pmu.attrs			= intel_pmu_attrs;
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events, when not running in a hypervisor:
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f924629..eb26165 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -18,7 +18,7 @@ enum {
 	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
 };
 
-static enum {
+static const enum {
 	LBR_EIP_FLAGS		= 1,
 	LBR_TSX			= 2,
 } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
@@ -287,7 +287,7 @@ inline u64 lbr_from_signext_quirk_wr(u64 val)
 /*
  * If quirk is needed, ensure sign extension is 61 bits:
  */
-u64 lbr_from_signext_quirk_rd(u64 val)
+static u64 lbr_from_signext_quirk_rd(u64 val)
 {
 	if (static_branch_unlikely(&lbr_from_quirk_key)) {
 		/*
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 758c1aa..44ec523 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
 		pmu = type->pmus;
 		for (i = 0; i < type->num_boxes; i++, pmu++) {
 			box = pmu->boxes[pkg];
-			if (!box && atomic_inc_return(&box->refcnt) == 1)
+			if (box && atomic_inc_return(&box->refcnt) == 1)
 				uncore_box_init(box);
 		}
 	}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index be3d362..53728ee 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -562,6 +562,9 @@ struct x86_pmu {
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
 	struct attribute **cpu_events;
 
+	unsigned long	attr_freeze_on_smi;
+	struct attribute **attrs;
+
 	/*
 	 * CPU Hotplug hooks
 	 */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798..33380b8 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-/**
- * atomic_inc_short - increment of a short integer
- * @v: pointer to type int
- *
- * Atomically adds 1 to @v
- * Returns the new value of @u
- */
-static __always_inline short int atomic_inc_short(short int *v)
-{
-	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
-	return *v;
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index b8ad261..c66d19e 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,6 +29,7 @@ struct pt_regs;
 	} while (0)
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_bug(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0559626..722d0e5 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
 	bool perm_ok; /* do not check permissions if true */
 	bool ud;	/* inject an #UD if host doesn't support insn */
+	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
 	struct x86_exception exception;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4fd5195..3f9a3d2 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s
 #endif
 
 int mce_available(struct cpuinfo_x86 *c);
+bool mce_is_memory_error(struct mce *m);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba1007..d5acc27 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
 #define _ASM_X86_MSHYPER_H
 
 #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
 #include <asm/hyperv.h>
 
 /*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 673f9ac..18b1623 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -137,6 +137,8 @@
 #define DEBUGCTLMSR_BTS_OFF_OS		(1UL <<  9)
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT	14
+#define DEBUGCTLMSR_FREEZE_IN_SMM	(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
 
 #define MSR_PEBS_FRONTEND		0x000003f7
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada99..a28b671 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 27e9f9d..2016962 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -29,11 +29,9 @@ struct cyc2ns_data {
 	u32 cyc2ns_mul;
 	u32 cyc2ns_shift;
 	u64 cyc2ns_offset;
-	u32 __count;
-	/* u32 hole */
-}; /* 24 bytes -- do not grow */
+}; /* 16 bytes */
 
-extern struct cyc2ns_data *cyc2ns_read_begin(void);
-extern void cyc2ns_read_end(struct cyc2ns_data *);
+extern void cyc2ns_read_begin(struct cyc2ns_data *);
+extern void cyc2ns_read_end(void);
 
 #endif /* _ASM_X86_TIMER_H */
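
cyc2ns_read_begin() used to return a pointer into per-CPU data and pair it with the __count reference; it now copies the three conversion fields into caller-provided storage, so the struct shrinks to 16 bytes and cyc2ns_read_end() needs no argument. A sketch of the new calling convention (mul_u64_u32_shr() is the helper from linux/math64.h; the wrapper name is made up):

static u64 cycles_to_ns_sketch(u64 cycles)
{
	struct cyc2ns_data data;
	u64 ns;

	cyc2ns_read_begin(&data);		/* snapshot mul/shift/offset */
	ns = data.cyc2ns_offset +
	     mul_u64_u32_shr(cycles, data.cyc2ns_mul, data.cyc2ns_shift);
	cyc2ns_read_end();			/* nothing to put: the data was copied */
	return ns;
}
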
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4b99423..3c7c419 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
 
 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 26b78d8..85a9e17 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c5b8f76..32e14d1 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		memcpy(insnbuf, replacement, a->replacementlen);
 		insnbuf_sz = a->replacementlen;
 
-		/* 0xe8 is a relative jump; fix the offset. */
-		if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+		/*
+		 * 0xe8 is a relative jump; fix the offset.
+		 *
+		 * Instruction length is checked before the opcode to avoid
+		 * accessing uninitialized bytes for zero-length replacements.
+		 */
+		if (a->replacementlen == 5 && *insnbuf == 0xe8) {
 			*(s32 *)(insnbuf + 1) += replacement - instr;
 			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
 				*(s32 *)(insnbuf + 1),
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index a70fd61..6f07744 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 		break;
 
 	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+	case 11: /* GX1 with inverted Device ID */
 #ifdef CONFIG_PCI
 	{
 		u32 vendor, device;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f5af0cc..9257bd9 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 	dentry = kernfs_mount(fs_type, flags, rdt_root,
 			      RDTGROUP_SUPER_MAGIC, NULL);
 	if (IS_ERR(dentry))
-		goto out_cdp;
+		goto out_destroy;
 
 	static_branch_enable(&rdt_enable_key);
 	goto out;
 
+out_destroy:
+	kernfs_remove(kn_info);
 out_cdp:
 	cdp_disable();
 out:
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5abd4bf..5cfbaeb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m)
 	return 1;
 }
 
-static bool memory_error(struct mce *m)
+bool mce_is_memory_error(struct mce *m)
 {
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	if (c->x86_vendor == X86_VENDOR_AMD) {
+	if (m->cpuvendor == X86_VENDOR_AMD) {
 		/* ErrCodeExt[20:16] */
 		u8 xec = (m->status >> 16) & 0x1f;
 
 		return (xec == 0x0 || xec == 0x8);
-	} else if (c->x86_vendor == X86_VENDOR_INTEL) {
+	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
 		/*
 		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
 		 *
@@ -529,6 +527,7 @@ static bool memory_error(struct mce *m)
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(mce_is_memory_error);
 
 static bool cec_add_mce(struct mce *m)
 {
@@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m)
 		return false;
 
 	/* We eat only correctable DRAM errors with usable addresses. */
-	if (memory_error(m) &&
+	if (mce_is_memory_error(m) &&
 	    !(m->status & MCI_STATUS_UC) &&
 	    mce_usable_address(m))
 		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
@@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 		severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
 
-		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
+		if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
 			if (m.status & MCI_STATUS_ADDRV)
 				m.severity = severity;
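
memory_error() becomes mce_is_memory_error(), keys off m->cpuvendor rather than boot_cpu_data (so the classification depends only on the record itself), and is exported for use outside the MCE core. A hypothetical caller sketch; the function name and surrounding logic are made up for illustration:

#include <asm/mce.h>

static void ras_consume_mce(struct mce *m)
{
	if (!mce_is_memory_error(m))
		return;			/* not a memory error, ignore it */
	/* ... hand m->addr / m->misc to the RAS consumer ... */
}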
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 45db4d2..e9f4d76 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
 
 int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 {
@@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 	if (!desc.mc)
 		return -EINVAL;
 
-	ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
-				 desc.data, desc.size);
+	ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
@@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
 }
 
 static enum ucode_state
-load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
+load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
 {
 	enum ucode_state ret;
 
@@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
 
 #ifdef CONFIG_X86_32
 	/* save BSP's matching patch for early load */
-	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(cpu);
+	if (save) {
+		struct ucode_patch *p = find_patch(0);
 		if (p) {
 			memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
 			memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
@@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
 	enum ucode_state ret = UCODE_NFOUND;
 	const struct firmware *fw;
 
 	/* reload ucode container only on the boot cpu */
-	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+	if (!refresh_fw || !bsp)
 		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
@@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
+	ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index afdfd23..f522415 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -619,6 +619,9 @@ int __init save_microcode_in_initrd_intel(void)
 
 	show_saved_mc();
 
+	/* initrd is going away, clear patch ptr. */
+	intel_ucode_patch = NULL;
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0651e97..9bef1bb 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -689,8 +689,12 @@ static inline void *alloc_tramp(unsigned long size)
 {
 	return module_alloc(size);
 }
-static inline void tramp_free(void *tramp)
+static inline void tramp_free(void *tramp, int size)
 {
+	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	set_memory_nx((unsigned long)tramp, npages);
+	set_memory_rw((unsigned long)tramp, npages);
 	module_memfree(tramp);
 }
 #else
@@ -699,7 +703,7 @@ static inline void *alloc_tramp(unsigned long size)
 {
 	return NULL;
 }
-static inline void tramp_free(void *tramp) { }
+static inline void tramp_free(void *tramp, int size) { }
 #endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
@@ -771,7 +775,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* Copy ftrace_caller onto the trampoline memory */
 	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
 	if (WARN_ON(ret < 0)) {
-		tramp_free(trampoline);
+		tramp_free(trampoline, *tramp_size);
 		return 0;
 	}
 
@@ -797,7 +801,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	/* Are we pointing to the reference? */
 	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) {
-		tramp_free(trampoline);
+		tramp_free(trampoline, *tramp_size);
 		return 0;
 	}
 
@@ -839,7 +843,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 	unsigned long offset;
 	unsigned long ip;
 	unsigned int size;
-	int ret;
+	int ret, npages;
 
 	if (ops->trampoline) {
 		/*
@@ -848,11 +852,14 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 		 */
 		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
 			return;
+		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
+		set_memory_rw(ops->trampoline, npages);
 	} else {
 		ops->trampoline = create_trampoline(ops, &size);
 		if (!ops->trampoline)
 			return;
 		ops->trampoline_size = size;
+		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	}
 
 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
@@ -863,6 +870,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 	/* Do a safe modify in case the trampoline is executing */
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	ret = update_ftrace_func(ip, new);
+	set_memory_ro(ops->trampoline, npages);
 
 	/* The update should never fail */
 	WARN_ON(ret);
@@ -939,7 +947,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
 		return;
 
-	tramp_free((void *)ops->trampoline);
+	tramp_free((void *)ops->trampoline, ops->trampoline_size);
 	ops->trampoline = 0;
 }
 
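
The ftrace changes above give trampolines a write-protect life cycle: allocate writable, patch, mark read-only, and drop the RO/NX protections again right before freeing. A minimal sketch of that flow, with hypothetical helper names standing in for the real ftrace code (the set_memory_*() and module_alloc()/module_memfree() helpers are the real kernel APIs):

/* Sketch only: alloc/update/free_exec_area() are illustrative names. */
static void *alloc_exec_area(unsigned long size)
{
	return module_alloc(size);			/* writable module-area mapping */
}

static void update_exec_area(void *area, int size, void (*patch)(void *))
{
	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	set_memory_rw((unsigned long)area, npages);	/* open for writing */
	patch(area);					/* modify the code */
	set_memory_ro((unsigned long)area, npages);	/* lock it down again */
}

static void free_exec_area(void *area, int size)
{
	int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	/* make the pages non-executable and writable again before freeing */
	set_memory_nx((unsigned long)area, npages);
	set_memory_rw((unsigned long)area, npages);
	module_memfree(area);
}

create_trampoline() and arch_ftrace_update_trampoline() in the hunks above follow this sequence, with module_alloc()/module_memfree() as the backing allocator.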
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 5b2bbfb..6b87780 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -52,6 +52,7 @@
 #include <linux/ftrace.h>
 #include <linux/frame.h>
 #include <linux/kasan.h>
+#include <linux/moduleloader.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -417,6 +418,14 @@ static void prepare_boost(struct kprobe *p, struct insn *insn)
 	}
 }
 
+/* Recover page to RW mode before releasing it */
+void free_insn_page(void *page)
+{
+	set_memory_nx((unsigned long)page & PAGE_MASK, 1);
+	set_memory_rw((unsigned long)page & PAGE_MASK, 1);
+	module_memfree(page);
+}
+
 static int arch_copy_kprobe(struct kprobe *p)
 {
 	struct insn insn;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 901c640..69ea0bc 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -28,6 +28,7 @@
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
+#include <linux/frame.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 }
 
 asm (
+			"optprobe_template_func:\n"
 			".global optprobe_template_entry\n"
 			"optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
@@ -131,7 +133,12 @@ asm (
 			"	popf\n"
 #endif
 			".global optprobe_template_end\n"
-			"optprobe_template_end:\n");
+			"optprobe_template_end:\n"
+			".type optprobe_template_func, @function\n"
+			".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
 
 #define TMPL_MOVE_IDX \
 	((long)&optprobe_template_val - (long)&optprobe_template_entry)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index da5c097..43e10d6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -161,8 +161,8 @@ void kvm_async_pf_task_wait(u32 token)
 			 */
 			rcu_irq_exit();
 			native_safe_halt();
-			rcu_irq_enter();
 			local_irq_disable();
+			rcu_irq_enter();
 		}
 	}
 	if (!n.halted)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb8842..3ca1980 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct inactive_task_frame *frame =
-		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-	return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ff40e74..ffeae81 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
 	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
-		smp_processor_id());
+		raw_smp_processor_id());
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2544700..67393fc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/tboot.h>
 #include <linux/delay.h>
+#include <linux/frame.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type)
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
 
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0b4d3c6..f818236 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	x86_configure_nx();
 
-	simple_udelay_calibration();
-
 	parse_early_param();
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	init_hypervisor_platform();
 
+	simple_udelay_calibration();
+
 	x86_init.resources.probe_roms();
 
 	/* after parse_early_param, so could debug it */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f04479a..045e4f9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int apicid)
 	if (cpu == 1)
 		printk(KERN_INFO "x86: Booting SMP configuration:\n");
 
-	if (system_state == SYSTEM_BOOTING) {
+	if (system_state < SYSTEM_RUNNING) {
 		if (node != current_node) {
 			if (current_node > (-1))
 				pr_cont("\n");
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 207b8f2..213ddf3 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
@@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
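
Both hunks above replace the raw vma->vm_start comparison with vm_start_gap(vma), which widens the hole check by the stack guard gap for VM_GROWSDOWN mappings. A simplified version of that helper, modelled on the include/linux/mm.h implementation from the stack-guard-gap fix (a sketch, not the authoritative definition):

static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
	unsigned long vm_start = vma->vm_start;

	if (vma->vm_flags & VM_GROWSDOWN) {
		vm_start -= stack_guard_gap;	/* keep a hole below a growing stack */
		if (vm_start > vma->vm_start)	/* guard against underflow */
			vm_start = 0;
	}
	return vm_start;
}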
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 4b17240..a4eb279 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,7 +514,7 @@ int tboot_force_iommu(void)
 	if (!tboot_enabled())
 		return 0;
 
-	if (!intel_iommu_tboot_noforce)
+	if (intel_iommu_tboot_noforce)
 		return 1;
 
 	if (no_iommu || swiotlb || dmar_disabled)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3995d3a..bf54309 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -182,7 +182,7 @@ int is_valid_bugaddr(unsigned long addr)
 	return ud == INSN_UD0 || ud == INSN_UD2;
 }
 
-static int fixup_bug(struct pt_regs *regs, int trapnr)
+int fixup_bug(struct pt_regs *regs, int trapnr)
 {
 	if (trapnr != X86_TRAP_UD)
 		return 0;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 714dfba..5270fc0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -51,115 +51,34 @@ static u32 art_to_tsc_denominator;
 static u64 art_to_tsc_offset;
 struct clocksource *art_related_clocksource;
 
-/*
- * Use a ring-buffer like data structure, where a writer advances the head by
- * writing a new data entry and a reader advances the tail when it observes a
- * new entry.
- *
- * Writers are made to wait on readers until there's space to write a new
- * entry.
- *
- * This means that we can always use an {offset, mul} pair to compute a ns
- * value that is 'roughly' in the right direction, even if we're writing a new
- * {offset, mul} pair during the clock read.
- *
- * The down-side is that we can no longer guarantee strict monotonicity anymore
- * (assuming the TSC was that to begin with), because while we compute the
- * intersection point of the two clock slopes and make sure the time is
- * continuous at the point of switching; we can no longer guarantee a reader is
- * strictly before or after the switch point.
- *
- * It does mean a reader no longer needs to disable IRQs in order to avoid
- * CPU-Freq updates messing with his times, and similarly an NMI reader will
- * no longer run the risk of hitting half-written state.
- */
-
 struct cyc2ns {
-	struct cyc2ns_data data[2];	/*  0 + 2*24 = 48 */
-	struct cyc2ns_data *head;	/* 48 + 8    = 56 */
-	struct cyc2ns_data *tail;	/* 56 + 8    = 64 */
-}; /* exactly fits one cacheline */
+	struct cyc2ns_data data[2];	/*  0 + 2*16 = 32 */
+	seqcount_t	   seq;		/* 32 + 4    = 36 */
+
+}; /* fits one cacheline */
 
 static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
 
-struct cyc2ns_data *cyc2ns_read_begin(void)
+void cyc2ns_read_begin(struct cyc2ns_data *data)
 {
-	struct cyc2ns_data *head;
+	int seq, idx;
 
-	preempt_disable();
+	preempt_disable_notrace();
 
-	head = this_cpu_read(cyc2ns.head);
-	/*
-	 * Ensure we observe the entry when we observe the pointer to it.
-	 * matches the wmb from cyc2ns_write_end().
-	 */
-	smp_read_barrier_depends();
-	head->__count++;
-	barrier();
+	do {
+		seq = this_cpu_read(cyc2ns.seq.sequence);
+		idx = seq & 1;
 
-	return head;
+		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
+		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
+		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
+
+	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
 }
 
-void cyc2ns_read_end(struct cyc2ns_data *head)
+void cyc2ns_read_end(void)
 {
-	barrier();
-	/*
-	 * If we're the outer most nested read; update the tail pointer
-	 * when we're done. This notifies possible pending writers
-	 * that we've observed the head pointer and that the other
-	 * entry is now free.
-	 */
-	if (!--head->__count) {
-		/*
-		 * x86-TSO does not reorder writes with older reads;
-		 * therefore once this write becomes visible to another
-		 * cpu, we must be finished reading the cyc2ns_data.
-		 *
-		 * matches with cyc2ns_write_begin().
-		 */
-		this_cpu_write(cyc2ns.tail, head);
-	}
-	preempt_enable();
-}
-
-/*
- * Begin writing a new @data entry for @cpu.
- *
- * Assumes some sort of write side lock; currently 'provided' by the assumption
- * that cpufreq will call its notifiers sequentially.
- */
-static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
-{
-	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
-	struct cyc2ns_data *data = c2n->data;
-
-	if (data == c2n->head)
-		data++;
-
-	/* XXX send an IPI to @cpu in order to guarantee a read? */
-
-	/*
-	 * When we observe the tail write from cyc2ns_read_end(),
-	 * the cpu must be done with that entry and its safe
-	 * to start writing to it.
-	 */
-	while (c2n->tail == data)
-		cpu_relax();
-
-	return data;
-}
-
-static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
-{
-	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
-
-	/*
-	 * Ensure the @data writes are visible before we publish the
-	 * entry. Matches the data-depencency in cyc2ns_read_begin().
-	 */
-	smp_wmb();
-
-	ACCESS_ONCE(c2n->head) = data;
+	preempt_enable_notrace();
 }
 
 /*
@@ -191,7 +110,6 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
 	data->cyc2ns_mul = 0;
 	data->cyc2ns_shift = 0;
 	data->cyc2ns_offset = 0;
-	data->__count = 0;
 }
 
 static void cyc2ns_init(int cpu)
@@ -201,51 +119,29 @@ static void cyc2ns_init(int cpu)
 	cyc2ns_data_init(&c2n->data[0]);
 	cyc2ns_data_init(&c2n->data[1]);
 
-	c2n->head = c2n->data;
-	c2n->tail = c2n->data;
+	seqcount_init(&c2n->seq);
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	struct cyc2ns_data *data, *tail;
+	struct cyc2ns_data data;
 	unsigned long long ns;
 
-	/*
-	 * See cyc2ns_read_*() for details; replicated in order to avoid
-	 * an extra few instructions that came with the abstraction.
-	 * Notable, it allows us to only do the __count and tail update
-	 * dance when its actually needed.
-	 */
+	cyc2ns_read_begin(&data);
 
-	preempt_disable_notrace();
-	data = this_cpu_read(cyc2ns.head);
-	tail = this_cpu_read(cyc2ns.tail);
+	ns = data.cyc2ns_offset;
+	ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
 
-	if (likely(data == tail)) {
-		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
-	} else {
-		data->__count++;
-
-		barrier();
-
-		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
-
-		barrier();
-
-		if (!--data->__count)
-			this_cpu_write(cyc2ns.tail, data);
-	}
-	preempt_enable_notrace();
+	cyc2ns_read_end();
 
 	return ns;
 }
 
-static void set_cyc2ns_scale(unsigned long khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
 {
-	unsigned long long tsc_now, ns_now;
-	struct cyc2ns_data *data;
+	unsigned long long ns_now;
+	struct cyc2ns_data data;
+	struct cyc2ns *c2n;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -254,9 +150,6 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	if (!khz)
 		goto done;
 
-	data = cyc2ns_write_begin(cpu);
-
-	tsc_now = rdtsc();
 	ns_now = cycles_2_ns(tsc_now);
 
 	/*
@@ -264,7 +157,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	 * time function is continuous; see the comment near struct
 	 * cyc2ns_data.
 	 */
-	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
+	clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
 			       NSEC_PER_MSEC, 0);
 
 	/*
@@ -273,20 +166,26 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
 	 * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
 	 * value) - refer perf_event_mmap_page documentation in perf_event.h.
 	 */
-	if (data->cyc2ns_shift == 32) {
-		data->cyc2ns_shift = 31;
-		data->cyc2ns_mul >>= 1;
+	if (data.cyc2ns_shift == 32) {
+		data.cyc2ns_shift = 31;
+		data.cyc2ns_mul >>= 1;
 	}
 
-	data->cyc2ns_offset = ns_now -
-		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
+	data.cyc2ns_offset = ns_now -
+		mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
 
-	cyc2ns_write_end(cpu, data);
+	c2n = per_cpu_ptr(&cyc2ns, cpu);
+
+	raw_write_seqcount_latch(&c2n->seq);
+	c2n->data[0] = data;
+	raw_write_seqcount_latch(&c2n->seq);
+	c2n->data[1] = data;
 
 done:
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 	local_irq_restore(flags);
 }
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -374,6 +273,8 @@ static int __init tsc_setup(char *str)
 		tsc_clocksource_reliable = 1;
 	if (!strncmp(str, "noirqtime", 9))
 		no_sched_irq_time = 1;
+	if (!strcmp(str, "unstable"))
+		mark_tsc_unstable("boot parameter");
 	return 1;
 }
 
@@ -986,7 +887,6 @@ void tsc_restore_sched_clock_state(void)
 }
 
 #ifdef CONFIG_CPU_FREQ
-
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
  * changes.
  *
@@ -1027,7 +927,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 			mark_tsc_unstable("cpufreq changes");
 
-		set_cyc2ns_scale(tsc_khz, freq->cpu);
+		set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
 	}
 
 	return 0;
@@ -1127,6 +1027,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
 
+static void tsc_cs_tick_stable(struct clocksource *cs)
+{
+	if (tsc_unstable)
+		return;
+
+	if (using_native_sched_clock())
+		sched_clock_tick_stable();
+}
+
 /*
  * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
  */
@@ -1140,6 +1049,7 @@ static struct clocksource clocksource_tsc = {
 	.archdata               = { .vclock_mode = VCLOCK_TSC },
 	.resume			= tsc_resume,
 	.mark_unstable		= tsc_cs_mark_unstable,
+	.tick_stable		= tsc_cs_tick_stable,
 };
 
 void mark_tsc_unstable(char *reason)
@@ -1255,6 +1165,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 	static int hpet;
 	u64 tsc_stop, ref_stop, delta;
 	unsigned long freq;
+	int cpu;
 
 	/* Don't bother refining TSC on unstable systems */
 	if (check_tsc_unstable())
@@ -1305,6 +1216,10 @@ static void tsc_refine_calibration_work(struct work_struct *work)
 	/* Inform the TSC deadline clockevent devices about the recalibration */
 	lapic_update_tsc_freq();
 
+	/* Update the sched_clock() rate to match the clocksource one */
+	for_each_possible_cpu(cpu)
+		set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
+
 out:
 	if (boot_cpu_has(X86_FEATURE_ART))
 		art_related_clocksource = &clocksource_tsc;
@@ -1350,7 +1265,7 @@ device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
-	u64 lpj;
+	u64 lpj, cyc;
 	int cpu;
 
 	if (!boot_cpu_has(X86_FEATURE_TSC)) {
@@ -1390,9 +1305,10 @@ void __init tsc_init(void)
 	 * speed as the bootup CPU. (cpufreq notifiers will fix this
 	 * up if their speed diverges)
 	 */
+	cyc = rdtsc();
 	for_each_possible_cpu(cpu) {
 		cyc2ns_init(cpu);
-		set_cyc2ns_scale(tsc_khz, cpu);
+		set_cyc2ns_scale(tsc_khz, cpu, cyc);
 	}
 
 	if (tsc_disabled > 0)
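
The tsc.c rewrite above replaces the hand-rolled head/tail ring with a seqcount latch: two copies of the conversion data, a sequence counter whose low bit selects the copy readers use, and a writer that bumps the counter around each copy so readers never see a half-written entry. A minimal sketch of the pattern (the struct and function names below are illustrative, not from the patch):

struct latched_data {
	seqcount_t	   seq;
	struct cyc2ns_data data[2];	/* two copies, selected by seq & 1 */
};

static void latched_write(struct latched_data *l, const struct cyc2ns_data *val)
{
	raw_write_seqcount_latch(&l->seq);
	l->data[0] = *val;		/* readers momentarily use data[1] */
	raw_write_seqcount_latch(&l->seq);
	l->data[1] = *val;		/* readers switch back to data[0] */
}

static struct cyc2ns_data latched_read(struct latched_data *l)
{
	struct cyc2ns_data snap;
	unsigned int seq;

	do {
		seq = raw_read_seqcount_latch(&l->seq);
		snap = l->data[seq & 1];
	} while (read_seqcount_retry(&l->seq, seq));

	return snap;
}

cyc2ns_read_begin() in the patch open-codes the read loop with this_cpu_read() because the latch is per-CPU, but the retry logic is the same.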
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 82c6d7f..b9389d7 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state)
 	return (unsigned long *)task_pt_regs(state->task) - 2;
 }
 
+static bool is_last_frame(struct unwind_state *state)
+{
+	return state->bp == last_frame(state);
+}
+
 #ifdef CONFIG_X86_32
 #define GCC_REALIGN_WORDS 3
 #else
@@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state)
 	return last_frame(state) - GCC_REALIGN_WORDS;
 }
 
-static bool is_last_task_frame(struct unwind_state *state)
+static bool is_last_aligned_frame(struct unwind_state *state)
 {
 	unsigned long *last_bp = last_frame(state);
 	unsigned long *aligned_bp = last_aligned_frame(state);
 
 	/*
-	 * We have to check for the last task frame at two different locations
-	 * because gcc can occasionally decide to realign the stack pointer and
-	 * change the offset of the stack frame in the prologue of a function
-	 * called by head/entry code.  Examples:
+	 * GCC can occasionally decide to realign the stack pointer and change
+	 * the offset of the stack frame in the prologue of a function called
+	 * by head/entry code.  Examples:
 	 *
 	 * <start_secondary>:
 	 *      push   %edi
@@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state)
 	 *      push   %rbp
 	 *      mov    %rsp,%rbp
 	 *
-	 * Note that after aligning the stack, it pushes a duplicate copy of
-	 * the return address before pushing the frame pointer.
+	 * After aligning the stack, it pushes a duplicate copy of the return
+	 * address before pushing the frame pointer.
 	 */
-	return (state->bp == last_bp ||
-		(state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1)));
+	return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1));
+}
+
+static bool is_last_ftrace_frame(struct unwind_state *state)
+{
+	unsigned long *last_bp = last_frame(state);
+	unsigned long *last_ftrace_bp = last_bp - 3;
+
+	/*
+	 * When unwinding from an ftrace handler of a function called by entry
+	 * code, the stack layout of the last frame is:
+	 *
+	 *   bp
+	 *   parent ret addr
+	 *   bp
+	 *   function ret addr
+	 *   parent ret addr
+	 *   pt_regs
+	 *   -----------------
+	 */
+	return (state->bp == last_ftrace_bp &&
+		*state->bp == *(state->bp + 2) &&
+		*(state->bp + 1) == *(state->bp + 4));
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+	return is_last_frame(state) || is_last_aligned_frame(state) ||
+	       is_last_ftrace_frame(state);
 }
 
 /*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a181ae7..59ca2ee 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -780,18 +780,20 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 {
 	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
-	int j, nent = vcpu->arch.cpuid_nent;
+	struct kvm_cpuid_entry2 *ej;
+	int j = i;
+	int nent = vcpu->arch.cpuid_nent;
 
 	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 	/* when no next entry is found, the current entry[i] is reselected */
-	for (j = i + 1; ; j = (j + 1) % nent) {
-		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
-		if (ej->function == e->function) {
-			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
-			return j;
-		}
-	}
-	return 0; /* silence gcc, even though control never reaches here */
+	do {
+		j = (j + 1) % nent;
+		ej = &vcpu->arch.cpuid_entries[j];
+	} while (ej->function != e->function);
+
+	ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
+
+	return j;
 }
 
 /* find an entry with matching function, matching index (if needed), and that
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2..80890de 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 	}
 
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 	return X86EMUL_CONTINUE;
 }
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c329d28..d24c874 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
 static void cancel_hv_timer(struct kvm_lapic *apic)
 {
+	preempt_disable();
 	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
 	apic->lapic_timer.hv_timer_in_use = false;
+	preempt_enable();
 }
 
 static bool start_hv_timer(struct kvm_lapic *apic)
@@ -1934,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
 		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
-	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
+	if (kvm_vcpu_is_reset_bsp(vcpu) &&
+	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
 		kvm_lapic_set_reg(apic, APIC_LVT0,
 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5d3376f..cb82259 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3698,12 +3698,15 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
 }
 
-static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(!lapic_in_kernel(vcpu) ||
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
+	if (is_guest_mode(vcpu))
+		return false;
+
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
@@ -3719,7 +3722,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	if (!prefault && can_do_async_pf(vcpu)) {
+	if (!prefault && kvm_can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 			trace_kvm_async_pf_doublefault(gva, gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2797580..330bf3a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -76,6 +76,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty);
+bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 183ddb2..33460fc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/hashtable.h>
+#include <linux/frame.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -1807,7 +1808,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
 	 * for cross vendor migration purposes by "not present"
 	 */
-	var->unusable = !var->present || (var->type == 0);
+	var->unusable = !var->present;
 
 	switch (seg) {
 	case VCPU_SREG_TR:
@@ -1840,6 +1841,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 		 */
 		if (var->unusable)
 			var->db = 0;
+		/* This is symmetric with svm_set_segment() */
 		var->dpl = to_svm(vcpu)->vmcb->save.cpl;
 		break;
 	}
@@ -1980,18 +1982,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	s->base = var->base;
 	s->limit = var->limit;
 	s->selector = var->selector;
-	if (var->unusable)
-		s->attrib = 0;
-	else {
-		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
-		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
-		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
-		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
-		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
-		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
-		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
-		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
-	}
+	s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
+	s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
+	s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
+	s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
+	s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
+	s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
+	s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
+	s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
 
 	/*
 	 * This is always accurate, except if SYSRET returned to a segment
@@ -2000,7 +1998,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	 * would entail passing the CPL to userspace and back.
 	 */
 	if (seg == VCPU_SREG_SS)
-		svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
+		/* This is symmetric with svm_get_segment() */
+		svm->vmcb->save.cpl = (var->dpl & 3);
 
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
@@ -4908,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	mark_all_clean(svm->vmcb);
 }
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
 
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 72f7839..1b469b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/hrtimer.h>
+#include <linux/frame.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -2425,7 +2426,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 	if (!(vmcs12->exception_bitmap & (1u << nr)))
 		return 0;
 
-	nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 			  vmcs_read32(VM_EXIT_INTR_INFO),
 			  vmcs_readl(EXIT_QUALIFICATION));
 	return 1;
@@ -6914,97 +6915,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-/*
- * This function performs the various checks including
- * - if it's 4KB aligned
- * - No bits beyond the physical address width are set
- * - Returns 0 on success or else 1
- * (Intel SDM Section 30.3)
- */
-static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
-				  gpa_t *vmpointer)
+static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 {
 	gva_t gva;
-	gpa_t vmptr;
 	struct x86_exception e;
-	struct page *page;
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
 			vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
 		return 1;
 
-	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
-				sizeof(vmptr), &e)) {
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
+				sizeof(*vmpointer), &e)) {
 		kvm_inject_page_fault(vcpu, &e);
 		return 1;
 	}
 
-	switch (exit_reason) {
-	case EXIT_REASON_VMON:
-		/*
-		 * SDM 3: 24.11.5
-		 * The first 4 bytes of VMXON region contain the supported
-		 * VMCS revision identifier
-		 *
-		 * Note - IA32_VMX_BASIC[48] will never be 1
-		 * for the nested case;
-		 * which replaces physical address width with 32
-		 *
-		 */
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		page = nested_get_page(vcpu, vmptr);
-		if (page == NULL) {
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		if (*(u32 *)kmap(page) != VMCS12_REVISION) {
-			kunmap(page);
-			nested_release_page_clean(page);
-			nested_vmx_failInvalid(vcpu);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		kunmap(page);
-		nested_release_page_clean(page);
-		vmx->nested.vmxon_ptr = vmptr;
-		break;
-	case EXIT_REASON_VMCLEAR:
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_INVALID_ADDRESS);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		if (vmptr == vmx->nested.vmxon_ptr) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_VMXON_POINTER);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		break;
-	case EXIT_REASON_VMPTRLD:
-		if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMPTRLD_INVALID_ADDRESS);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-
-		if (vmptr == vmx->nested.vmxon_ptr) {
-			nested_vmx_failValid(vcpu,
-					     VMXERR_VMPTRLD_VMXON_POINTER);
-			return kvm_skip_emulated_instruction(vcpu);
-		}
-		break;
-	default:
-		return 1; /* shouldn't happen */
-	}
-
-	if (vmpointer)
-		*vmpointer = vmptr;
 	return 0;
 }
 
@@ -7066,6 +6991,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 static int handle_vmon(struct kvm_vcpu *vcpu)
 {
 	int ret;
+	gpa_t vmptr;
+	struct page *page;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
 		| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -7095,9 +7022,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
- 
+
+	/*
+	 * SDM 3: 24.11.5
+	 * The first 4 bytes of VMXON region contain the supported
+	 * VMCS revision identifier
+	 *
+	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
+	 * which replaces physical address width with 32
+	 */
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	page = nested_get_page(vcpu, vmptr);
+	if (page == NULL) {
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+	if (*(u32 *)kmap(page) != VMCS12_REVISION) {
+		kunmap(page);
+		nested_release_page_clean(page);
+		nested_vmx_failInvalid(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+	kunmap(page);
+	nested_release_page_clean(page);
+
+	vmx->nested.vmxon_ptr = vmptr;
 	ret = enter_vmx_operation(vcpu);
 	if (ret)
 		return ret;
@@ -7213,9 +7168,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	if (vmptr == vmx->nested.vmxon_ptr) {
+		nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
@@ -7545,9 +7510,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr))
+	if (nested_vmx_get_vmptr(vcpu, &vmptr))
 		return 1;
 
+	if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
+		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+	if (vmptr == vmx->nested.vmxon_ptr) {
+		nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
 	if (vmx->nested.current_vmptr != vmptr) {
 		struct vmcs12 *new_vmcs12;
 		struct page *page;
@@ -7913,11 +7888,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	int cr = exit_qualification & 15;
-	int reg = (exit_qualification >> 8) & 15;
-	unsigned long val = kvm_register_readl(vcpu, reg);
+	int reg;
+	unsigned long val;
 
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
+		reg = (exit_qualification >> 8) & 15;
+		val = kvm_register_readl(vcpu, reg);
 		switch (cr) {
 		case 0:
 			if (vmcs12->cr0_guest_host_mask &
@@ -7972,6 +7949,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
 		 * cr0. Other attempted changes are ignored, with no exit.
 		 */
+		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
 		if (vmcs12->cr0_guest_host_mask & 0xe &
 		    (val ^ vmcs12->cr0_read_shadow))
 			return true;
@@ -8675,6 +8653,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 			);
 	}
 }
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
 static bool vmx_has_high_real_mode_segbase(void)
 {
@@ -9051,6 +9030,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);
 }
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02363e3..0e846f0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
 	ctxt->eip = kvm_rip_read(vcpu);
 	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 	return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * rflags is the old, "raw" value of the flags.  The new value has
-	 * not been saved yet.
-	 *
-	 * This is correct even for TF set by the guest, because "the
-	 * processor will not generate this exception after the instruction
-	 * that sets the TF flag".
-	 */
-	if (unlikely(rflags & X86_EFLAGS_TF)) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-						  DR6_RTM;
-			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			*r = EMULATE_USER_EXIT;
-		} else {
-			/*
-			 * "Certain debug exceptions may clear bit 0-3.  The
-			 * remaining contents of the DR6 register are never
-			 * cleared by the processor".
-			 */
-			vcpu->arch.dr6 &= ~15;
-			vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-		}
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+		kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+		kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		*r = EMULATE_USER_EXIT;
+	} else {
+		/*
+		 * "Certain debug exceptions may clear bit 0-3.  The
+		 * remaining contents of the DR6 register are never
+		 * cleared by the processor".
+		 */
+		vcpu->arch.dr6 &= ~15;
+		vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+		kvm_queue_exception(vcpu, DB_VECTOR);
 	}
 }
 
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	int r = EMULATE_DONE;
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+	/*
+	 * rflags is the old, "raw" value of the flags.  The new value has
+	 * not been saved yet.
+	 *
+	 * This is correct even for TF set by the guest, because "the
+	 * processor will not generate this exception after the instruction
+	 * that sets the TF flag".
+	 */
+	if (unlikely(rflags & X86_EFLAGS_TF))
+		kvm_vcpu_do_singlestep(vcpu, &r);
 	return r == EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
-		if (r == EMULATE_DONE)
-			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+		if (r == EMULATE_DONE &&
+		    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+			kvm_vcpu_do_singlestep(vcpu, &r);
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 			__kvm_set_rflags(vcpu, ctxt->eflags);
@@ -8394,10 +8396,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.pv.pv_unhalted)
 		return true;
 
-	if (atomic_read(&vcpu->arch.nmi_queued))
+	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+	    (vcpu->arch.nmi_pending &&
+	     kvm_x86_ops->nmi_allowed(vcpu)))
 		return true;
 
-	if (kvm_test_request(KVM_REQ_SMI, vcpu))
+	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
+	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
 		return true;
 
 	if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8604,8 +8609,7 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
 		return true;
 	else
-		return !kvm_event_needs_reinjection(vcpu) &&
-			kvm_x86_ops->interrupt_allowed(vcpu);
+		return kvm_can_do_async_pf(vcpu);
 }
 
 void kvm_arch_start_assignment(struct kvm *kvm)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index c815564..10ffa7e 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -13,14 +13,14 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
 	movl    4(%rdi), %ecx
 	movl    8(%rdi), %edx
 	movl    12(%rdi), %ebx
-	movl    20(%rdi), %ebp
+	movl    20(%rdi), %r12d
 	movl    24(%rdi), %esi
 	movl    28(%rdi), %edi
 1:	\op
@@ -29,10 +29,10 @@
 	movl    %ecx, 4(%r10)
 	movl    %edx, 8(%r10)
 	movl    %ebx, 12(%r10)
-	movl    %ebp, 20(%r10)
+	movl    %r12d, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
 3:
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 767be7c..12e3771 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 35ea061..0ea8afc 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -162,6 +162,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 	if (fixup_exception(regs, trapnr))
 		return;
 
+	if (fixup_bug(regs, trapnr))
+		return;
+
 fail:
 	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
 		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 302f43f..adad702 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cbc87ea..9b3f9fa 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,16 +161,16 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
 	 * use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
+	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
 		page_size_mask |= 1 << PG_LEVEL_2M;
-#endif
+	else
+		direct_gbpages = 0;
 
 	/* Enable PSE if available */
 	if (boot_cpu_has(X86_FEATURE_PSE))
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 95651dc..0a59daf 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -990,7 +990,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
 
 		pud_base = pud_offset(p4d, 0);
 		remove_pud_table(pud_base, addr, next, direct);
-		free_pud_table(pud_base, p4d);
+		/*
+		 * For 4-level page tables we do not want to free PUDs, but in the
+		 * 5-level case we should free them. This code will have to change
+		 * to adapt for boot-time switching between 4 and 5 level page tables.
+		 */
+		if (CONFIG_PGTABLE_LEVELS == 5)
+			free_pud_table(pud_base, p4d);
 	}
 
 	if (direct)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1dcd2be..c8520b2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -186,7 +186,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	unsigned int i, level;
 	unsigned long addr;
 
-	BUG_ON(irqs_disabled());
+	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 	WARN_ON(PAGE_ALIGN(start) != start);
 
 	on_each_cpu(__cpa_flush_range, NULL, 1);
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 90568c3..fefb4b6 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,4 +1,6 @@
 #
 # Arch-specific network modules
 #
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
 obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index f1d83b3..2f56e1e 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,4 +1,5 @@
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
 
 obj-$(CONFIG_EFI) 		+= quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_EARLY_PRINTK_EFI)	+= early_printk.o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7e76a4d..f084d87 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void)
 
 	/*
 	 * We don't do virtual mode, since we don't do runtime services, on
-	 * non-native EFI
+	 * non-native EFI. With efi=old_map, we don't do runtime services in
+	 * the kexec kernel because in the initial boot something else might
+	 * have been mapped at these virtual addresses.
 	 */
-	if (!efi_is_native()) {
+	if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) {
 		efi_memmap_unmap();
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
@@ -1012,7 +1014,6 @@ static void __init __efi_enter_virtual_mode(void)
 	 * necessary relocation fixups for the new virtual addresses.
 	 */
 	efi_runtime_update_mappings();
-	efi_dump_pagetable();
 
 	/* clean DUMMY object */
 	efi_delete_dummy_variable();
@@ -1027,6 +1028,8 @@ void __init efi_enter_virtual_mode(void)
 		kexec_enter_virtual_mode();
 	else
 		__efi_enter_virtual_mode();
+
+	efi_dump_pagetable();
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 3481268..52f7faa 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -44,7 +44,14 @@ int __init efi_alloc_page_tables(void)
 }
 
 void efi_sync_low_kernel_mappings(void) {}
-void __init efi_dump_pagetable(void) {}
+
+void __init efi_dump_pagetable(void)
+{
+#ifdef CONFIG_EFI_PGT_DUMP
+	ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+#endif
+}
+
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	return 0;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c488625..8ff1f95 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable)
 
 pgd_t * __init efi_call_phys_prolog(void)
 {
-	unsigned long vaddress;
-	pgd_t *save_pgd;
+	unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+	pgd_t *save_pgd, *pgd_k, *pgd_efi;
+	p4d_t *p4d, *p4d_k, *p4d_efi;
+	pud_t *pud;
 
 	int pgd;
-	int n_pgds;
+	int n_pgds, i, j;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		save_pgd = (pgd_t *)read_cr3();
@@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void)
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
 
+	/*
+	 * Build 1:1 identity mapping for efi=old_map usage. Note that
+	 * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while
+	 * it is PUD_SIZE aligned with KASLR enabled. So for a given physical
+	 * address X, pud_index(X) != pud_index(__va(X)); we can only copy the
+	 * PUD entry of __va(X) to fill in the PUD entry of X to build the 1:1
+	 * mapping. This means we can only reuse the PMD tables of the direct
+	 * mapping.
+	 */
 	for (pgd = 0; pgd < n_pgds; pgd++) {
-		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
-		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+		addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+		vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+		pgd_efi = pgd_offset_k(addr_pgd);
+		save_pgd[pgd] = *pgd_efi;
+
+		p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+		if (!p4d) {
+			pr_err("Failed to allocate p4d table!\n");
+			goto out;
+		}
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			addr_p4d = addr_pgd + i * P4D_SIZE;
+			p4d_efi = p4d + p4d_index(addr_p4d);
+
+			pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+			if (!pud) {
+				pr_err("Failed to allocate pud table!\n");
+				goto out;
+			}
+
+			for (j = 0; j < PTRS_PER_PUD; j++) {
+				addr_pud = addr_p4d + j * PUD_SIZE;
+
+				if (addr_pud > (max_pfn << PAGE_SHIFT))
+					break;
+
+				vaddr = (unsigned long)__va(addr_pud);
+
+				pgd_k = pgd_offset_k(vaddr);
+				p4d_k = p4d_offset(pgd_k, vaddr);
+				pud[j] = *pud_offset(p4d_k, vaddr);
+			}
+		}
 	}
 out:
 	__flush_tlb_all();
@@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	int pgd_idx;
+	int pgd_idx, i;
 	int nr_pgds;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		write_cr3((unsigned long)save_pgd);
@@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 
 	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
 
-	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 
+		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+			continue;
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			p4d = p4d_offset(pgd,
+					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+				continue;
+
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
+			pud_free(&init_mm, pud);
+		}
+
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		p4d_free(&init_mm, p4d);
+	}
+
 	kfree(save_pgd);
 
 	__flush_tlb_all();
@@ -526,7 +589,10 @@ void __init efi_runtime_update_mappings(void)
 void __init efi_dump_pagetable(void)
 {
 #ifdef CONFIG_EFI_PGT_DUMP
-	ptdump_walk_pgd_level(NULL, efi_pgd);
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+	else
+		ptdump_walk_pgd_level(NULL, efi_pgd);
 #endif
 }
 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 2661599..8a99a2e 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -15,12 +15,66 @@
 #include <asm/e820/api.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
+#include <asm/cpu_device_id.h>
 
 #define EFI_MIN_RESERVE 5120
 
 #define EFI_DUMMY_GUID \
 	EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
 
+#define QUARK_CSH_SIGNATURE		0x5f435348	/* _CSH */
+#define QUARK_SECURITY_HEADER_SIZE	0x400
+
+/*
+ * Header prepended to the standard EFI capsule on Quark systems that are
+ * based on the Intel firmware BSP.
+ * @csh_signature:	Unique identifier to sanity check signed module
+ * 			presence ("_CSH").
+ * @version:		Current version of CSH used. Should be one for Quark A0.
+ * @modulesize:		Size of the entire module including the module header
+ * 			and payload.
+ * @security_version_number_index: Index of SVN to use for validation of signed
+ * 			module.
+ * @security_version_number: Used to prevent against roll back of modules.
+ * @rsvd_module_id:	Currently unused for Clanton (Quark).
+ * @rsvd_module_vendor:	Vendor Identifier. For Intel products the value
+ * 			is 0x00008086.
+ * @rsvd_date:		BCD representation of build date as yyyymmdd, where
+ * 			yyyy=4 digit year, mm=1-12, dd=1-31.
+ * @headersize:		Total length of the header including any padding
+ * 			optionally added by the signing tool.
+ * @hash_algo:		What Hash is used in the module signing.
+ * @cryp_algo:		What Crypto is used in the module signing.
+ * @keysize:		Total length of the key data including any padding
+ * 			optionally added by the signing tool.
+ * @signaturesize:	Total length of the signature including any padding
+ * 			optionally added by the signing tool.
+ * @rsvd_next_header:	32-bit pointer to the next Secure Boot Module in the
+ * 			chain, if there is a next header.
+ * @rsvd:		Reserved, padding structure to required size.
+ *
+ * See also QuartSecurityHeader_t in
+ * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h
+ * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP
+ */
+struct quark_security_header {
+	u32 csh_signature;
+	u32 version;
+	u32 modulesize;
+	u32 security_version_number_index;
+	u32 security_version_number;
+	u32 rsvd_module_id;
+	u32 rsvd_module_vendor;
+	u32 rsvd_date;
+	u32 headersize;
+	u32 hash_algo;
+	u32 cryp_algo;
+	u32 keysize;
+	u32 signaturesize;
+	u32 rsvd_next_header;
+	u32 rsvd[2];
+};
+
 static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
 
 static bool efi_no_storage_paranoia;
@@ -360,6 +414,9 @@ void __init efi_free_boot_services(void)
 		free_bootmem_late(start, size);
 	}
 
+	if (!num_entries)
+		return;
+
 	new_size = efi.memmap.desc_size * num_entries;
 	new_phys = efi_memmap_alloc(num_entries);
 	if (!new_phys) {
@@ -501,3 +558,86 @@ bool efi_poweroff_required(void)
 {
 	return acpi_gbl_reduced_hardware || acpi_no_s5;
 }
+
+#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH
+
+static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
+				  size_t hdr_bytes)
+{
+	struct quark_security_header *csh = *pkbuff;
+
+	/* Only process the data block if it is larger than the security header */
+	if (hdr_bytes < sizeof(struct quark_security_header))
+		return 0;
+
+	if (csh->csh_signature != QUARK_CSH_SIGNATURE ||
+	    csh->headersize != QUARK_SECURITY_HEADER_SIZE)
+		return 1;
+
+	/* Only process the data block if the EFI header is included */
+	if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE +
+			sizeof(efi_capsule_header_t))
+		return 0;
+
+	pr_debug("Quark security header detected\n");
+
+	if (csh->rsvd_next_header != 0) {
+		pr_err("multiple Quark security headers not supported\n");
+		return -EINVAL;
+	}
+
+	*pkbuff += csh->headersize;
+	cap_info->total_size = csh->headersize;
+
+	/*
+	 * Update the first page pointer to skip over the CSH header.
+	 */
+	cap_info->pages[0] += csh->headersize;
+
+	return 1;
+}
+
+#define ICPU(family, model, quirk_handler) \
+	{ X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \
+	  (unsigned long)&quirk_handler }
+
+static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
+	ICPU(5, 9, qrk_capsule_setup_info),	/* Intel Quark X1000 */
+	{ }
+};
+
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+			   size_t hdr_bytes)
+{
+	int (*quirk_handler)(struct capsule_info *, void **, size_t);
+	const struct x86_cpu_id *id;
+	int ret;
+
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	cap_info->total_size = 0;
+
+	id = x86_match_cpu(efi_capsule_quirk_ids);
+	if (id) {
+		/*
+		 * The quirk handler is supposed to return
+		 *  - a value > 0 if the setup should continue, after advancing
+		 *    kbuff as needed
+		 *  - 0 if not enough hdr_bytes are available yet
+		 *  - a negative error code otherwise
+		 */
+		quirk_handler = (typeof(quirk_handler))id->driver_data;
+		ret = quirk_handler(cap_info, &kbuff, hdr_bytes);
+		if (ret <= 0)
+			return ret;
+	}
+
+	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+
+	cap_info->total_size += cap_info->header.imagesize;
+
+	return __efi_capsule_setup_info(cap_info);
+}
+
+#endif
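
The comment in efi_capsule_setup_info() above defines the quirk handler contract: return a value > 0 to continue (after advancing the buffer), 0 when not enough header bytes have arrived yet, or a negative error code. Purely as an illustration (the function name, VENDOR_HDR_SIZE and VENDOR_HDR_MAGIC below are hypothetical, not part of the patch), a handler for some other vendor header would follow the same shape as the Quark one:

static int vendor_capsule_setup_info(struct capsule_info *cap_info,
				     void **pkbuff, size_t hdr_bytes)
{
	/* Not enough data buffered yet: ask the core to call us again. */
	if (hdr_bytes < VENDOR_HDR_SIZE + sizeof(efi_capsule_header_t))
		return 0;

	/* No vendor header present: continue with the buffer untouched. */
	if (*(u32 *)*pkbuff != VENDOR_HDR_MAGIC)
		return 1;

	/* Skip the vendor header and account for it in the total size. */
	*pkbuff += VENDOR_HDR_SIZE;
	cap_info->total_size = VENDOR_HDR_SIZE;
	cap_info->pages[0] += VENDOR_HDR_SIZE;

	return 1;
}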
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 42e65fe..7956715 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -456,12 +456,13 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
  */
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
-	struct cyc2ns_data *data = cyc2ns_read_begin();
+	struct cyc2ns_data data;
 	unsigned long long ns;
 
-	ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+	cyc2ns_read_begin(&data);
+	ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
+	cyc2ns_read_end();
 
-	cyc2ns_read_end(data);
 	return ns;
 }
 
@@ -470,12 +471,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
  */
 static inline unsigned long long ns_2_cycles(unsigned long long ns)
 {
-	struct cyc2ns_data *data = cyc2ns_read_begin();
+	struct cyc2ns_data data;
 	unsigned long long cyc;
 
-	cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
+	cyc2ns_read_begin(&data);
+	cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul;
+	cyc2ns_read_end();
 
-	cyc2ns_read_end(data);
 	return cyc;
 }
 
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a6a198c..0504187 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
 # __restore_processor_state() restores %gs after S3 resume and so should not
 # itself be stack-protected
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index fffb0a1..bced7a3 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 30bb2e8..a18703b 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = {
 	.tables		= EFI_INVALID_TABLE_ADDR  /* Initialized later. */
 };
 
-static const struct efi efi_xen __initconst = {
-	.systab                   = NULL, /* Initialized later. */
-	.runtime_version	  = 0,    /* Initialized later. */
-	.mps                      = EFI_INVALID_TABLE_ADDR,
-	.acpi                     = EFI_INVALID_TABLE_ADDR,
-	.acpi20                   = EFI_INVALID_TABLE_ADDR,
-	.smbios                   = EFI_INVALID_TABLE_ADDR,
-	.smbios3                  = EFI_INVALID_TABLE_ADDR,
-	.sal_systab               = EFI_INVALID_TABLE_ADDR,
-	.boot_info                = EFI_INVALID_TABLE_ADDR,
-	.hcdp                     = EFI_INVALID_TABLE_ADDR,
-	.uga                      = EFI_INVALID_TABLE_ADDR,
-	.uv_systab                = EFI_INVALID_TABLE_ADDR,
-	.fw_vendor                = EFI_INVALID_TABLE_ADDR,
-	.runtime                  = EFI_INVALID_TABLE_ADDR,
-	.config_table             = EFI_INVALID_TABLE_ADDR,
-	.get_time                 = xen_efi_get_time,
-	.set_time                 = xen_efi_set_time,
-	.get_wakeup_time          = xen_efi_get_wakeup_time,
-	.set_wakeup_time          = xen_efi_set_wakeup_time,
-	.get_variable             = xen_efi_get_variable,
-	.get_next_variable        = xen_efi_get_next_variable,
-	.set_variable             = xen_efi_set_variable,
-	.query_variable_info      = xen_efi_query_variable_info,
-	.update_capsule           = xen_efi_update_capsule,
-	.query_capsule_caps       = xen_efi_query_capsule_caps,
-	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
-	.reset_system             = xen_efi_reset_system,
-	.set_virtual_address_map  = NULL, /* Not used under Xen. */
-	.flags			  = 0     /* Initialized later. */
-};
-
 static efi_system_table_t __init *xen_efi_probe(void)
 {
 	struct xen_platform_op op = {
@@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
 
 	/* Here we know that Xen runs on EFI platform. */
 
-	efi = efi_xen;
+	efi.get_time                 = xen_efi_get_time;
+	efi.set_time                 = xen_efi_set_time;
+	efi.get_wakeup_time          = xen_efi_get_wakeup_time;
+	efi.set_wakeup_time          = xen_efi_set_wakeup_time;
+	efi.get_variable             = xen_efi_get_variable;
+	efi.get_next_variable        = xen_efi_get_next_variable;
+	efi.set_variable             = xen_efi_set_variable;
+	efi.query_variable_info      = xen_efi_query_variable_info;
+	efi.update_capsule           = xen_efi_update_capsule;
+	efi.query_capsule_caps       = xen_efi_query_capsule_caps;
+	efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+	efi.reset_system             = xen_efi_reset_system;
 
 	efi_systab_xen.tables = info->cfg.addr;
 	efi_systab_xen.nr_tables = info->cfg.nent;
diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h
index f71f88e..19707db 100644
--- a/arch/xtensa/include/asm/irq.h
+++ b/arch/xtensa/include/asm/irq.h
@@ -29,7 +29,8 @@ static inline void variant_irq_disable(unsigned int irq) { }
 # define PLATFORM_NR_IRQS 0
 #endif
 #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS)
+#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
 
 #if VARIANT_NR_IRQS == 0
 static inline void variant_init_irq(void) { }
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee..30ee8c6 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
 #define release_segments(mm)	do { } while(0)
 #define forget_segments()	do { } while (0)
 
-#define thread_saved_pc(tsk)	(task_pt_regs(tsk)->pc)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index a265edd..9934102 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -34,11 +34,6 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
 	int irq = irq_find_mapping(NULL, hwirq);
 
-	if (hwirq >= NR_IRQS) {
-		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-				__func__, hwirq);
-	}
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
 	{
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 394ef08..33bfa52 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -593,8 +593,7 @@ c_show(struct seq_file *f, void *slot)
 		      (ccount_freq/10000) % 100,
 		      loops_per_jiffy/(500000/HZ),
 		      (loops_per_jiffy/(5000/HZ)) % 100);
-
-	seq_printf(f,"flags\t\t: "
+	seq_puts(f, "flags\t\t: "
 #if XCHAL_HAVE_NMI
 		     "nmi "
 #endif
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 0693792..74afbf0 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		/* At this point:  (!vmm || addr < vmm->vm_end). */
 		if (TASK_SIZE - len < addr)
 			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
+		if (!vmm || addr + len <= vm_start_gap(vmm))
 			return addr;
 		addr = vmm->vm_end;
 		if (flags & MAP_SHARED)
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 30d9fc2..162c77e 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -118,7 +118,7 @@
   SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR)
   SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4)
   SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR)
-  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48)
+  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20)
   SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 #endif
 
@@ -306,13 +306,13 @@
 		  .UserExceptionVector.literal)
   SECTION_VECTOR (_DoubleExceptionVector_literal,
 		  .DoubleExceptionVector.literal,
-		  DOUBLEEXC_VECTOR_VADDR - 48,
+		  DOUBLEEXC_VECTOR_VADDR - 20,
 		  SIZEOF(.UserExceptionVector.text),
 		  .UserExceptionVector.text)
   SECTION_VECTOR (_DoubleExceptionVector_text,
 		  .DoubleExceptionVector.text,
 		  DOUBLEEXC_VECTOR_VADDR,
-		  48,
+		  20,
 		  .DoubleExceptionVector.literal)
 
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 02e94bb..c45b90b 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -317,8 +317,7 @@ static int __init simdisk_init(void)
 	if (simdisk_count > MAX_SIMDISK_COUNT)
 		simdisk_count = MAX_SIMDISK_COUNT;
 
-	sddev = kmalloc(simdisk_count * sizeof(struct simdisk),
-			GFP_KERNEL);
+	sddev = kmalloc_array(simdisk_count, sizeof(*sddev), GFP_KERNEL);
 	if (sddev == NULL)
 		goto out_unregister;
 
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index dbeea2b..1fda7e2 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -24,16 +24,18 @@
 
 /* Interrupt configuration. */
 
-#define PLATFORM_NR_IRQS	10
+#define PLATFORM_NR_IRQS	0
 
 /* Default assignment of LX60 devices to external interrupts. */
 
 #ifdef CONFIG_XTENSA_MX
 #define DUART16552_INTNUM	XCHAL_EXTINT3_NUM
 #define OETH_IRQ		XCHAL_EXTINT4_NUM
+#define C67X00_IRQ		XCHAL_EXTINT8_NUM
 #else
 #define DUART16552_INTNUM	XCHAL_EXTINT0_NUM
 #define OETH_IRQ		XCHAL_EXTINT1_NUM
+#define C67X00_IRQ		XCHAL_EXTINT5_NUM
 #endif
 
 /*
@@ -63,5 +65,5 @@
 
 #define C67X00_PADDR		(XCHAL_KIO_PADDR + 0x0D0D0000)
 #define C67X00_SIZE		0x10
-#define C67X00_IRQ		5
+
 #endif /* __XTENSA_XTAVNET_HARDWARE_H */
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 779be72..42285f3 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -175,8 +175,8 @@ static struct resource ethoc_res[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[2] = { /* IRQ number */
-		.start = OETH_IRQ,
-		.end   = OETH_IRQ,
+		.start = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
+		.end   = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
 		.flags = IORESOURCE_IRQ,
 	},
 };
@@ -213,8 +213,8 @@ static struct resource c67x00_res[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = { /* IRQ number */
-		.start = C67X00_IRQ,
-		.end   = C67X00_IRQ,
+		.start = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
+		.end   = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
 		.flags = IORESOURCE_IRQ,
 	},
 };
@@ -247,7 +247,7 @@ static struct resource serial_resource = {
 static struct plat_serial8250_port serial_platform_data[] = {
 	[0] = {
 		.mapbase	= DUART16552_PADDR,
-		.irq		= DUART16552_INTNUM,
+		.irq		= XTENSA_PIC_LINUX_IRQ(DUART16552_INTNUM),
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
 				  UPF_IOREMAP,
 		.iotype		= XCHAL_HAVE_BE ? UPIO_MEM32BE : UPIO_MEM32,
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef2..43c7116 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 	case 3:
 		if (newline != '\n')
 			return -EINVAL;
+		/* fall through */
 	case 2:
 		if (length <= 0)
 			return -EINVAL;
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index c8a32fb..78b2e0d 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -52,7 +52,7 @@ BFQG_FLAG_FNS(idling)
 BFQG_FLAG_FNS(empty)
 #undef BFQG_FLAG_FNS
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 {
 	unsigned long long now;
@@ -67,7 +67,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 	bfqg_stats_clear_waiting(stats);
 }
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 						 struct bfq_group *curr_bfqg)
 {
@@ -81,7 +81,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
 	bfqg_stats_mark_waiting(stats);
 }
 
-/* This should be called with the queue_lock held. */
+/* This should be called with the scheduler lock held. */
 static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
 {
 	unsigned long long now;
@@ -203,12 +203,30 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 
 static void bfqg_get(struct bfq_group *bfqg)
 {
-	return blkg_get(bfqg_to_blkg(bfqg));
+	bfqg->ref++;
 }
 
 void bfqg_put(struct bfq_group *bfqg)
 {
-	return blkg_put(bfqg_to_blkg(bfqg));
+	bfqg->ref--;
+
+	if (bfqg->ref == 0)
+		kfree(bfqg);
+}
+
+static void bfqg_and_blkg_get(struct bfq_group *bfqg)
+{
+	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
+	bfqg_get(bfqg);
+
+	blkg_get(bfqg_to_blkg(bfqg));
+}
+
+void bfqg_and_blkg_put(struct bfq_group *bfqg)
+{
+	bfqg_put(bfqg);
+
+	blkg_put(bfqg_to_blkg(bfqg));
 }
 
 void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
@@ -312,7 +330,11 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
 	if (bfqq) {
 		bfqq->ioprio = bfqq->new_ioprio;
 		bfqq->ioprio_class = bfqq->new_ioprio_class;
-		bfqg_get(bfqg);
+		/*
+		 * Make sure that bfqg and its associated blkg do not
+		 * disappear before entity.
+		 */
+		bfqg_and_blkg_get(bfqg);
 	}
 	entity->parent = bfqg->my_entity; /* NULL for root group */
 	entity->sched_data = &bfqg->sched_data;
@@ -399,6 +421,8 @@ struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
 		return NULL;
 	}
 
+	/* see comments in bfq_bic_update_cgroup for why refcounting */
+	bfqg_get(bfqg);
 	return &bfqg->pd;
 }
 
@@ -426,7 +450,7 @@ void bfq_pd_free(struct blkg_policy_data *pd)
 	struct bfq_group *bfqg = pd_to_bfqg(pd);
 
 	bfqg_stats_exit(&bfqg->stats);
-	return kfree(bfqg);
+	bfqg_put(bfqg);
 }
 
 void bfq_pd_reset_stats(struct blkg_policy_data *pd)
@@ -496,9 +520,10 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
  * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
  * it on the new one.  Avoid putting the entity on the old group idle tree.
  *
- * Must be called under the queue lock; the cgroup owning @bfqg must
- * not disappear (by now this just means that we are called under
- * rcu_read_lock()).
+ * Must be called under the scheduler lock, to make sure that the blkg
+ * owning @bfqg does not disappear (see comments in
+ * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
+ * objects).
  */
 void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		   struct bfq_group *bfqg)
@@ -519,16 +544,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
 	else if (entity->on_st)
 		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
-	bfqg_put(bfqq_group(bfqq));
+	bfqg_and_blkg_put(bfqq_group(bfqq));
 
-	/*
-	 * Here we use a reference to bfqg.  We don't need a refcounter
-	 * as the cgroup reference will not be dropped, so that its
-	 * destroy() callback will not be invoked.
-	 */
 	entity->parent = bfqg->my_entity;
 	entity->sched_data = &bfqg->sched_data;
-	bfqg_get(bfqg);
+	/* pin down bfqg and its associated blkg */
+	bfqg_and_blkg_get(bfqg);
 
 	if (bfq_bfqq_busy(bfqq)) {
 		bfq_pos_tree_add_move(bfqd, bfqq);
@@ -545,8 +566,9 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * @bic: the bic to move.
  * @blkcg: the blk-cgroup to move to.
  *
- * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
- * has to make sure that the reference to cgroup is valid across the call.
+ * Move bic to blkcg, assuming that bfqd->lock is held, which makes
+ * sure that the reference to cgroup is valid across the call (see
+ * comments in bfq_bic_update_cgroup on this issue).
  *
  * NOTE: an alternative approach might have been to store the current
  * cgroup in bfqq and getting a reference to it, reducing the lookup
@@ -604,6 +626,57 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 		goto out;
 
 	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+	/*
+	 * Update blkg_path for bfq_log_* functions. We cache this
+	 * path, and update it here, for the following
+	 * reasons. Operations on blkg objects in blk-cgroup are
+	 * protected with the request_queue lock, and not with the
+	 * lock that protects the instances of this scheduler
+	 * (bfqd->lock). This exposes BFQ to the following sort of
+	 * race.
+	 *
+	 * The blkg_lookup performed in bfq_get_queue, protected
+	 * through rcu, may happen to return the address of a copy of
+	 * the original blkg. If this is the case, then the
+	 * bfqg_and_blkg_get performed in bfq_get_queue, to pin down
+	 * the blkg, is useless: it does not prevent blk-cgroup code
+	 * from destroying both the original blkg and all objects
+	 * directly or indirectly referred by the copy of the
+	 * blkg.
+	 *
+	 * On the bright side, destroy operations on a blkg invoke, as
+	 * a first step, hooks of the scheduler associated with the
+	 * blkg. And these hooks are executed with bfqd->lock held for
+	 * BFQ. As a consequence, for any blkg associated with the
+	 * request queue this instance of the scheduler is attached
+	 * to, we are guaranteed that such a blkg is not destroyed, and
+	 * that all the pointers it contains are consistent, while we
+	 * are holding bfqd->lock. A blkg_lookup performed with
+	 * bfqd->lock held then returns a fully consistent blkg, which
+	 * remains consistent as long as this lock is held.
+	 *
+	 * Thanks to the last fact, and to the fact that: (1) bfqg has
+	 * been obtained through a blkg_lookup in the above
+	 * assignment, and (2) bfqd->lock is being held, here we can
+	 * safely use the policy data for the involved blkg (i.e., the
+	 * field bfqg->pd) to get to the blkg associated with bfqg,
+	 * and then we can safely use any field of blkg. After we
+	 * release bfqd->lock, even just getting blkg through this
+	 * bfqg may cause dangling references to be traversed, as
+	 * bfqg->pd may not exist any more.
+	 *
+	 * In view of the above facts, here we cache, in the bfqg, any
+	 * blkg data we may need for this bic, and for its associated
+	 * bfq_queue. As of now, we need to cache only the path of the
+	 * blkg, which is used in the bfq_log_* functions.
+	 *
+	 * Finally, note that bfqg itself needs to be protected from
+	 * destruction on the blkg_free of the original blkg (which
+	 * invokes bfq_pd_free). We use an additional private
+	 * refcounter for bfqg, to let it disappear only after no
+	 * bfq_queue refers to it any longer.
+	 */
+	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
 	bic->blkcg_serial_nr = serial_nr;
 out:
 	rcu_read_unlock();
@@ -640,8 +713,6 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  * @bfqd: the device data structure with the root group.
  * @bfqg: the group to move from.
  * @st: the service tree with the entities.
- *
- * Needs queue_lock to be taken and reference to be valid over the call.
  */
 static void bfq_reparent_active_entities(struct bfq_data *bfqd,
 					 struct bfq_group *bfqg,
@@ -692,8 +763,7 @@ void bfq_pd_offline(struct blkg_policy_data *pd)
 		/*
 		 * The idle tree may still contain bfq_queues belonging
 		 * to exited task because they never migrated to a different
-		 * cgroup from the one being destroyed now.  No one else
-		 * can access them so it's safe to act without any lock.
+		 * cgroup from the one being destroyed now.
 		 */
 		bfq_flush_idle_tree(st);
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 08ce450..12bbc6b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 }
 
 static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+		      struct bfq_io_cq *bic, bool bfq_already_existing)
 {
+	unsigned int old_wr_coeff = bfqq->wr_coeff;
+	bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
 	if (bic->saved_idle_window)
 		bfq_mark_bfqq_idle_window(bfqq);
 	else
@@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 
 	/* make sure weight will be updated, however we got here */
 	bfqq->entity.prio_changed = 1;
+
+	if (likely(!busy))
+		return;
+
+	if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
+		bfqd->wr_busy_queues++;
+	else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
+		bfqd->wr_busy_queues--;
 }
 
 static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -3665,7 +3677,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
 
 	kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
-	bfqg_put(bfqg);
+	bfqg_and_blkg_put(bfqg);
 #endif
 }
 
@@ -4290,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
-	struct bfq_data *bfqd = bfqq->bfqd;
+	struct bfq_queue *bfqq;
+	struct bfq_data *bfqd;
+
+	if (!rq->elv.icq)
+		return;
+
+	bfqq = RQ_BFQQ(rq);
+	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
 		bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4324,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
 		 */
 
 		if (!RB_EMPTY_NODE(&rq->rb_node))
-			bfq_remove_request(q, rq);
+			bfq_remove_request(rq->q, rq);
 		bfq_put_rq_priv_body(bfqq);
 	}
 
@@ -4394,21 +4412,22 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 /*
  * Allocate bfq data structures associated with this request.
  */
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
-			      struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-	struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+	struct bfq_io_cq *bic;
 	const int is_sync = rq_is_sync(rq);
 	struct bfq_queue *bfqq;
 	bool new_queue = false;
-	bool split = false;
+	bool bfqq_already_existing = false, split = false;
+
+	if (!rq->elv.icq)
+		return;
+	bic = icq_to_bic(rq->elv.icq);
 
 	spin_lock_irq(&bfqd->lock);
 
-	if (!bic)
-		goto queue_fail;
-
 	bfq_check_ioprio_change(bic, bio);
 
 	bfq_bic_update_cgroup(bic, bio);
@@ -4432,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 				bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
 								 true, is_sync,
 								 NULL);
+			else
+				bfqq_already_existing = true;
 		}
 	}
 
@@ -4457,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 			 * queue: restore the idle window and the
 			 * possible weight raising period.
 			 */
-			bfq_bfqq_resume_state(bfqq, bic);
+			bfq_bfqq_resume_state(bfqq, bfqd, bic,
+					      bfqq_already_existing);
 		}
 	}
 
@@ -4465,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		bfq_handle_burst(bfqd, bfqq);
 
 	spin_unlock_irq(&bfqd->lock);
-
-	return 0;
-
-queue_fail:
-	spin_unlock_irq(&bfqd->lock);
-
-	return 1;
 }
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4950,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
 
 static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
-		.get_rq_priv		= bfq_get_rq_private,
-		.put_rq_priv		= bfq_put_rq_private,
+		.prepare_request	= bfq_prepare_request,
+		.finish_request		= bfq_finish_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ae783c0..5c3bf98 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -759,6 +759,12 @@ struct bfq_group {
 	/* must be the first member */
 	struct blkg_policy_data pd;
 
+	/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
+	char blkg_path[128];
+
+	/* reference counter (see comments in bfq_bic_update_cgroup) */
+	int ref;
+
 	struct bfq_entity entity;
 	struct bfq_sched_data sched_data;
 
@@ -838,7 +844,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
 struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
-void bfqg_put(struct bfq_group *bfqg);
+void bfqg_and_blkg_put(struct bfq_group *bfqg);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 extern struct cftype bfq_blkcg_legacy_files[];
@@ -910,20 +916,13 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
 
 #define bfq_log_bfqq(bfqd, bfqq, fmt, args...)	do {			\
-	char __pbuf[128];						\
-									\
-	blkg_path(bfqg_to_blkg(bfqq_group(bfqq)), __pbuf, sizeof(__pbuf)); \
-	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid, \
+	blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\
 			bfq_bfqq_sync((bfqq)) ? 'S' : 'A',		\
-			  __pbuf, ##args);				\
+			bfqq_group(bfqq)->blkg_path, ##args);		\
 } while (0)
 
-#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	do {			\
-	char __pbuf[128];						\
-									\
-	blkg_path(bfqg_to_blkg(bfqg), __pbuf, sizeof(__pbuf));		\
-	blk_add_trace_msg((bfqd)->queue, "%s " fmt, __pbuf, ##args);	\
-} while (0)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...)	\
+	blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args)
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED */
 
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5384713..b8a3a65 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio)
 	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
 		return false;
 
+	if (!bio_sectors(bio))
+		return false;
+
 	/* Already protected? */
 	if (bio_integrity(bio))
 		return false;
@@ -221,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
  * @bio:	bio to generate/verify integrity metadata for
  * @proc_fn:	Pointer to the relevant processing function
  */
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
 				 integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -229,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
 	struct bvec_iter bviter;
 	struct bio_vec bv;
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	unsigned int ret = 0;
+	blk_status_t ret = BLK_STS_OK;
 	void *prot_buf = page_address(bip->bip_vec->bv_page) +
 		bip->bip_vec->bv_offset;
 
@@ -366,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
@@ -395,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
 	 * integrity metadata.  Restore original bio end_io handler
 	 * and run it.
 	 */
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio->bi_end_io = bip->bip_end_io;
 		bio_endio(bio);
 
diff --git a/block/bio.c b/block/bio.c
index 888e780..1cfcd0d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
 	return bvl;
 }
 
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio);
 }
+EXPORT_SYMBOL(bio_uninit);
 
 static void bio_free(struct bio *bio)
 {
 	struct bio_set *bs = bio->bi_pool;
 	void *p;
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	if (bs) {
 		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
 	}
 }
 
+/*
+ * Users of this function have their own bio allocation. Consequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
 void bio_init(struct bio *bio, struct bio_vec *table,
 	      unsigned short max_vecs)
 {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
 {
 	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
 
-	__bio_free(bio);
+	bio_uninit(bio);
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags;
@@ -309,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
 {
 	struct bio *parent = bio->bi_private;
 
-	if (!parent->bi_error)
-		parent->bi_error = bio->bi_error;
+	if (!parent->bi_status)
+		parent->bi_status = bio->bi_status;
 	bio_put(bio);
 	return parent;
 }
@@ -363,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	struct bio_list punt, nopunt;
 	struct bio *bio;
 
+	if (WARN_ON_ONCE(!bs->rescue_workqueue))
+		return;
 	/*
 	 * In order to guarantee forward progress we must punt only bios that
 	 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -474,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 
 		if (current->bio_list &&
 		    (!bio_list_empty(&current->bio_list[0]) ||
-		     !bio_list_empty(&current->bio_list[1])))
+		     !bio_list_empty(&current->bio_list[1])) &&
+		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -544,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {
@@ -593,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_bdev = bio_src->bi_bdev;
 	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_opf = bio_src->bi_opf;
+	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
@@ -676,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
 		return NULL;
 	bio->bi_bdev		= bio_src->bi_bdev;
 	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
@@ -918,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
 {
 	struct submit_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->error = blk_status_to_errno(bio->bi_status);
 	complete(&ret->event);
 }
 
@@ -1817,8 +1828,8 @@ void bio_endio(struct bio *bio)
 	}
 
 	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
-					 bio, bio->bi_error);
+		trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+					 blk_status_to_errno(bio->bi_status));
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
 	}
 
@@ -1921,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
 }
 EXPORT_SYMBOL(bioset_free);
 
-static struct bio_set *__bioset_create(unsigned int pool_size,
-				       unsigned int front_pad,
-				       bool create_bvec_pool)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ *              and %BIOSET_NEED_RESCUER
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
+ *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ *    dispatch queued requests when the mempool runs out of space.
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+			      unsigned int front_pad,
+			      int flags)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
@@ -1948,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (create_bvec_pool) {
+	if (flags & BIOSET_NEED_BVECS) {
 		bs->bvec_pool = biovec_create_pool(pool_size);
 		if (!bs->bvec_pool)
 			goto bad;
 	}
 
+	if (!(flags & BIOSET_NEED_RESCUER))
+		return bs;
+
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
@@ -1963,41 +1997,8 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
 	bioset_free(bs);
 	return NULL;
 }
-
-/**
- * bioset_create  - Create a bio_set
- * @pool_size:	Number of bio and bio_vecs to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- *    to ask for a number of bytes to be allocated in front of the bio.
- *    Front pad allocation is useful for embedding the bio inside
- *    another structure, to avoid allocating extra data to go with the bio.
- *    Note that the bio must be embedded at the END of that structure always,
- *    or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, true);
-}
 EXPORT_SYMBOL(bioset_create);
 
-/**
- * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
- * @pool_size:	Number of bio to cache in the mempool
- * @front_pad:	Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Same functionality as bioset_create() except that mempool is not
- *    created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
-	return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
 #ifdef CONFIG_BLK_CGROUP
 
 /**
@@ -2112,7 +2113,7 @@ static int __init init_bio(void)
 	bio_integrity_init();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7c29471..0480892 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg)
 			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
 	if (blkg->blkcg != &blkcg_root)
-		blk_exit_rl(&blkg->rl);
+		blk_exit_rl(blkg->q, &blkg->rl);
 
 	blkg_rwstat_exit(&blkg->stat_ios);
 	blkg_rwstat_exit(&blkg->stat_bytes);
diff --git a/block/blk-core.c b/block/blk-core.c
index c706852..af393d5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+static const struct {
+	int		errno;
+	const char	*name;
+} blk_errors[] = {
+	[BLK_STS_OK]		= { 0,		"" },
+	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
+	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
+	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
+	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
+	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
+	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
+	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
+	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
+	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
+	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },
+
+	/* device mapper special case, should not leak out: */
+	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
+
+	/* everything else not covered above: */
+	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+		if (blk_errors[i].errno == errno)
+			return (__force blk_status_t)i;
+	}
+
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+		return -EIO;
+	return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+	int idx = (__force int)status;
+
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+		return;
+
+	printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+			   __func__, blk_errors[idx].name, req->rq_disk ?
+			   req->rq_disk->disk_name : "?",
+			   (unsigned long long)blk_rq_pos(req));
+}
+
 static void req_bio_endio(struct request *rq, struct bio *bio,
-			  unsigned int nbytes, int error)
+			  unsigned int nbytes, blk_status_t error)
 {
 	if (error)
-		bio->bi_error = error;
+		bio->bi_status = error;
 
 	if (unlikely(rq->rq_flags & RQF_QUIET))
 		bio_set_flag(bio, BIO_QUIET);
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time. Queue lock must be held.
+ *   restarted around the specified time.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (likely(!blk_queue_dead(q)))
 		queue_delayed_work(kblockd_workqueue, &q->delay_work,
 				   msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
  **/
 void blk_start_queue_async(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	blk_run_queue_async(q);
 }
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
  * Description:
  *   blk_start_queue() will clear the stop flag on the queue, and call
  *   the request_fn for the queue if it was in a stopped state when
- *   entered. Also see blk_stop_queue(). Queue lock must be held.
+ *   entered. Also see blk_stop_queue().
  **/
 void blk_start_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
 	WARN_ON(!irqs_disabled());
+	WARN_ON_ONCE(q->mq_ops);
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	__blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
  *   or if it simply chooses not to queue more I/O at one point, it can
  *   call this function to prevent the request_fn from being called until
  *   the driver has signalled it's ready to go again. This happens by calling
- *   blk_start_queue() to restart queue operations. Queue lock must be held.
+ *   blk_start_queue() to restart queue operations.
  **/
 void blk_stop_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 inline void __blk_run_queue_uncond(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (unlikely(blk_queue_dead(q)))
 		return;
 
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
  * @q:	The queue to run
  *
  * Description:
- *    See @blk_run_queue. This variant must be called with the queue lock
- *    held and interrupts disabled.
+ *    See @blk_run_queue.
  */
 void __blk_run_queue(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us. The caller must hold the queue lock.
+ *    of us.
+ *
+ * Note:
+ *    Since it is not allowed to run q->delay_work after blk_cleanup_queue()
+ *    has canceled q->delay_work, callers must hold the queue lock to avoid
+ *    race conditions between blk_cleanup_queue() and blk_run_queue_async().
  */
 void blk_run_queue_async(struct request_queue *q)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
 		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irqsave(q->queue_lock, flags);
 	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 	int i;
 
 	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
 
 	while (true) {
 		bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
  */
 void blk_queue_bypass_start(struct request_queue *q)
 {
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irq(q->queue_lock);
 	q->bypass_depth++;
 	queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
  * @q: queue of interest
  *
  * Leave bypass mode and restore the normal queueing behavior.
+ *
+ * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
+ * this function is called for both blk-sq and blk-mq queues.
  */
 void blk_queue_bypass_end(struct request_queue *q)
 {
@@ -648,13 +739,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
+	if (rl != &q->root_rl)
+		WARN_ON_ONCE(!blk_get_queue(q));
+
 	return 0;
 }
 
-void blk_exit_rl(struct request_list *rl)
+void blk_exit_rl(struct request_queue *q, struct request_list *rl)
 {
-	if (rl->rq_pool)
+	if (rl->rq_pool) {
 		mempool_destroy(rl->rq_pool);
+		if (rl != &q->root_rl)
+			blk_put_queue(q);
+	}
 }
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
@@ -726,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!q->bio_split)
 		goto fail_id;
 
@@ -872,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 int blk_init_allocated_queue(struct request_queue *q)
 {
+	WARN_ON_ONCE(q->mq_ops);
+
 	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
 	if (!q->fq)
 		return -ENOMEM;
@@ -1009,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 	struct request_list *rl;
 	int on_thresh, off_thresh;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	spin_lock_irq(q->queue_lock);
 	q->nr_requests = nr;
 	blk_queue_congestion_threshold(q);
@@ -1071,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	int may_queue;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
+	lockdep_assert_held(q->queue_lock);
+
 	if (unlikely(blk_queue_dying(q)))
 		return ERR_PTR(-ENODEV);
 
@@ -1244,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 	struct request_list *rl;
 	struct request *rq;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
 retry:
 	rq = __get_request(rl, op, bio, gfp_mask);
 	if (!IS_ERR(rq))
 		return rq;
 
+	if (op & REQ_NOWAIT) {
+		blk_put_rl(rl);
+		return ERR_PTR(-EAGAIN);
+	}
+
 	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return rq;
@@ -1277,16 +1388,18 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 	goto retry;
 }
 
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
-		gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+					   unsigned int op, gfp_t gfp_mask)
 {
 	struct request *rq;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	/* create ioc upfront */
 	create_io_context(gfp_mask, q->node);
 
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, op, NULL, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		return rq;
@@ -1299,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				gfp_t gfp_mask)
 {
-	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw,
+	struct request *req;
+
+	if (q->mq_ops) {
+		req = blk_mq_alloc_request(q, op,
 			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
 				0 : BLK_MQ_REQ_NOWAIT);
-	else
-		return blk_old_get_request(q, rw, gfp_mask);
+		if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+			q->mq_ops->initialize_rq_fn(req);
+	} else {
+		req = blk_old_get_request(q, op, gfp_mask);
+		if (!IS_ERR(req) && q->initialize_rq_fn)
+			q->initialize_rq_fn(req);
+	}
+
+	return req;
 }
 EXPORT_SYMBOL(blk_get_request);
 
@@ -1322,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
@@ -1396,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
 static inline void blk_pm_put_request(struct request *rq) {}
 #endif
 
-/*
- * queue lock must be held
- */
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
 	req_flags_t rq_flags = req->rq_flags;
@@ -1411,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 		return;
 	}
 
+	lockdep_assert_held(q->queue_lock);
+
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
@@ -1640,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
 		req->ioprio = ioc->ioprio;
 	else
 		req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+	req->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(req->q, req, bio);
 }
 EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1659,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -1720,7 +1846,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
 	if (IS_ERR(req)) {
 		__wbt_done(q->rq_wb, wb_acct);
-		bio->bi_error = PTR_ERR(req);
+		if (PTR_ERR(req) == -ENOMEM)
+			bio->bi_status = BLK_STS_RESOURCE;
+		else
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		goto out_unlock;
 	}
@@ -1875,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
 {
 	struct request_queue *q;
 	int nr_sectors = bio_sectors(bio);
-	int err = -EIO;
+	blk_status_t status = BLK_STS_IOERR;
 	char b[BDEVNAME_SIZE];
 	struct hd_struct *part;
 
@@ -1894,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+	 * if the queue is not a request-based queue.
+	 */
+
+	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+		goto not_supported;
+
 	part = bio->bi_bdev->bd_part;
 	if (should_fail_request(part, bio->bi_iter.bi_size) ||
 	    should_fail_request(&part_to_disk(part)->part0,
@@ -1918,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
 	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
 		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
 		if (!nr_sectors) {
-			err = 0;
+			status = BLK_STS_OK;
 			goto end_io;
 		}
 	}
@@ -1970,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
 	return true;
 
 not_supported:
-	err = -EOPNOTSUPP;
+	status = BLK_STS_NOTSUPP;
 end_io:
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	return false;
 }
@@ -2051,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-		if (likely(blk_queue_enter(q, false) == 0)) {
+		if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2076,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
 			bio_list_merge(&bio_list_on_stack[0], &same);
 			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
 		} else {
-			bio_io_error(bio);
+			if (unlikely(!blk_queue_dying(q) &&
+					(bio->bi_opf & REQ_NOWAIT)))
+				bio_wouldblock_error(bio);
+			else
+				bio_io_error(bio);
 		}
 		bio = bio_list_pop(&bio_list_on_stack[0]);
 	} while (bio);
@@ -2177,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long flags;
 	int where = ELEVATOR_INSERT_BACK;
 
 	if (blk_cloned_rq_check_limits(q, rq))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (rq->rq_disk &&
 	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
 		blk_mq_sched_insert_request(rq, false, true, false, false);
-		return 0;
+		return BLK_STS_OK;
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
-		return -ENODEV;
+		return BLK_STS_IOERR;
 	}
 
 	/*
@@ -2216,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
+	return BLK_STS_OK;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
@@ -2232,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
  *
  * Return:
  *     The number of bytes to fail.
- *
- * Context:
- *     queue_lock must be held.
  */
 unsigned int blk_rq_err_bytes(const struct request *rq)
 {
@@ -2374,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_peek_request(struct request_queue *q)
 {
 	struct request *rq;
 	int ret;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	while ((rq = __elv_next_request(q)) != NULL) {
 
 		rq = blk_pm_peek_request(q, rq);
@@ -2450,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
-			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
 			rq->rq_flags |= RQF_QUIET;
 			/*
 			 * Mark this request as started so we don't trigger
 			 * any debug logic in the end I/O path.
 			 */
 			blk_start_request(rq);
-			__blk_end_request_all(rq, err);
+			__blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+					BLK_STS_TARGET : BLK_STS_IOERR);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
@@ -2499,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
  *
  *     Block internal functions which don't want to start timer should
  *     call blk_dequeue_request().
- *
- * Context:
- *     queue_lock must be held.
  */
 void blk_start_request(struct request *req)
 {
+	lockdep_assert_held(req->q->queue_lock);
+	WARN_ON_ONCE(req->q->mq_ops);
+
 	blk_dequeue_request(req);
 
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2529,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_fetch_request(struct request_queue *q)
 {
 	struct request *rq;
 
+	lockdep_assert_held(q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	rq = blk_peek_request(q);
 	if (rq)
 		blk_start_request(rq);
@@ -2547,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete @req
  *
  * Description:
@@ -2566,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
  **/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+		unsigned int nr_bytes)
 {
 	int total_bytes;
 
-	trace_block_rq_complete(req, error, nr_bytes);
+	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
 
 	if (!req->bio)
 		return false;
 
-	if (error && !blk_rq_is_passthrough(req) &&
-	    !(req->rq_flags & RQF_QUIET)) {
-		char *error_type;
-
-		switch (error) {
-		case -ENOLINK:
-			error_type = "recoverable transport";
-			break;
-		case -EREMOTEIO:
-			error_type = "critical target";
-			break;
-		case -EBADE:
-			error_type = "critical nexus";
-			break;
-		case -ETIMEDOUT:
-			error_type = "timeout";
-			break;
-		case -ENOSPC:
-			error_type = "critical space allocation";
-			break;
-		case -ENODATA:
-			error_type = "critical medium";
-			break;
-		case -EIO:
-		default:
-			error_type = "I/O";
-			break;
-		}
-		printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
-				   __func__, error_type, req->rq_disk ?
-				   req->rq_disk->disk_name : "?",
-				   (unsigned long long)blk_rq_pos(req));
-
-	}
+	if (unlikely(error && !blk_rq_is_passthrough(req) &&
+		     !(req->rq_flags & RQF_QUIET)))
+		print_req_error(req, error);
 
 	blk_account_io_completion(req, nr_bytes);
 
@@ -2674,7 +2781,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
 				    unsigned int nr_bytes,
 				    unsigned int bidi_bytes)
 {
@@ -2712,13 +2819,13 @@ void blk_unprep_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_unprep_request);
 
-/*
- * queue lock must be held
- */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
 {
 	struct request_queue *q = req->q;
 
+	lockdep_assert_held(req->q->queue_lock);
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (req->rq_flags & RQF_STATS)
 		blk_stat_add(req);
 
@@ -2752,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
 /**
  * blk_end_bidi_request - Complete a bidi request
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2766,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
 				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 
@@ -2785,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2797,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
 
@@ -2811,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2822,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
+	WARN_ON_ONCE(rq->q->mq_ops);
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(blk_end_request);
@@ -2831,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Completely finish @rq.
  */
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
@@ -2852,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2862,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+		unsigned int nr_bytes)
 {
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(__blk_end_request);
@@ -2871,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
 /**
 * __blk_end_request_all - Helper function for drivers to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Completely finish @rq.  Must be called with queue lock held.
  */
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
 {
 	bool pending;
 	unsigned int bidi_bytes = 0;
 
+	lockdep_assert_held(rq->q->queue_lock);
+	WARN_ON_ONCE(rq->q->mq_ops);
+
 	if (unlikely(blk_bidi_rq(rq)))
 		bidi_bytes = blk_rq_bytes(rq->next_rq);
 
@@ -2892,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error:    block status code
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
@@ -2902,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
@@ -3145,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 			    bool from_schedule)
 	__releases(q->queue_lock)
 {
+	lockdep_assert_held(q->queue_lock);
+
 	trace_block_unplug(q, depth, !from_schedule);
 
 	if (from_schedule)
@@ -3243,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		 * Short-circuit if @q is dead
 		 */
 		if (unlikely(blk_queue_dying(q))) {
-			__blk_end_request_all(rq, -ENODEV);
+			__blk_end_request_all(rq, BLK_STS_IOERR);
 			continue;
 		}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3..5c0f3dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
  * @rq: request to complete
  * @error: end I/O status of the request
  */
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	struct completion *waiting = rq->end_io_data;
 
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
-		__blk_end_request_all(rq, -ENXIO);
+		__blk_end_request_all(rq, BLK_STS_IOERR);
 		spin_unlock_irq(q->queue_lock);
 		return;
 	}
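
The blk-exec.c hunk above switches blk_end_sync_rq() to the new rq_end_io_fn prototype, which receives a blk_status_t instead of a negative errno. A minimal sketch of a driver-private end_io callback under the same prototype; the mydrv_cmd structure and function name are made up for illustration:

struct mydrv_cmd {
	struct completion done;
	blk_status_t status;
};

static void mydrv_end_io(struct request *rq, blk_status_t error)
{
	struct mydrv_cmd *cmd = rq->end_io_data;

	/* error is a block status code, e.g. BLK_STS_OK or BLK_STS_IOERR */
	cmd->status = error;
	complete(&cmd->done);
}
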
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880..ed5fe32 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  */
 static bool blk_flush_complete_seq(struct request *rq,
 				   struct blk_flush_queue *fq,
-				   unsigned int seq, int error)
+				   unsigned int seq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
 	return kicked | queued;
 }
 
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
 	struct request_queue *q = flush_rq->q;
 	struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	return blk_flush_queue_rq(flush_rq, false);
 }
 
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+	lockdep_assert_held(q->queue_lock);
+
 	/*
 	 * Updating q->in_flight[] here for making this tag usable
 	 * early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
 		blk_run_queue_async(q);
 }
 
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
  * or __blk_mq_run_hw_queue() to dispatch request.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock) in !mq case
  */
 void blk_insert_flush(struct request *rq)
 {
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
 	unsigned int policy = blk_flush_policy(fflags, rq);
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	/*
 	 * @policy now records what operations need to be done.  Adjust
 	 * REQ_PREFLUSH and FUA for the driver.
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9..feb3057 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
 	.sysfs_ops	= &integrity_ops,
 };
 
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 {
-	return 0;
+	return BLK_STS_OK;
 }
 
 static const struct blk_integrity_profile nop_profile = {
diff --git a/block/blk-map.c b/block/blk-map.c
index 3b5cb86..2547016 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
  */
 int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
+	blk_queue_bounce(rq->q, &bio);
+
 	if (!rq->bio) {
 		blk_rq_bio_prep(rq->q, rq, bio);
 	} else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
 		map_data->offset += bio->bi_iter.bi_size;
 
 	orig_bio = bio;
-	blk_queue_bounce(q, &bio);
 
 	/*
 	 * We link the bounce buffer in and could have to traverse it
 	 * later so we have to get a ref to prevent it from being freed
 	 */
-	bio_get(bio);
-
 	ret = blk_rq_append_bio(rq, bio);
+	bio_get(bio);
 	if (ret) {
 		bio_endio(bio);
 		__blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		return ret;
 	}
 
-	blk_queue_bounce(q, &rq->bio);
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae4..99038830 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	bool do_split = true;
 	struct bio *new = NULL;
 	const unsigned max_sectors = get_max_io_size(q, bio);
-	unsigned bvecs = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
-		 * With arbitrary bio size, the incoming bio may be very
-		 * big. We have to split the bio into small bios so that
-		 * each holds at most BIO_MAX_PAGES bvecs because
-		 * bio_clone() can fail to allocate big bvecs.
-		 *
-		 * It should have been better to apply the limit per
-		 * request queue in which bio_clone() is involved,
-		 * instead of globally. The biggest blocker is the
-		 * bio_clone() in bio bounce.
-		 *
-		 * If bio is splitted by this reason, we should have
-		 * allowed to continue bios merging, but don't do
-		 * that now for making the change simple.
-		 *
-		 * TODO: deal with bio bounce's bio_clone() gracefully
-		 * and convert the global limit into per-queue limit.
-		 */
-		if (bvecs++ >= BIO_MAX_PAGES)
-			goto split;
-
-		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
@@ -202,8 +180,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 	return do_split ? new : NULL;
 }
 
-void blk_queue_split(struct request_queue *q, struct bio **bio,
-		     struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
 {
 	struct bio *split, *res;
 	unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
 		break;
 	default:
 		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
 static struct request *attempt_merge(struct request_queue *q,
 				     struct request *req, struct request *next)
 {
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	if (!rq_mergeable(req) || !rq_mergeable(next))
 		return NULL;
 
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
 		return NULL;
 
 	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (req->write_hint != next->write_hint)
+		return NULL;
+
+	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
 	 * will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
+	/*
+	 * Don't allow merge of different write hints, or for a hint with
+	 * non-hint IO.
+	 */
+	if (rq->write_hint != bio->bi_write_hint)
+		return false;
+
 	return true;
 }
 
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e86..2cca4fc 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
-			      const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+			      const struct cpumask *online_mask)
 {
-	return cpu * nr_queues / nr_cpus;
+	/*
+	 * CPUs that are not online are mapped to queue index 0.
+	 */
+	if (!cpumask_test_cpu(cpu, online_mask))
+		return 0;
+	return cpu % nr_queues;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 	unsigned int *map = set->mq_map;
 	unsigned int nr_queues = set->nr_hw_queues;
 	const struct cpumask *online_mask = cpu_online_mask;
-	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
-	cpumask_var_t cpus;
+	unsigned int cpu, first_sibling;
 
-	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		return -ENOMEM;
-
-	cpumask_clear(cpus);
-	nr_cpus = nr_uniq_cpus = 0;
-	for_each_cpu(i, online_mask) {
-		nr_cpus++;
-		first_sibling = get_first_sibling(i);
-		if (!cpumask_test_cpu(first_sibling, cpus))
-			nr_uniq_cpus++;
-		cpumask_set_cpu(i, cpus);
+	for_each_possible_cpu(cpu) {
+		/*
+		 * First do sequential mapping between CPUs and queues.
+		 * If we still have CPUs to map, and the cores have multiple
+		 * threads, map sibling threads to the same queue for
+		 * performance reasons.
+		 */
+		if (cpu < nr_queues) {
+			map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+		} else {
+			first_sibling = get_first_sibling(cpu);
+			if (first_sibling == cpu)
+				map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+			else
+				map[cpu] = map[first_sibling];
+		}
 	}
 
-	queue = 0;
-	for_each_possible_cpu(i) {
-		if (!cpumask_test_cpu(i, online_mask)) {
-			map[i] = 0;
-			continue;
-		}
-
-		/*
-		 * Easy case - we have equal or more hardware queues. Or
-		 * there are no thread siblings to take into account. Do
-		 * 1:1 if enough, or sequential mapping if less.
-		 */
-		if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
-			map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
-			queue++;
-			continue;
-		}
-
-		/*
-		 * Less then nr_cpus queues, and we have some number of
-		 * threads per cores. Map sibling threads to the same
-		 * queue.
-		 */
-		first_sibling = get_first_sibling(i);
-		if (first_sibling == i) {
-			map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
-							queue);
-			queue++;
-		} else
-			map[i] = map[first_sibling];
-	}
-
-	free_cpumask_var(cpus);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
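
The rewritten blk_mq_map_queues() above replaces the old proportional spreading with a simple modulo mapping plus sibling folding. A stand-alone user-space approximation of the resulting assignment, purely illustrative: the online mask is reduced to a flag and sibling folding is omitted.

#include <stdio.h>

/* Approximation of the new mapping: each online CPU goes to cpu % nr_queues,
 * offline CPUs are parked on queue 0; sibling threads (not modelled here)
 * inherit the mapping of their first sibling. */
static unsigned int map_cpu(unsigned int cpu, unsigned int nr_queues, int online)
{
	if (!online)
		return 0;
	return cpu % nr_queues;
}

int main(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < 8; cpu++)
		printf("cpu%u -> hctx%u\n", cpu, map_cpu(cpu, 3, 1));
	return 0;
}

With 8 CPUs and 3 queues this prints cpu0->hctx0, cpu1->hctx1, cpu2->hctx2, cpu3->hctx0 and so on, i.e. the mapping simply wraps around rather than spreading proportionally as before.
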
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4..9ebc294 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
 		blk_mq_run_hw_queues(q, true);
 	} else if (strcmp(op, "start") == 0) {
 		blk_mq_start_stopped_hw_queues(q, true);
+	} else if (strcmp(op, "kick") == 0) {
+		blk_mq_kick_requeue_list(q);
 	} else {
 		pr_err("%s: unsupported operation '%s'\n", __func__, op);
 inval:
-		pr_err("%s: use either 'run' or 'start'\n", __func__);
+		pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
 		return -EINVAL;
 	}
 	return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
 	}
 }
 
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+	return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct request_queue *q = data;
+	int i;
+
+	for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+		q->write_hints[i] = 0;
+
+	return count;
+}
+
 static int queue_poll_stat_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
 };
 #undef RQF_NAME
 
+#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
+static const char *const rqaf_name[] = {
+	RQAF_NAME(COMPLETE),
+	RQAF_NAME(STARTED),
+	RQAF_NAME(POLL_SLEPT),
+};
+#undef RQAF_NAME
+
 int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 {
 	const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 	seq_puts(m, ", .rq_flags=");
 	blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
 		       ARRAY_SIZE(rqf_name));
+	seq_puts(m, ", .atomic_flags=");
+	blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
 	seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
 		   rq->internal_tag);
 	if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
 }
 EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
 
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+	__acquires(&q->requeue_lock)
+{
+	struct request_queue *q = m->private;
+
+	spin_lock_irq(&q->requeue_lock);
+	return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct request_queue *q = m->private;
+
+	return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+	__releases(&q->requeue_lock)
+{
+	struct request_queue *q = m->private;
+
+	spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+	.start	= queue_requeue_list_start,
+	.next	= queue_requeue_list_next,
+	.stop	= queue_requeue_list_stop,
+	.show	= blk_mq_debugfs_rq_show,
+};
+
 static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
 	__acquires(&hctx->lock)
 {
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
 	.show	= blk_mq_debugfs_rq_show,
 };
 
+struct show_busy_params {
+	struct seq_file		*m;
+	struct blk_mq_hw_ctx	*hctx;
+};
+
+/*
+ * Note: the state of a request may change while this function is in progress,
+ * e.g. due to a concurrent blk_mq_finish_request() call.
+ */
+static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+{
+	const struct show_busy_params *params = data;
+
+	if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
+	    test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+		__blk_mq_debugfs_rq_show(params->m,
+					 list_entry_rq(&rq->queuelist));
+}
+
+static int hctx_busy_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	struct show_busy_params params = { .m = m, .hctx = hctx };
+
+	blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
+				&params);
+
+	return 0;
+}
+
 static int hctx_ctx_map_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{"poll_stat", 0400, queue_poll_stat_show},
+	{"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
 	{"state", 0600, queue_state_show, queue_state_write},
+	{"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
 	{},
 };
 
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"state", 0400, hctx_state_show},
 	{"flags", 0400, hctx_flags_show},
 	{"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
+	{"busy", 0400, hctx_busy_show},
 	{"ctx_map", 0400, hctx_ctx_map_show},
 	{"tags", 0400, hctx_tags_show},
 	{"tags_bitmap", 0400, hctx_tags_bitmap_show},
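
The debugfs additions above follow the blk_mq_debugfs_attr convention used throughout this file: a name, a mode, and either a show (plus optional write) handler or a seq_ops pointer. A hedged sketch of how a further read-only queue attribute would be declared in the same style; the attribute and function names here are hypothetical:

static int queue_example_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;

	seq_printf(m, "nr_hw_queues: %u\n", q->nr_hw_queues);
	return 0;
}

static const struct blk_mq_debugfs_attr example_queue_attrs[] = {
	{"example", 0400, queue_example_show},
	{},
};
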
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692..7f0dc48 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
 
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
-				      struct request *rq,
-				      struct bio *bio,
-				      struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq;
 
 	spin_lock_irq(q->queue_lock);
@@ -47,90 +46,47 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
 		if (!icq)
 			return;
 	}
-
+	get_io_context(icq->ioc);
 	rq->elv.icq = icq;
-	if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
-		rq->rq_flags |= RQF_ELVPRIV;
-		get_io_context(icq->ioc);
+}
+
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
-	}
 
-	rq->elv.icq = NULL;
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
-				    struct request *rq, struct bio *bio)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-	struct io_context *ioc;
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
 
-	ioc = rq_ioc(bio);
-	if (ioc)
-		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
-}
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
 
-struct request *blk_mq_sched_get_request(struct request_queue *q,
-					 struct bio *bio,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
-{
-	struct elevator_queue *e = q->elevator;
-	struct request *rq;
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	blk_queue_enter_live(q);
-	data->q = q;
-	if (likely(!data->ctx))
-		data->ctx = blk_mq_get_ctx(q);
-	if (likely(!data->hctx))
-		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
-	if (e) {
-		data->flags |= BLK_MQ_REQ_INTERNAL;
-
-		/*
-		 * Flush requests are special and go directly to the
-		 * dispatch list.
-		 */
-		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
-			rq = e->type->ops.mq.get_request(q, op, data);
-			if (rq)
-				rq->rq_flags |= RQF_QUEUED;
-		} else
-			rq = __blk_mq_alloc_request(data, op);
-	} else {
-		rq = __blk_mq_alloc_request(data, op);
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
 	}
 
-	if (rq) {
-		if (!op_is_flush(op)) {
-			rq->elv.icq = NULL;
-			if (e && e->type->icq_cache)
-				blk_mq_sched_assign_ioc(q, rq, bio);
-		}
-		data->hctx->queued++;
-		return rq;
-	}
-
-	blk_queue_exit(q);
-	return NULL;
-}
-
-void blk_mq_sched_put_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-
-	if (rq->rq_flags & RQF_ELVPRIV) {
-		blk_mq_sched_put_rq_priv(rq->q, rq);
-		if (rq->elv.icq) {
-			put_io_context(rq->elv.icq->ioc);
-			rq->elv.icq = NULL;
-		}
-	}
-
-	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
-		e->type->ops.mq.put_request(rq);
-	else
-		blk_mq_finish_request(rq);
+	return false;
 }
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
@@ -141,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	bool did_work = false;
 	LIST_HEAD(rq_list);
 
-	if (unlikely(blk_mq_hctx_stopped(hctx)))
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 		return;
 
 	hctx->run++;
@@ -221,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	struct request *rq;
+	int checked = 8;
+
+	lockdep_assert_held(&ctx->lock);
+
+	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+		bool merged = false;
+
+		if (!checked--)
+			break;
+
+		if (!blk_rq_merge_ok(rq, bio))
+			continue;
+
+		switch (blk_try_merge(rq, bio)) {
+		case ELEVATOR_BACK_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_back_merge(q, rq, bio);
+			break;
+		case ELEVATOR_FRONT_MERGE:
+			if (blk_mq_sched_allow_merge(q, rq, bio))
+				merged = bio_attempt_front_merge(q, rq, bio);
+			break;
+		case ELEVATOR_DISCARD_MERGE:
+			merged = bio_attempt_discard_merge(q, rq, bio);
+			break;
+		default:
+			continue;
+		}
+
+		if (merged)
+			ctx->rq_merged++;
+		return merged;
+	}
+
+	return false;
+}
+
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
 	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	bool ret = false;
 
-	if (e->type->ops.mq.bio_merge) {
-		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+	if (e && e->type->ops.mq.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.mq.bio_merge(hctx, bio);
 	}
 
-	return false;
+	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+		/* default per sw-queue merge */
+		spin_lock(&ctx->lock);
+		ret = blk_mq_attempt_merge(q, ctx, bio);
+		spin_unlock(&ctx->lock);
+	}
+
+	blk_mq_put_ctx(ctx);
+	return ret;
 }
 
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
@@ -266,18 +277,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:    loop cursor.
@@ -309,6 +308,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb53..9267d0b7 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,8 +7,7 @@
 void blk_mq_sched_free_hctx_data(struct request_queue *q,
 				 void (*exit)(struct blk_mq_hw_ctx *));
 
-struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
-void blk_mq_sched_put_request(struct request *rq);
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
 
 void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
-	struct elevator_queue *e = q->elevator;
-
-	if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
 		return false;
 
 	return __blk_mq_sched_bio_merge(q, bio);
 }
 
-static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
-					   struct request *rq,
-					   struct bio *bio)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.mq.get_rq_priv)
-		return e->type->ops.mq.get_rq_priv(q, rq, bio);
-
-	return 0;
-}
-
-static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
-					    struct request *rq)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.mq.put_rq_priv)
-		e->type->ops.mq.put_rq_priv(q, rq);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
@@ -115,15 +91,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
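
With blk_mq_sched_get_request()/blk_mq_sched_put_request() gone, per-request scheduler work now hangs off the elevator's limit_depth, prepare_request and finish_request hooks, which blk_mq_get_request() and blk_mq_free_request() call directly (see the blk-mq.c hunks below). A rough sketch of what an mq scheduler might wire up under the new model; the "myelv_" names and the depth value are hypothetical:

static void myelv_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
	/* e.g. confine async requests to part of the tag space */
	if (!op_is_sync(op))
		data->shallow_depth = 16;
}

static void myelv_prepare_request(struct request *rq, struct bio *bio)
{
	rq->elv.priv[0] = NULL;		/* per-request scheduler state */
}

static void myelv_finish_request(struct request *rq)
{
	/* tear down whatever prepare_request() set up */
}
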
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a69ad12..ced2b00 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
 
 static int blk_mq_poll_stats_bkt(const struct request *rq)
 {
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
 /**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
  * @q: request queue.
  *
  * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function returns, no dispatch
+ * can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
  */
 void blk_mq_quiesce_queue(struct request_queue *q)
 {
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	unsigned int i;
 	bool rcu = false;
 
-	__blk_mq_stop_hw_queues(q, true);
+	blk_mq_quiesce_queue_nowait(q);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
-			synchronize_srcu(&hctx->queue_rq_srcu);
+			synchronize_srcu(hctx->queue_rq_srcu);
 		else
 			rcu = true;
 	}
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function restores the queue to the state it was in before
+ * blk_mq_quiesce_queue() was called.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	/* dispatch requests which are inserted during quiescing */
+	blk_mq_run_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			struct request *rq, unsigned int op)
+static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+		unsigned int tag, unsigned int op)
 {
+	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	struct request *rq = tags->static_rqs[tag];
+
+	rq->rq_flags = 0;
+
+	if (data->flags & BLK_MQ_REQ_INTERNAL) {
+		rq->tag = -1;
+		rq->internal_tag = tag;
+	} else {
+		if (blk_mq_tag_busy(data->hctx)) {
+			rq->rq_flags = RQF_MQ_INFLIGHT;
+			atomic_inc(&data->hctx->nr_active);
+		}
+		rq->tag = tag;
+		rq->internal_tag = -1;
+		data->hctx->tags->rqs[rq->tag] = rq;
+	}
+
 	INIT_LIST_HEAD(&rq->queuelist);
 	/* csd/requeue_work/fifo_time is initialized before use */
-	rq->q = q;
-	rq->mq_ctx = ctx;
+	rq->q = data->q;
+	rq->mq_ctx = data->ctx;
 	rq->cmd_flags = op;
-	if (blk_queue_io_stat(q))
+	if (blk_queue_io_stat(data->q))
 		rq->rq_flags |= RQF_IO_STAT;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
 
-	ctx->rq_dispatched[op_is_sync(op)]++;
+	data->ctx->rq_dispatched[op_is_sync(op)]++;
+	return rq;
 }
-EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-				       unsigned int op)
+static struct request *blk_mq_get_request(struct request_queue *q,
+		struct bio *bio, unsigned int op,
+		struct blk_mq_alloc_data *data)
 {
+	struct elevator_queue *e = q->elevator;
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(data);
-	if (tag != BLK_MQ_TAG_FAIL) {
-		struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+	blk_queue_enter_live(q);
+	data->q = q;
+	if (likely(!data->ctx))
+		data->ctx = blk_mq_get_ctx(q);
+	if (likely(!data->hctx))
+		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+	if (op & REQ_NOWAIT)
+		data->flags |= BLK_MQ_REQ_NOWAIT;
 
-		rq = tags->static_rqs[tag];
+	if (e) {
+		data->flags |= BLK_MQ_REQ_INTERNAL;
 
-		if (data->flags & BLK_MQ_REQ_INTERNAL) {
-			rq->tag = -1;
-			rq->internal_tag = tag;
-		} else {
-			if (blk_mq_tag_busy(data->hctx)) {
-				rq->rq_flags = RQF_MQ_INFLIGHT;
-				atomic_inc(&data->hctx->nr_active);
-			}
-			rq->tag = tag;
-			rq->internal_tag = -1;
-			data->hctx->tags->rqs[rq->tag] = rq;
-		}
-
-		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
-		return rq;
+		/*
+		 * Flush requests are special and go directly to the
+		 * dispatch list.
+		 */
+		if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+			e->type->ops.mq.limit_depth(op, data);
 	}
 
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
+	tag = blk_mq_get_tag(data);
+	if (tag == BLK_MQ_TAG_FAIL) {
+		blk_queue_exit(q);
+		return NULL;
+	}
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+	rq = blk_mq_rq_ctx_init(data, tag, op);
+	if (!op_is_flush(op)) {
+		rq->elv.icq = NULL;
+		if (e && e->type->ops.mq.prepare_request) {
+			if (e->type->icq_cache && rq_ioc(bio))
+				blk_mq_sched_assign_ioc(rq, bio);
+
+			e->type->ops.mq.prepare_request(rq, bio);
+			rq->rq_flags |= RQF_ELVPRIV;
+		}
+	}
+	data->hctx->queued++;
+	return rq;
+}
+
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 	if (ret)
 		return ERR_PTR(ret);
 
-	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
-		unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx)
 {
 	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 	cpu = cpumask_first(alloc_data.hctx->cpumask);
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
 	blk_queue_exit(q);
 
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-			     struct request *rq)
+void blk_mq_free_request(struct request *rq)
 {
-	const int sched_tag = rq->internal_tag;
 	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	const int sched_tag = rq->internal_tag;
 
+	if (rq->rq_flags & RQF_ELVPRIV) {
+		if (e && e->type->ops.mq.finish_request)
+			e->type->ops.mq.finish_request(rq);
+		if (rq->elv.icq) {
+			put_io_context(rq->elv.icq->ioc);
+			rq->elv.icq = NULL;
+		}
+	}
+
+	ctx->rq_completed[rq_is_sync(rq)]++;
 	if (rq->rq_flags & RQF_MQ_INFLIGHT)
 		atomic_dec(&hctx->nr_active);
 
 	wbt_done(q->rq_wb, &rq->issue_stat);
-	rq->rq_flags = 0;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	blk_mq_sched_restart(hctx);
 	blk_queue_exit(q);
 }
-
-static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
-				     struct request *rq)
-{
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-
-	ctx->rq_completed[rq_is_sync(rq)]++;
-	__blk_mq_finish_request(hctx, ctx, rq);
-}
-
-void blk_mq_finish_request(struct request *rq)
-{
-	blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_finish_request);
-
-void blk_mq_free_request(struct request *rq)
-{
-	blk_mq_sched_put_request(rq);
-}
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	blk_account_io_done(rq);
 
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_mq_end_request);
 
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
 		BUG();
@@ -628,25 +687,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-void blk_mq_abort_requeue_list(struct request_queue *q)
-{
-	unsigned long flags;
-	LIST_HEAD(rq_list);
-
-	spin_lock_irqsave(&q->requeue_lock, flags);
-	list_splice_init(&q->requeue_list, &rq_list);
-	spin_unlock_irqrestore(&q->requeue_lock, flags);
-
-	while (!list_empty(&rq_list)) {
-		struct request *rq;
-
-		rq = list_first_entry(&rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		blk_mq_end_request(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(blk_mq_abort_requeue_list);
-
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	if (tag < tags->nr_tags) {
@@ -772,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	blk_queue_exit(q);
 }
 
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-				 struct blk_mq_ctx *ctx, struct bio *bio)
-{
-	struct request *rq;
-	int checked = 8;
-
-	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
-		bool merged = false;
-
-		if (!checked--)
-			break;
-
-		if (!blk_rq_merge_ok(rq, bio))
-			continue;
-
-		switch (blk_try_merge(rq, bio)) {
-		case ELEVATOR_BACK_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_back_merge(q, rq, bio);
-			break;
-		case ELEVATOR_FRONT_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_front_merge(q, rq, bio);
-			break;
-		case ELEVATOR_DISCARD_MERGE:
-			merged = bio_attempt_discard_merge(q, rq, bio);
-			break;
-		default:
-			continue;
-		}
-
-		if (merged)
-			ctx->rq_merged++;
-		return merged;
-	}
-
-	return false;
-}
-
 struct flush_busy_ctx_data {
 	struct blk_mq_hw_ctx *hctx;
 	struct list_head *list;
@@ -945,14 +941,14 @@ static bool reorder_tags_to_front(struct list_head *list)
 	return first != NULL;
 }
 
-static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 				void *key)
 {
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
@@ -987,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
-	int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+	int errors, queued;
 
 	if (list_empty(list))
 		return false;
@@ -998,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 	errors = queued = 0;
 	do {
 		struct blk_mq_queue_data bd;
+		blk_status_t ret;
 
 		rq = list_first_entry(list, struct request, queuelist);
 		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1038,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		}
 
 		ret = q->mq_ops->queue_rq(hctx, &bd);
-		switch (ret) {
-		case BLK_MQ_RQ_QUEUE_OK:
-			queued++;
-			break;
-		case BLK_MQ_RQ_QUEUE_BUSY:
+		if (ret == BLK_STS_RESOURCE) {
 			blk_mq_put_driver_tag_hctx(hctx, rq);
 			list_add(&rq->queuelist, list);
 			__blk_mq_requeue_request(rq);
 			break;
-		default:
-			pr_err("blk-mq: bad return on queue: %d\n", ret);
-		case BLK_MQ_RQ_QUEUE_ERROR:
-			errors++;
-			blk_mq_end_request(rq, -EIO);
-			break;
 		}
 
-		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
-			break;
+		if (unlikely(ret != BLK_STS_OK)) {
+			errors++;
+			blk_mq_end_request(rq, BLK_STS_IOERR);
+			continue;
+		}
+
+		queued++;
 	} while (!list_empty(list));
 
 	hctx->dispatched[queued_to_index(queued)]++;
@@ -1094,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 		 * - blk_mq_run_hw_queue() checks whether or not a queue has
 		 *   been stopped before rerunning a queue.
 		 * - Some but not all block drivers stop a queue before
-		 *   returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
 		 *   and dm-rq.
 		 */
 		if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1119,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	} else {
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
 		blk_mq_sched_dispatch_requests(hctx);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1153,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
-	if (unlikely(blk_mq_hctx_stopped(hctx) ||
-		     !blk_mq_hw_queue_mapped(hctx)))
+	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
+		return;
+
+	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1220,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_queue_stopped);
 
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
+/*
+ * This function is often used by a driver to pause .queue_rq() when it
+ * runs out of resources or some other condition is not satisfied; in that
+ * case BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	if (sync)
-		cancel_delayed_work_sync(&hctx->run_work);
-	else
-		cancel_delayed_work(&hctx->run_work);
+	cancel_delayed_work(&hctx->run_work);
 
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
-
-void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
-	__blk_mq_stop_hw_queue(hctx, false);
-}
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+/*
+ * This function is often used by a driver to pause .queue_rq() when it
+ * runs out of resources or some other condition is not satisfied; in that
+ * case BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queues(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
-		__blk_mq_stop_hw_queue(hctx, sync);
-}
-
-void blk_mq_stop_hw_queues(struct request_queue *q)
-{
-	__blk_mq_stop_hw_queues(q, false);
+		blk_mq_stop_hw_queue(hctx);
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queues);
 
@@ -1314,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
-	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
 		return;
 
 	/*
@@ -1336,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+	lockdep_assert_held(&ctx->lock);
+
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
@@ -1349,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+	lockdep_assert_held(&ctx->lock);
+
 	__blk_mq_insert_req_list(hctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
@@ -1446,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
 		!blk_queue_nomerges(hctx->queue);
 }
 
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
-					 struct blk_mq_ctx *ctx,
-					 struct request *rq, struct bio *bio)
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+				   struct blk_mq_ctx *ctx,
+				   struct request *rq)
 {
-	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
-		blk_mq_bio_to_request(rq, bio);
-		spin_lock(&ctx->lock);
-insert_rq:
-		__blk_mq_insert_request(hctx, rq, false);
-		spin_unlock(&ctx->lock);
-		return false;
-	} else {
-		struct request_queue *q = hctx->queue;
-
-		spin_lock(&ctx->lock);
-		if (!blk_mq_attempt_merge(q, ctx, bio)) {
-			blk_mq_bio_to_request(rq, bio);
-			goto insert_rq;
-		}
-
-		spin_unlock(&ctx->lock);
-		__blk_mq_finish_request(hctx, ctx, rq);
-		return true;
-	}
+	spin_lock(&ctx->lock);
+	__blk_mq_insert_request(hctx, rq, false);
+	spin_unlock(&ctx->lock);
 }
 
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1480,22 +1466,29 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
-				      bool may_sleep)
+static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+					struct request *rq,
+					blk_qc_t *cookie, bool may_sleep)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
 		.last = true,
 	};
-	struct blk_mq_hw_ctx *hctx;
 	blk_qc_t new_cookie;
-	int ret;
+	blk_status_t ret;
+	bool run_queue = true;
+
+	/* RCU or SRCU read lock is needed before checking quiesced flag */
+	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
+		run_queue = false;
+		goto insert;
+	}
 
 	if (q->elevator)
 		goto insert;
 
-	if (!blk_mq_get_driver_tag(rq, &hctx, false))
+	if (!blk_mq_get_driver_tag(rq, NULL, false))
 		goto insert;
 
 	new_cookie = request_to_qc_t(hctx, rq);
@@ -1506,20 +1499,21 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
 	 * would have done
 	 */
 	ret = q->mq_ops->queue_rq(hctx, &bd);
-	if (ret == BLK_MQ_RQ_QUEUE_OK) {
+	switch (ret) {
+	case BLK_STS_OK:
 		*cookie = new_cookie;
 		return;
-	}
-
-	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+	case BLK_STS_RESOURCE:
+		__blk_mq_requeue_request(rq);
+		goto insert;
+	default:
 		*cookie = BLK_QC_T_NONE;
-		blk_mq_end_request(rq, -EIO);
+		blk_mq_end_request(rq, ret);
 		return;
 	}
 
-	__blk_mq_requeue_request(rq);
 insert:
-	blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
+	blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
 }
 
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
@@ -1527,16 +1521,16 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 {
 	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
 		rcu_read_lock();
-		__blk_mq_try_issue_directly(rq, cookie, false);
+		__blk_mq_try_issue_directly(hctx, rq, cookie, false);
 		rcu_read_unlock();
 	} else {
 		unsigned int srcu_idx;
 
 		might_sleep();
 
-		srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
-		__blk_mq_try_issue_directly(rq, cookie, true);
-		srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
+		__blk_mq_try_issue_directly(hctx, rq, cookie, true);
+		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
 	}
 }
 
@@ -1554,7 +1548,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
 		bio_io_error(bio);
@@ -1572,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	trace_block_getrq(q, bio, bio->bi_opf);
 
-	rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
+	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
 		__wbt_done(q->rq_wb, wb_acct);
+		if (bio->bi_opf & REQ_NOWAIT)
+			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
 	}
 
@@ -1638,9 +1634,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 		blk_mq_put_ctx(data.ctx);
 
-		if (same_queue_rq)
+		if (same_queue_rq) {
+			data.hctx = blk_mq_map_queue(q,
+					same_queue_rq->mq_ctx->cpu);
 			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
 					&cookie);
+		}
 	} else if (q->nr_hw_queues > 1 && is_sync) {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
@@ -1649,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true, true, true);
-	} else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+	} else {
 		blk_mq_put_ctx(data.ctx);
+		blk_mq_bio_to_request(rq, bio);
+		blk_mq_queue_io(data.hctx, data.ctx, rq);
 		blk_mq_run_hw_queue(data.hctx, true);
-	} else
-		blk_mq_put_ctx(data.ctx);
+	}
 
 	return cookie;
 }
@@ -1876,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		cleanup_srcu_struct(&hctx->queue_rq_srcu);
+		cleanup_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
@@ -1910,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
-	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
 	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1949,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		goto free_fq;
 
 	if (hctx->flags & BLK_MQ_F_BLOCKING)
-		init_srcu_struct(&hctx->queue_rq_srcu);
+		init_srcu_struct(hctx->queue_rq_srcu);
 
 	blk_mq_debugfs_register_hctx(q, hctx);
 
@@ -2113,20 +2112,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 
@@ -2224,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+			   __alignof__(struct blk_mq_hw_ctx)) !=
+		     sizeof(struct blk_mq_hw_ctx));
+
+	if (tag_set->flags & BLK_MQ_F_BLOCKING)
+		hw_ctx_size += sizeof(struct srcu_struct);
+
+	return hw_ctx_size;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 						struct request_queue *q)
 {
@@ -2238,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 			continue;
 
 		node = blk_mq_hw_queue_to_node(q->mq_map, i);
-		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+		hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
 					GFP_KERNEL, node);
 		if (!hctxs[i])
 			break;
@@ -2660,7 +2683,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return ret;
 }
 
-void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
+							int nr_hw_queues)
 {
 	struct request_queue *q;
 
@@ -2684,6 +2708,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_unfreeze_queue(q);
 }
+
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+	mutex_lock(&set->tag_list_lock);
+	__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
+	mutex_unlock(&set->tag_list_lock);
+}
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
 /* Enable polling stats and return whether they were already enabled. */
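
blk_mq_hw_ctx_size() above makes queue_rq_srcu a variable, trailing part of the hardware context, so the SRCU state only costs memory for BLK_MQ_F_BLOCKING queues. A condensed sketch of the allocation/initialization pairing implied by the hunks above; the wrapper name is hypothetical and blk_mq_hw_ctx_size() itself is static to blk-mq.c:

static struct blk_mq_hw_ctx *alloc_hctx_sketch(struct blk_mq_tag_set *set,
					       int node)
{
	struct blk_mq_hw_ctx *hctx;

	hctx = kzalloc_node(blk_mq_hw_ctx_size(set), GFP_KERNEL, node);
	if (!hctx)
		return NULL;

	/* queue_rq_srcu is a trailing member, only present for blocking queues */
	if (set->flags & BLK_MQ_F_BLOCKING)
		init_srcu_struct(hctx->queue_rq_srcu);

	return hctx;
}
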
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48..1a06fdf 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
 	return data->hctx->tags;
 }
 
-/*
- * Internal helpers for request allocation/init/free
- */
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-			struct request *rq, unsigned int op);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-				struct request *rq);
-void blk_mq_finish_request(struct request *rq);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-					unsigned int op);
-
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4fa81ed3..be1f115 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->nr_batching = BLK_BATCH_REQ;
 
 	blk_set_default_limits(&q->limits);
-
-	/*
-	 * by default assume old behaviour and bounce for any highmem page
-	 */
-	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 }
 EXPORT_SYMBOL(blk_queue_make_request);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 504fee9..27aceab 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -777,24 +777,25 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 }
 
 /**
- * blk_release_queue: - release a &struct request_queue when it is no longer needed
- * @kobj:    the kobj belonging to the request queue to be released
+ * __blk_release_queue - release a request queue when it is no longer needed
+ * @work: pointer to the release_work member of the request queue to be released
  *
  * Description:
- *     blk_release_queue is the pair to blk_init_queue() or
- *     blk_queue_make_request().  It should be called when a request queue is
- *     being released; typically when a block device is being de-registered.
- *     Currently, its primary task it to free all the &struct request
- *     structures that were allocated to the queue and the queue itself.
+ *     blk_release_queue is the counterpart of blk_init_queue(). It should be
+ *     called when a request queue is being released; typically when a block
+ *     device is being de-registered. Its primary task is to free the queue
+ *     itself.
  *
- * Note:
+ * Notes:
  *     The low level driver must have finished any outstanding requests first
  *     via blk_cleanup_queue().
- **/
-static void blk_release_queue(struct kobject *kobj)
+ *
+ *     Although blk_release_queue() may be called with preemption disabled,
+ *     __blk_release_queue() may sleep.
+ */
+static void __blk_release_queue(struct work_struct *work)
 {
-	struct request_queue *q =
-		container_of(kobj, struct request_queue, kobj);
+	struct request_queue *q = container_of(work, typeof(*q), release_work);
 
 	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
 		blk_stat_remove_callback(q, q->poll_cb);
@@ -809,7 +810,7 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_free_queue_stats(q->stats);
 
-	blk_exit_rl(&q->root_rl);
+	blk_exit_rl(q, &q->root_rl);
 
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
@@ -834,6 +835,15 @@ static void blk_release_queue(struct kobject *kobj)
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
+static void blk_release_queue(struct kobject *kobj)
+{
+	struct request_queue *q =
+		container_of(kobj, struct request_queue, kobj);
+
+	INIT_WORK(&q->release_work, __blk_release_queue);
+	schedule_work(&q->release_work);
+}
+
 static const struct sysfs_ops queue_sysfs_ops = {
 	.show	= queue_attr_show,
 	.store	= queue_attr_store,
@@ -887,10 +897,10 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		__blk_mq_register_dev(dev, q);
-
-	blk_mq_debugfs_register(q);
+		blk_mq_debugfs_register(q);
+	}
 
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
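
The release path above splits in two: blk_release_queue() may be the final kobject_put() from a context that cannot sleep, so it only queues work, while __blk_release_queue() runs from the workqueue where sleeping teardown is allowed. The same defer-to-workqueue pattern in isolation, as a sketch with a hypothetical object instead of the request queue:

/* Sketch: kobject release that defers sleeping teardown to a work item. */
struct example_obj {
	struct kobject kobj;
	struct work_struct release_work;
};

static void example_release_work(struct work_struct *work)
{
	struct example_obj *obj =
		container_of(work, struct example_obj, release_work);

	/* process context: teardown that may sleep is safe here */
	kfree(obj);
}

static void example_kobj_release(struct kobject *kobj)
{
	struct example_obj *obj = container_of(kobj, struct example_obj, kobj);

	/* may be called with preemption disabled; do no real work here */
	INIT_WORK(&obj->release_work, example_release_work);
	schedule_work(&obj->release_work);
}
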
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 07cc329..2290f65 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
 	unsigned tag = rq->tag; /* negative tags invalid */
 
+	lockdep_assert_held(q->queue_lock);
+
 	BUG_ON(tag >= bqt->real_max_depth);
 
 	list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
  *    calling this function.  The request will also be removed from
  *    the request queue, so it's the drivers responsibility to readd
  *    it if it should need to be restarted for some reason.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	unsigned max_depth;
 	int tag;
 
+	lockdep_assert_held(q->queue_lock);
+
 	if (unlikely((rq->rq_flags & RQF_QUEUED))) {
 		printk(KERN_ERR
 		       "%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
  *   Hardware conditions may dictate a need to stop all pending requests.
  *   In this case, we will safely clear the block side of the tag queue and
  *   readd all requests to the request queue in the right order.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_invalidate_tags(struct request_queue *q)
 {
 	struct list_head *tmp, *n;
 
+	lockdep_assert_held(q->queue_lock);
+
 	list_for_each_safe(tmp, n, &q->tag_busy_list)
 		blk_requeue_request(q, list_entry_rq(tmp));
 }
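
The "queue lock must be held" notes are replaced by lockdep_assert_held(), which turns the documented requirement into a runtime check: with lockdep enabled, a caller that forgets the lock warns immediately instead of silently corrupting the tag list. The same conversion on a hypothetical helper, as a sketch:

/* Sketch: assert the locking contract instead of only documenting it. */
static void example_requeue_locked(struct request_queue *q, struct request *rq)
{
	lockdep_assert_held(q->queue_lock);	/* caller must hold queue_lock */
	blk_requeue_request(q, rq);
}
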
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b78db2e..a7285bf 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -22,11 +22,18 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
+#define MIN_THROTL_BPS (320 * 1024)
+#define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
+#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
+#define LATENCY_FILTERED_SSD (0)
+/*
+ * For HD, very small latencies come from sequential IO. Such IO tells us
+ * little about whether a cgroup's IO is impacted by others, so we ignore it
+ */
+#define LATENCY_FILTERED_HD (1000L) /* 1ms */
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -157,6 +164,7 @@ struct throtl_grp {
 	unsigned long last_check_time;
 
 	unsigned long latency_target; /* us */
+	unsigned long latency_target_conf; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
@@ -165,6 +173,7 @@ struct throtl_grp {
 	unsigned long checked_last_finish_time; /* ns / 1024 */
 	unsigned long avg_idletime; /* ns / 1024 */
 	unsigned long idletime_threshold; /* us */
+	unsigned long idletime_threshold_conf; /* us */
 
 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -201,8 +210,6 @@ struct throtl_data
 	unsigned int limit_index;
 	bool limit_valid[LIMIT_CNT];
 
-	unsigned long dft_idletime_threshold; /* us */
-
 	unsigned long low_upgrade_time;
 	unsigned long low_downgrade_time;
 
@@ -212,6 +219,7 @@ struct throtl_data
 	struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE];
 	struct latency_bucket __percpu *latency_buckets;
 	unsigned long last_calculate_time;
+	unsigned long filtered_latency;
 
 	bool track_bio_latency;
 };
@@ -294,8 +302,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
 
 	td = tg->td;
 	ret = tg->bps[rw][td->limit_index];
-	if (ret == 0 && td->limit_index == LIMIT_LOW)
-		return tg->bps[rw][LIMIT_MAX];
+	if (ret == 0 && td->limit_index == LIMIT_LOW) {
+		/* intermediate node or iops isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->iops[rw][td->limit_index])
+			return U64_MAX;
+		else
+			return MIN_THROTL_BPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
 	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -315,10 +329,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
 		return UINT_MAX;
+
 	td = tg->td;
 	ret = tg->iops[rw][td->limit_index];
-	if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
-		return tg->iops[rw][LIMIT_MAX];
+	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
+		/* intermediate node or bps isn't 0 */
+		if (!list_empty(&blkg->blkcg->css.children) ||
+		    tg->bps[rw][td->limit_index])
+			return UINT_MAX;
+		else
+			return MIN_THROTL_IOPS;
+	}
 
 	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
 	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
@@ -482,6 +503,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	/* LIMIT_LOW will have default value 0 */
 
 	tg->latency_target = DFL_LATENCY_TARGET;
+	tg->latency_target_conf = DFL_LATENCY_TARGET;
+	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
 }
@@ -510,8 +534,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 	tg->td = td;
-
-	tg->idletime_threshold = td->dft_idletime_threshold;
 }
 
 /*
@@ -684,7 +706,7 @@ static void throtl_dequeue_tg(struct throtl_grp *tg)
 static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
 					  unsigned long expires)
 {
-	unsigned long max_expire = jiffies + 8 * sq_to_tg(sq)->td->throtl_slice;
+	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;
 
 	/*
 	 * Since we are adjusting the throttle limit dynamically, the sleep
@@ -1349,7 +1371,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static void tg_conf_updated(struct throtl_grp *tg)
+static void tg_conf_updated(struct throtl_grp *tg, bool global)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
 	struct cgroup_subsys_state *pos_css;
@@ -1367,8 +1389,26 @@ static void tg_conf_updated(struct throtl_grp *tg)
 	 * restrictions in the whole hierarchy and allows them to bypass
 	 * blk-throttle.
 	 */
-	blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
-		tg_update_has_rules(blkg_to_tg(blkg));
+	blkg_for_each_descendant_pre(blkg, pos_css,
+			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
+		struct throtl_grp *this_tg = blkg_to_tg(blkg);
+		struct throtl_grp *parent_tg;
+
+		tg_update_has_rules(this_tg);
+		/* ignore root/second level */
+		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
+		    !blkg->parent->parent)
+			continue;
+		parent_tg = blkg_to_tg(blkg->parent);
+		/*
+		 * make sure all children have a lower idle time threshold and
+		 * a higher latency target
+		 */
+		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
+				parent_tg->idletime_threshold);
+		this_tg->latency_target = max(this_tg->latency_target,
+				parent_tg->latency_target);
+	}
 
 	/*
 	 * We're already holding queue_lock and know @tg is valid.  Let's
@@ -1413,7 +1453,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
 	else
 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
 
-	tg_conf_updated(tg);
+	tg_conf_updated(tg, false);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1497,34 +1537,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
-	      tg->latency_target == DFL_LATENCY_TARGET)))
+	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
+	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
-	if (tg->bps_conf[READ][off] != bps_dft)
+	if (tg->bps_conf[READ][off] != U64_MAX)
 		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
 			tg->bps_conf[READ][off]);
-	if (tg->bps_conf[WRITE][off] != bps_dft)
+	if (tg->bps_conf[WRITE][off] != U64_MAX)
 		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
 			tg->bps_conf[WRITE][off]);
-	if (tg->iops_conf[READ][off] != iops_dft)
+	if (tg->iops_conf[READ][off] != UINT_MAX)
 		snprintf(bufs[2], sizeof(bufs[2]), "%u",
 			tg->iops_conf[READ][off]);
-	if (tg->iops_conf[WRITE][off] != iops_dft)
+	if (tg->iops_conf[WRITE][off] != UINT_MAX)
 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
 			tg->iops_conf[WRITE][off]);
 	if (off == LIMIT_LOW) {
-		if (tg->idletime_threshold == ULONG_MAX)
+		if (tg->idletime_threshold_conf == ULONG_MAX)
 			strcpy(idle_time, " idle=max");
 		else
 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
-				tg->idletime_threshold);
+				tg->idletime_threshold_conf);
 
-		if (tg->latency_target == ULONG_MAX)
+		if (tg->latency_target_conf == ULONG_MAX)
 			strcpy(latency_time, " latency=max");
 		else
 			snprintf(latency_time, sizeof(latency_time),
-				" latency=%lu", tg->latency_target);
+				" latency=%lu", tg->latency_target_conf);
 	}
 
 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1603,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 	v[2] = tg->iops_conf[READ][index];
 	v[3] = tg->iops_conf[WRITE][index];
 
-	idle_time = tg->idletime_threshold;
-	latency_time = tg->latency_target;
+	idle_time = tg->idletime_threshold_conf;
+	latency_time = tg->latency_target_conf;
 	while (true) {
 		char tok[27];	/* wiops=18446744073709551616 */
 		char *p;
@@ -1623,17 +1663,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 		tg->iops_conf[READ][LIMIT_MAX]);
 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
 		tg->iops_conf[WRITE][LIMIT_MAX]);
+	tg->idletime_threshold_conf = idle_time;
+	tg->latency_target_conf = latency_time;
 
-	if (index == LIMIT_LOW) {
-		blk_throtl_update_limit_valid(tg->td);
-		if (tg->td->limit_valid[LIMIT_LOW])
-			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold = (idle_time == ULONG_MAX) ?
-			ULONG_MAX : idle_time;
-		tg->latency_target = (latency_time == ULONG_MAX) ?
-			ULONG_MAX : latency_time;
+	/* force the user to configure all settings for the low limit */
+	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
+		tg->bps[READ][LIMIT_LOW] = 0;
+		tg->bps[WRITE][LIMIT_LOW] = 0;
+		tg->iops[READ][LIMIT_LOW] = 0;
+		tg->iops[WRITE][LIMIT_LOW] = 0;
+		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+		tg->latency_target = DFL_LATENCY_TARGET;
+	} else if (index == LIMIT_LOW) {
+		tg->idletime_threshold = tg->idletime_threshold_conf;
+		tg->latency_target = tg->latency_target_conf;
 	}
-	tg_conf_updated(tg);
+
+	blk_throtl_update_limit_valid(tg->td);
+	if (tg->td->limit_valid[LIMIT_LOW]) {
+		if (index == LIMIT_LOW)
+			tg->td->limit_index = LIMIT_LOW;
+	} else
+		tg->td->limit_index = LIMIT_MAX;
+	tg_conf_updated(tg, index == LIMIT_LOW &&
+		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
 out_finish:
 	blkg_conf_finish(&ctx);
@@ -1722,17 +1778,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	/*
 	 * cgroup is idle if:
 	 * - single idle is too long, longer than a fixed value (in case user
-	 *   configure a too big threshold) or 4 times of slice
+ *   configure too large a threshold) or 4 times the idletime threshold
 	 * - average think time is more than threshold
 	 * - IO latency is largely below threshold
 	 */
-	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
+	unsigned long time;
+	bool ret;
 
-	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-	       tg->avg_idletime > tg->idletime_threshold ||
-	       (tg->latency_target && tg->bio_cnt &&
+	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+	ret = tg->latency_target == DFL_LATENCY_TARGET ||
+	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	      tg->avg_idletime > tg->idletime_threshold ||
+	      (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
+	throtl_log(&tg->service_queue,
+		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
+		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
+		tg->bio_cnt, ret, tg->td->scale);
+	return ret;
 }
 
 static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
@@ -1828,6 +1892,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
 	struct cgroup_subsys_state *pos_css;
 	struct blkcg_gq *blkg;
 
+	throtl_log(&td->service_queue, "upgrade to max");
 	td->limit_index = LIMIT_MAX;
 	td->low_upgrade_time = jiffies;
 	td->scale = 0;
@@ -1850,6 +1915,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
 {
 	td->scale /= 2;
 
+	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
 	if (td->scale) {
 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
 		return;
@@ -2023,6 +2089,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
 		td->avg_buckets[i].valid = true;
 		last_latency = td->avg_buckets[i].latency;
 	}
+
+	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+		throtl_log(&td->service_queue,
+			"Latency bucket %d: latency=%ld, valid=%d", i,
+			td->avg_buckets[i].latency, td->avg_buckets[i].valid);
 }
 #else
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2218,7 +2289,7 @@ void blk_throtl_bio_endio(struct bio *bio)
 		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
 			bio_op(bio), lat);
 
-	if (tg->latency_target) {
+	if (tg->latency_target && lat >= tg->td->filtered_latency) {
 		int bucket;
 		unsigned int threshold;
 
@@ -2354,18 +2425,19 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
 	struct throtl_data *td;
-	struct cgroup_subsys_state *pos_css;
-	struct blkcg_gq *blkg;
+	int i;
 
 	td = q->td;
 	BUG_ON(!td);
 
 	if (blk_queue_nonrot(q)) {
 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
+		td->filtered_latency = LATENCY_FILTERED_SSD;
 	} else {
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
+		td->filtered_latency = LATENCY_FILTERED_HD;
+		for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
+			td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY;
 	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */
@@ -2375,18 +2447,6 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !q->mq_ops && !q->request_fn;
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
-
-	/*
-	 * some tg are created before queue is fully initialized, eg, nonrot
-	 * isn't initialized yet
-	 */
-	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-
-		tg->idletime_threshold = td->dft_idletime_threshold;
-	}
-	rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
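
Among the blk-throttle changes above, the LIMIT_LOW fallback is the subtle one: a cgroup that sets only one of the low bps/iops limits no longer falls back to its LIMIT_MAX value for the other dimension; instead it gets either "unlimited" (intermediate node, or the other dimension is configured) or a small floor so the low limit still has teeth. A restatement of that decision outside the driver, as a sketch (example_low_bps_limit is illustrative; the constants are the ones defined above):

/* Sketch of the LIMIT_LOW bps fallback chosen in tg_bps_limit() above. */
static u64 example_low_bps_limit(struct throtl_grp *tg, int rw, bool has_children)
{
	u64 bps = tg->bps[rw][LIMIT_LOW];

	if (bps)
		return bps;
	/* intermediate node, or an iops low limit exists: leave bps alone */
	if (has_children || tg->iops[rw][LIMIT_LOW])
		return U64_MAX;
	/* otherwise enforce a small floor so the low limit still applies */
	return MIN_THROTL_BPS;
}
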
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index cbff183..17ec83b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
  * Notes:
  *    Each request has its own timer, and as it is added to the queue, we
  *    set up the timer. When the request completes, we cancel the timer.
- *    Queue lock must be held for the non-mq case, mq case doesn't care.
  */
 void blk_add_timer(struct request *req)
 {
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (!q->mq_ops)
+		lockdep_assert_held(q->queue_lock);
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 17676f4..6a9a0f0 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -503,7 +503,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 }
 
 static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
-			     wait_queue_t *wait, unsigned long rw)
+			     wait_queue_entry_t *wait, unsigned long rw)
 {
 	/*
 	 * inc it here even if disabled, since we'll dec it at completion.
@@ -520,7 +520,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	 * in line to be woken up, wait for our turn.
 	 */
 	if (waitqueue_active(&rqw->wait) &&
-	    rqw->wait.task_list.next != &wait->task_list)
+	    rqw->wait.head.next != &wait->entry)
 		return false;
 
 	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
diff --git a/block/blk.h b/block/blk.h
index 2ed7022..01ebb81 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
 
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
-void blk_exit_rl(struct request_list *rl);
+void blk_exit_rl(struct request_queue *q, struct request_list *rl);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
 void blk_queue_bypass_start(struct request_queue *q);
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	struct request *rq;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+	WARN_ON_ONCE(q->mq_ops);
+
 	while (1) {
 		if (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+	return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3..5793c2d 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,10 +22,12 @@
 #include <asm/tlbflush.h>
 
 #include <trace/events/block.h>
+#include "blk.h"
 
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
+static struct bio_set *bounce_bio_set, *bounce_bio_split;
 static mempool_t *page_pool, *isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
 	BUG_ON(!page_pool);
 	pr_info("pool size: %d pages\n", POOL_SIZE);
 
+	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	BUG_ON(!bounce_bio_set);
+	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+		BUG_ON(1);
+
+	bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
+	BUG_ON(!bounce_bio_split);
+
 	return 0;
 }
 
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 		mempool_free(bvec->bv_page, pool);
 	}
 
-	bio_orig->bi_error = bio->bi_error;
+	bio_orig->bi_status = bio->bi_status;
 	bio_endio(bio_orig);
 	bio_put(bio);
 }
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 {
 	struct bio *bio_orig = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		copy_to_high_bio_irq(bio_orig, bio);
 
 	bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	int rw = bio_data_dir(*bio_orig);
 	struct bio_vec *to, from;
 	struct bvec_iter iter;
-	unsigned i;
+	unsigned i = 0;
+	bool bounce = false;
+	int sectors = 0;
 
-	bio_for_each_segment(from, *bio_orig, iter)
-		if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
-			goto bounce;
+	bio_for_each_segment(from, *bio_orig, iter) {
+		if (i++ < BIO_MAX_PAGES)
+			sectors += from.bv_len >> 9;
+		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+			bounce = true;
+	}
+	if (!bounce)
+		return;
 
-	return;
-bounce:
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+	if (sectors < bio_sectors(*bio_orig)) {
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio_chain(bio, *bio_orig);
+		generic_make_request(*bio_orig);
+		*bio_orig = bio;
+	}
+	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
 
-		if (page_to_pfn(page) <= queue_bounce_pfn(q))
+		if (page_to_pfn(page) <= q->limits.bounce_pfn)
 			continue;
 
 		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (queue_bounce_pfn(q) >= blk_max_pfn)
+		if (q->limits.bounce_pfn >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 */
 	__blk_queue_bounce(q, bio_orig, pool);
 }
-
-EXPORT_SYMBOL(blk_queue_bounce);
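
__blk_queue_bounce() above now clones at most BIO_MAX_PAGES worth of segments: anything beyond that is split off, chained back to the original bio and resubmitted, so the clone never exceeds what a bioset allocation can hold. The split-and-chain idiom on its own, as a sketch (example_trim_bio is a hypothetical name):

/* Sketch: keep working on the first @sectors of @bio, resubmit the rest. */
static struct bio *example_trim_bio(struct bio *bio, int sectors,
				    struct bio_set *bs)
{
	struct bio *head;

	if (sectors >= bio_sectors(bio))
		return bio;			/* already small enough */

	head = bio_split(bio, sectors, GFP_NOIO, bs);
	bio_chain(head, bio);			/* bio completes only after head */
	generic_make_request(bio);		/* send the remainder on its way */
	return head;
}
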
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbb..c4513b2 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
 
-	blk_end_request_all(rq, scsi_req(rq)->result);
+	blk_end_request_all(rq, BLK_STS_OK);
 
 	put_device(job->dev);	/* release reference for the request */
 
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
 		ret = bsg_create_job(dev, req);
 		if (ret) {
 			scsi_req(req)->result = ret;
-			blk_end_request_all(req, ret);
+			blk_end_request_all(req, BLK_STS_OK);
 			spin_lock_irq(q->queue_lock);
 			continue;
 		}
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	q->bsg_job_size = dd_job_size;
 	q->bsg_job_fn = job_fn;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
diff --git a/block/bsg.c b/block/bsg.c
index 6fd0854..37663b6 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
 	rq = blk_get_request(q, op, GFP_KERNEL);
 	if (IS_ERR(rq))
 		return rq;
-	scsi_req_init(rq);
 
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
@@ -294,14 +293,14 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
  * async completion call-back from the block layer, when scsi/ide/whatever
  * calls end_that_request_last() on a request
  */
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct bsg_command *bc = rq->end_io_data;
 	struct bsg_device *bd = bc->bd;
 	unsigned long flags;
 
-	dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
-		bd->name, rq, bc, bc->bio, uptodate);
+	dprintk("%s: finished rq %p bc %p, bio %p\n",
+		bd->name, rq, bc, bc->bio);
 
 	bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
 
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 #ifdef BSG_DEBUG
 	unsigned char buf[32];
 #endif
+
+	if (!blk_queue_scsi_passthrough(rq)) {
+		WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	if (!blk_get_queue(rq))
 		return ERR_PTR(-ENXIO);
 
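
bsg_rq_end_io() above illustrates the tree-wide change to request completion callbacks: they now take a blk_status_t rather than an "uptodate" integer, and errno-style results live in the protocol payload (scsi_req(rq)->result here). A hedged sketch of the new callback shape (example_end_io and its completion payload are hypothetical):

/* Sketch: an end_io callback under the blk_status_t signature. */
static void example_end_io(struct request *rq, blk_status_t status)
{
	struct completion *done = rq->end_io_data;	/* set by the submitter */

	if (status != BLK_STS_OK)
		pr_debug("%s: request %p failed\n", __func__, rq);
	complete(done);
}
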
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index da69b07..3d5c289 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
 static const int cfq_hist_divisor = 4;
 
 /*
- * offset from end of service tree
+ * offset from end of queue service tree for idle class
  */
 #define CFQ_IDLE_DELAY		(NSEC_PER_SEC / 5)
+/* offset from end of group service tree under time slice mode */
+#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5)
+/* offset from end of group service tree under IOPS mode */
+#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5)
 
 /*
  * below this threshold, we consider thinktime immediate
@@ -978,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
 	return min_vdisktime;
 }
 
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
-	s64 delta = (s64)(vdisktime - min_vdisktime);
-	if (delta < 0)
-		min_vdisktime = vdisktime;
-
-	return min_vdisktime;
-}
-
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
 	struct cfq_group *cfqg;
@@ -1362,6 +1357,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 	cfqg->vfraction = max_t(unsigned, vfr, 1);
 }
 
+static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd)
+{
+	if (!iops_mode(cfqd))
+		return CFQ_SLICE_MODE_GROUP_DELAY;
+	else
+		return CFQ_IOPS_MODE_GROUP_DELAY;
+}
+
 static void
 cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
@@ -1381,7 +1384,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	n = rb_last(&st->rb);
 	if (n) {
 		__cfqg = rb_entry_cfqg(n);
-		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
+		cfqg->vdisktime = __cfqg->vdisktime +
+			cfq_get_cfqg_vdisktime_delay(cfqd);
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
 	cfq_group_service_tree_add(st, cfqg);
diff --git a/block/elevator.c b/block/elevator.c
index dac99fb..4bb2f0c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 */
 		if (elv_attempt_insert_merge(q, rq))
 			break;
+		/* fall through */
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(blk_rq_is_passthrough(rq));
 		rq->rq_flags |= RQF_SORTED;
diff --git a/block/genhd.c b/block/genhd.c
index d252d29..7f520fa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -36,7 +36,7 @@ struct kobject *block_depr;
 static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
-static struct device_type disk_type;
+static const struct device_type disk_type;
 
 static void disk_check_events(struct disk_events *ev,
 			      unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
 	return NULL;
 }
 
-static struct device_type disk_type = {
+static const struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
diff --git a/block/ioprio.c b/block/ioprio.c
index 4b120c9..6f5d0b6 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 		case IOPRIO_CLASS_RT:
 			if (!capable(CAP_SYS_ADMIN))
 				return -EPERM;
-			/* fall through, rt has prio field too */
+			/* fall through */
+			/* rt has prio field too */
 		case IOPRIO_CLASS_BE:
 			if (data >= IOPRIO_BE_NR || data < 0)
 				return -EINVAL;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc..f58cab8 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -99,7 +99,7 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
-	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
+	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
@@ -385,7 +385,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		INIT_LIST_HEAD(&khd->rqs[i]);
-		INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
+		INIT_LIST_HEAD(&khd->domain_wait[i].entry);
 		atomic_set(&khd->wait_index[i], 0);
 	}
 
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 	}
 }
 
-static struct request *kyber_get_request(struct request_queue *q,
-					 unsigned int op,
-					 struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
-	struct request *rq;
-
 	/*
 	 * We use the scheduler tags as per-hardware queue queueing tokens.
 	 * Async requests can be limited at this stage.
 	 */
-	if (!op_is_sync(op))
-		data->shallow_depth = kqd->async_depth;
+	if (!op_is_sync(op)) {
+		struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
 
-	rq = __blk_mq_alloc_request(data, op);
-	if (rq)
-		rq_set_domain_token(rq, -1);
-	return rq;
+		data->shallow_depth = kqd->async_depth;
+	}
 }
 
-static void kyber_put_request(struct request *rq)
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
 {
-	struct request_queue *q = rq->q;
-	struct kyber_queue_data *kqd = q->elevator->elevator_data;
+	rq_set_domain_token(rq, -1);
+}
+
+static void kyber_finish_request(struct request *rq)
+{
+	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
 
 	rq_clear_domain_token(kqd, rq);
-	blk_mq_finish_request(rq);
 }
 
 static void kyber_completed_request(struct request *rq)
@@ -507,12 +503,12 @@ static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
 	}
 }
 
-static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key)
 {
 	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
 
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
@@ -523,7 +519,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 {
 	unsigned int sched_domain = khd->cur_domain;
 	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
-	wait_queue_t *wait = &khd->domain_wait[sched_domain];
+	wait_queue_entry_t *wait = &khd->domain_wait[sched_domain];
 	struct sbq_wait_state *ws;
 	int nr;
 
@@ -536,7 +532,7 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 	 * run when one becomes available. Note that this is serialized on
 	 * khd->lock, but we still need to be careful about the waker.
 	 */
-	if (list_empty_careful(&wait->task_list)) {
+	if (list_empty_careful(&wait->entry)) {
 		init_waitqueue_func_entry(wait, kyber_domain_wake);
 		wait->private = hctx;
 		ws = sbq_wait_ptr(domain_tokens,
@@ -734,9 +730,9 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m)	\
 {									\
 	struct blk_mq_hw_ctx *hctx = data;				\
 	struct kyber_hctx_data *khd = hctx->sched_data;			\
-	wait_queue_t *wait = &khd->domain_wait[domain];			\
+	wait_queue_entry_t *wait = &khd->domain_wait[domain];		\
 									\
-	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list));	\
+	seq_printf(m, "%d\n", !list_empty_careful(&wait->entry));	\
 	return 0;							\
 }
 KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
 		.exit_sched = kyber_exit_sched,
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
-		.get_request = kyber_get_request,
-		.put_request = kyber_put_request,
+		.limit_depth = kyber_limit_depth,
+		.prepare_request = kyber_prepare_request,
+		.finish_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
 		.has_work = kyber_has_work,
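
Besides moving kyber to the new limit_depth/prepare_request/finish_request hooks, the hunks above pick up the wait-queue rename that runs through this pull: wait_queue_t becomes wait_queue_entry_t and its task_list member becomes entry (the wait_queue_head's list becomes ->head, as the blk-wbt hunk shows). A sketch of a caller-owned entry with a custom wake function under the new names (the example_* names are hypothetical):

/* Sketch: register a custom wake callback on a caller-owned wait entry. */
static int example_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
			void *key)
{
	list_del_init(&wait->entry);		/* detach before doing work */
	/* kick whatever was blocked on the resource here */
	return 1;
}

static void example_arm_waiter(wait_queue_head_t *wq, wait_queue_entry_t *wait,
			       void *priv)
{
	if (list_empty_careful(&wait->entry)) {	/* not already queued */
		init_waitqueue_func_entry(wait, example_wake);
		wait->private = priv;
		add_wait_queue(wq, wait);
	}
}
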
diff --git a/block/partition-generic.c b/block/partition-generic.c
index ff07b91..c5ec824 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 	if (info) {
 		struct partition_meta_info *pinfo = alloc_part_info(disk);
-		if (!pinfo)
+		if (!pinfo) {
+			err = -ENOMEM;
 			goto out_free_stats;
+		}
 		memcpy(pinfo, info, sizeof(*info));
 		p->info = pinfo;
 	}
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index edcea70..2a365c7 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -115,7 +115,7 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
 		ldm_error("PRIVHEAD disk size doesn't match real disk size");
 		return false;
 	}
-	if (uuid_be_to_bin(data + 0x0030, (uuid_be *)ph->disk_id)) {
+	if (uuid_parse(data + 0x0030, &ph->disk_id)) {
 		ldm_error("PRIVHEAD contains an invalid GUID.");
 		return false;
 	}
@@ -234,7 +234,7 @@ static bool ldm_compare_privheads (const struct privhead *ph1,
 		(ph1->logical_disk_size  == ph2->logical_disk_size)	&&
 		(ph1->config_start       == ph2->config_start)		&&
 		(ph1->config_size        == ph2->config_size)		&&
-		!memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
+		uuid_equal(&ph1->disk_id, &ph2->disk_id));
 }
 
 /**
@@ -557,7 +557,7 @@ static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
 
 	list_for_each (item, &ldb->v_disk) {
 		struct vblk *v = list_entry (item, struct vblk, list);
-		if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
+		if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id))
 			return v;
 	}
 
@@ -892,7 +892,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
 	disk = &vb->vblk.disk;
 	ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
 		sizeof (disk->alt_name));
-	if (uuid_be_to_bin(buffer + 0x19 + r_name, (uuid_be *)disk->disk_id))
+	if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id))
 		return false;
 
 	return true;
@@ -927,7 +927,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
 		return false;
 
 	disk = &vb->vblk.disk;
-	memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
+	uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name));
 	return true;
 }
 
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index 374242c..f4c6055 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -112,8 +112,6 @@ struct frag {				/* VBLK Fragment handling */
 
 /* In memory LDM database structures. */
 
-#define GUID_SIZE		16
-
 struct privhead {			/* Offsets and sizes are in sectors. */
 	u16	ver_major;
 	u16	ver_minor;
@@ -121,7 +119,7 @@ struct privhead {			/* Offsets and sizes are in sectors. */
 	u64	logical_disk_size;
 	u64	config_start;
 	u64	config_size;
-	u8	disk_id[GUID_SIZE];
+	uuid_t	disk_id;
 };
 
 struct tocblock {			/* We have exactly two bitmaps. */
@@ -154,7 +152,7 @@ struct vblk_dgrp {			/* VBLK Disk Group */
 };
 
 struct vblk_disk {			/* VBLK Disk */
-	u8	disk_id[GUID_SIZE];
+	uuid_t	disk_id;
 	u8	alt_name[128];
 };
 
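
Replacing the raw disk_id[GUID_SIZE] arrays with uuid_t lets the LDM code lean on the shared UUID helpers used in the hunks above. Their behaviour in one place, as a sketch (example_uuid_usage is illustrative):

/* Sketch: parse, copy and compare UUIDs via the common helpers. */
static int example_uuid_usage(const char *str)
{
	uuid_t a, b;

	if (uuid_parse(str, &a))	/* expects the "xxxxxxxx-xxxx-..." text form */
		return -EINVAL;

	uuid_copy(&b, &a);
	return uuid_equal(&a, &b) ? 0 : -EIO;
}
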
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 93e7c1b..5610cd5 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state,
 			continue;
 		bsd_start = le32_to_cpu(p->p_offset);
 		bsd_size = le32_to_cpu(p->p_size);
+		if (memcmp(flavour, "bsd\0", 4) == 0)
+			bsd_start += offset;
 		if (offset == bsd_start && size == bsd_size)
 			/* full parent partition, we have it already */
 			continue;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4a294a5..7440de4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	if (hdr->cmd_len > BLK_MAX_CDB) {
 		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		goto error_free_buffer;
 	}
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	cmdlen = COMMAND_SIZE(opcode);
 
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	scsi_req_init(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	scsi_req(rq)->cmd[0] = cmd;
 	scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
 
-void scsi_req_init(struct request *rq)
+/**
+ * scsi_req_init - initialize certain fields of a scsi_request structure
+ * @req: Pointer to a scsi_request structure.
+ * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
+ * of struct scsi_request.
+ */
+void scsi_req_init(struct scsi_request *req)
 {
-	struct scsi_request *req = scsi_req(rq);
-
 	memset(req->__cmd, 0, sizeof(req->__cmd));
 	req->cmd = req->__cmd;
 	req->cmd_len = BLK_MAX_CDB;
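
scsi_req_init() now takes the scsi_request itself and, as the new kernel-doc spells out, only resets the command and sense-length bookkeeping; request allocation for passthrough queues already performs that initialization, which is why the explicit calls can vanish from the ioctl paths above. A sketch of a remaining direct use (example_reset_scsi_cmd is hypothetical, error handling omitted):

/* Sketch: reset the SCSI command fields before reusing a request. */
static void example_reset_scsi_cmd(struct request *rq)
{
	struct scsi_request *req = scsi_req(rq);

	scsi_req_init(req);		/* clears __cmd[], cmd, cmd_len, sense_len */
	req->cmd[0] = TEST_UNIT_READY;	/* illustrative follow-up setup */
}
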
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d6..3416dad 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
  * tag.
  */
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
-			   unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
-				unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+		csum_fn *fn, unsigned int type)
 {
 	unsigned int i;
 
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 				       "(rcvd %u)\n", iter->disk_name,
 				       (unsigned long long)
 				       iter->seed, be32_to_cpu(pi->ref_tag));
-				return -EILSEQ;
+				return BLK_STS_PROTECTION;
 			}
 			break;
 		case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 			       "(rcvd %04x, want %04x)\n", iter->disk_name,
 			       (unsigned long long)iter->seed,
 			       be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		}
 
 next:
@@ -117,45 +117,45 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
 		iter->seed++;
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_generate(iter, t10_pi_ip_fn, 3);
 }
 
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 {
 	return t10_pi_verify(iter, t10_pi_ip_fn, 3);
 }
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index d3a989e..3cd6e12 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -141,7 +141,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	 * signature and returns that to us.
 	 */
 	ret = crypto_akcipher_verify(req);
-	if (ret == -EINPROGRESS) {
+	if ((ret == -EINPROGRESS) || (ret == -EBUSY)) {
 		wait_for_completion(&compl.completion);
 		ret = compl.err;
 	}
diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 672a94c..d178650 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -381,7 +381,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen,
 	}
 
 error:
-	kfree(desc);
+	kzfree(desc);
 error_no_desc:
 	crypto_free_shash(tfm);
 	kleave(" = %d", ret);
@@ -450,6 +450,6 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen,
 	ret = pefile_digest_pe(pebuf, pelen, &ctx);
 
 error:
-	kfree(ctx.digest);
+	kzfree(ctx.digest);
 	return ret;
 }
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index c80765b..dd03fea 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -102,6 +102,7 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen)
 		}
 	}
 
+	ret = -ENOMEM;
 	cert->pub->key = kmemdup(ctx->key, ctx->key_size, GFP_KERNEL);
 	if (!cert->pub->key)
 		goto error_decode;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index fa749f4..cdb27ac 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1767,9 +1767,8 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			break;
 		case -EINPROGRESS:
 		case -EBUSY:
-			ret = wait_for_completion_interruptible(
-				&drbg->ctr_completion);
-			if (!ret && !drbg->ctr_async_err) {
+			wait_for_completion(&drbg->ctr_completion);
+			if (!drbg->ctr_async_err) {
 				reinit_completion(&drbg->ctr_completion);
 				break;
 			}
diff --git a/crypto/gcm.c b/crypto/gcm.c
index b7ad808..3841b5e 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -152,10 +152,8 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 
 	err = crypto_skcipher_encrypt(&data->req);
 	if (err == -EINPROGRESS || err == -EBUSY) {
-		err = wait_for_completion_interruptible(
-			&data->result.completion);
-		if (!err)
-			err = data->result.err;
+		wait_for_completion(&data->result.completion);
+		err = data->result.err;
 	}
 
 	if (err)
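
public_key_verify_signature(), drbg and gcm above all converge on the same rule for async crypto: treat -EBUSY like -EINPROGRESS and wait uninterruptibly, since giving up on a signal would leave the request running against a completion on the stack. The wait pattern reduced to a sketch (struct example_wait mirrors the result structs those callers use):

/* Sketch: uninterruptible wait for an async crypto request to finish. */
struct example_wait {
	struct completion completion;
	int err;
};

static int example_wait_async(int ret, struct example_wait *w)
{
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		wait_for_completion(&w->completion);	/* no -ERESTARTSYS here */
		ret = w->err;			/* filled in by the async callback */
	}
	return ret;
}
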
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 014af74..4faa0fd 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -764,6 +764,44 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int skcipher_setkey_unaligned(struct crypto_skcipher *tfm,
+				     const u8 *key, unsigned int keylen)
+{
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	u8 *buffer, *alignbuffer;
+	unsigned long absize;
+	int ret;
+
+	absize = keylen + alignmask;
+	buffer = kmalloc(absize, GFP_ATOMIC);
+	if (!buffer)
+		return -ENOMEM;
+
+	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	memcpy(alignbuffer, key, keylen);
+	ret = cipher->setkey(tfm, alignbuffer, keylen);
+	kzfree(buffer);
+	return ret;
+}
+
+static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
+	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+
+	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if ((unsigned long)key & alignmask)
+		return skcipher_setkey_unaligned(tfm, key, keylen);
+
+	return cipher->setkey(tfm, key, keylen);
+}
+
 static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
@@ -784,7 +822,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	    tfm->__crt_alg->cra_type == &crypto_givcipher_type)
 		return crypto_init_skcipher_ops_ablkcipher(tfm);
 
-	skcipher->setkey = alg->setkey;
+	skcipher->setkey = skcipher_setkey;
 	skcipher->encrypt = alg->encrypt;
 	skcipher->decrypt = alg->decrypt;
 	skcipher->ivsize = alg->ivsize;
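
skcipher_setkey_unaligned() above handles keys that violate the algorithm's alignmask: over-allocate by alignmask bytes, round the pointer up, copy, and kzfree() the scratch buffer so no key material lingers. With alignmask = 15, for instance, a buffer returned at ...1003 yields an aligned copy at ...1010. The rounding step on its own, as a sketch (example_aligned_copy is hypothetical):

/* Sketch: make an aligned copy when (alignmask + 1) is the required power of two. */
static void *example_aligned_copy(const void *src, size_t len,
				  unsigned long alignmask, void **to_free)
{
	u8 *buf = kmalloc(len + alignmask, GFP_KERNEL);

	if (!buf)
		return NULL;
	*to_free = buf;			/* hand to kzfree() once the copy is consumed */
	buf = (u8 *)ALIGN((unsigned long)buf, alignmask + 1);
	memcpy(buf, src, len);
	return buf;
}
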
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 502ea4dc..560fdae 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -141,9 +141,9 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	int	cpu = mce->extcpu;
 	struct acpi_hest_generic_status *estatus, *tmp;
 	struct acpi_hest_generic_data *gdata;
-	const uuid_le *fru_id = &NULL_UUID_LE;
+	const guid_t *fru_id = &guid_null;
 	char *fru_text = "";
-	uuid_le *sec_type;
+	guid_t *sec_type;
 	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
@@ -165,11 +165,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	err_seq++;
 	gdata = (struct acpi_hest_generic_data *)(tmp + 1);
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
-		fru_id = (uuid_le *)gdata->fru_id;
+		fru_id = (guid_t *)gdata->fru_id;
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 		fru_text = gdata->fru_text;
-	sec_type = (uuid_le *)gdata->section_type;
-	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+	sec_type = (guid_t *)gdata->section_type;
+	if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
 		if (gdata->error_data_length >= sizeof(*mem))
 			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
@@ -182,17 +182,17 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 
 static bool __init extlog_get_l1addr(void)
 {
-	u8 uuid[16];
+	guid_t guid;
 	acpi_handle handle;
 	union acpi_object *obj;
 
-	acpi_str_to_uuid(extlog_dsm_uuid, uuid);
-
+	if (guid_parse(extlog_dsm_uuid, &guid))
+		return false;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return false;
-	if (!acpi_check_dsm(handle, uuid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
+	if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
 		return false;
-	obj = acpi_evaluate_dsm_typed(handle, uuid, EXTLOG_DSM_REV,
+	obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
 				      EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		return false;
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a7..0d2e989 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -416,13 +416,18 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 		}
 	}
 
-	table_desc->validation_count++;
-	if (table_desc->validation_count == 0) {
-		ACPI_ERROR((AE_INFO,
-			    "Table %p, Validation count is zero after increment\n",
-			    table_desc));
-		table_desc->validation_count--;
-		return_ACPI_STATUS(AE_LIMIT);
+	if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+		table_desc->validation_count++;
+
+		/*
+		 * Detect validation_count overflows to ensure that the warning
+		 * message will only be printed once.
+		 */
+		if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+			ACPI_WARNING((AE_INFO,
+				      "Table %p, Validation count overflows\n",
+				      table_desc));
+		}
 	}
 
 	*out_table = table_desc->pointer;
@@ -449,13 +454,20 @@ void acpi_tb_put_table(struct acpi_table_desc *table_desc)
 
 	ACPI_FUNCTION_TRACE(acpi_tb_put_table);
 
-	if (table_desc->validation_count == 0) {
-		ACPI_WARNING((AE_INFO,
-			      "Table %p, Validation count is zero before decrement\n",
-			      table_desc));
-		return_VOID;
+	if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+		table_desc->validation_count--;
+
+		/*
+		 * Detect validation_count underflows to ensure that the warning
+		 * message will only be printed once.
+		 */
+		if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+			ACPI_WARNING((AE_INFO,
+				      "Table %p, Validation count underflows\n",
+				      table_desc));
+			return_VOID;
+		}
 	}
-	table_desc->validation_count--;
 
 	if (table_desc->validation_count == 0) {
 
diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c
index e0587c8..ff096d9 100644
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -474,15 +474,6 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
 			}
 
-			/*
-			 * The end_tag opcode must be followed by a zero byte.
-			 * Although this byte is technically defined to be a checksum,
-			 * in practice, all ASL compilers set this byte to zero.
-			 */
-			if (*(aml + 1) != 0) {
-				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
-			}
-
 			/* Return the pointer to the end_tag if requested */
 
 			if (!user_function) {
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index d0855c0..980515e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -431,12 +431,13 @@ static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	guid_t *sec_type;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
+		sec_type = (guid_t *)gdata->section_type;
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-				 CPER_SEC_PLATFORM_MEM)) {
+		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 			mem_err = (struct cper_sec_mem_err *)(gdata+1);
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
@@ -445,8 +446,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-				      CPER_SEC_PCIE)) {
+		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
 			pcie_err = (struct cper_sec_pcie *)(gdata+1);
 			if (sev == GHES_SEV_RECOVERABLE &&
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index c5fecf9..797b28d 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -666,14 +666,6 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev,
 	int ret = -ENODEV;
 	struct fwnode_handle *iort_fwnode;
 
-	/*
-	 * If we already translated the fwspec there
-	 * is nothing left to do, return the iommu_ops.
-	 */
-	ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
-	if (ops)
-		return ops;
-
 	if (node) {
 		iort_fwnode = iort_get_fwnode(node);
 		if (!iort_fwnode)
@@ -735,6 +727,14 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	u32 streamid = 0;
 	int err;
 
+	/*
+	 * If we already translated the fwspec there
+	 * is nothing left to do, return the iommu_ops.
+	 */
+	ops = iort_fwspec_iommu_ops(dev->iommu_fwspec);
+	if (ops)
+		return ops;
+
 	if (dev_is_pci(dev)) {
 		struct pci_bus *bus = to_pci_dev(dev)->bus;
 		u32 rid;
@@ -782,6 +782,12 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
 	if (err)
 		ops = ERR_PTR(err);
 
+	/* Ignore all other errors apart from EPROBE_DEFER */
+	if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
+		dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
+		ops = NULL;
+	}
+
 	return ops;
 }
 
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index a9a9ab3..d42eeef 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -782,7 +782,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume)
 	if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) ||
 	    (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) &&
             (battery->capacity_now <= battery->alarm)))
-		pm_wakeup_hard_event(&battery->device->dev);
+		pm_wakeup_event(&battery->device->dev, 0);
 
 	return result;
 }
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 784bda6..5a6fbe0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -196,42 +196,19 @@ static void acpi_print_osc_error(acpi_handle handle,
 	pr_debug("\n");
 }
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
-{
-	int i;
-	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
-		24, 26, 28, 30, 32, 34};
-
-	if (strlen(str) != 36)
-		return AE_BAD_PARAMETER;
-	for (i = 0; i < 36; i++) {
-		if (i == 8 || i == 13 || i == 18 || i == 23) {
-			if (str[i] != '-')
-				return AE_BAD_PARAMETER;
-		} else if (!isxdigit(str[i]))
-			return AE_BAD_PARAMETER;
-	}
-	for (i = 0; i < 16; i++) {
-		uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4;
-		uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]);
-	}
-	return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
-
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 {
 	acpi_status status;
 	struct acpi_object_list input;
 	union acpi_object in_params[4];
 	union acpi_object *out_obj;
-	u8 uuid[16];
+	guid_t guid;
 	u32 errors;
 	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
 	if (!context)
 		return AE_ERROR;
-	if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+	if (guid_parse(context->uuid_str, &guid))
 		return AE_ERROR;
 	context->ret.length = ACPI_ALLOCATE_BUFFER;
 	context->ret.pointer = NULL;
@@ -241,7 +218,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 	input.pointer = in_params;
 	in_params[0].type 		= ACPI_TYPE_BUFFER;
 	in_params[0].buffer.length 	= 16;
-	in_params[0].buffer.pointer	= uuid;
+	in_params[0].buffer.pointer	= (u8 *)&guid;
 	in_params[1].type 		= ACPI_TYPE_INTEGER;
 	in_params[1].integer.value 	= context->rev;
 	in_params[2].type 		= ACPI_TYPE_INTEGER;
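
With acpi_str_to_uuid() gone, acpi_run_osc() and the _DSM users elsewhere in this pull parse their UUID strings with the generic guid_parse() and pass the binary guid_t straight through. A minimal sketch of the replacement step (example_get_guid is hypothetical):

/* Sketch: convert a textual UUID once, then reuse the guid_t. */
static acpi_status example_get_guid(const char *uuid_str, guid_t *guid)
{
	if (guid_parse(uuid_str, guid))	/* non-zero means malformed input */
		return AE_BAD_PARAMETER;
	return AE_OK;
}
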
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index b7c2a06..e19f530 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -57,6 +57,7 @@
 
 #define ACPI_BUTTON_LID_INIT_IGNORE	0x00
 #define ACPI_BUTTON_LID_INIT_OPEN	0x01
+#define ACPI_BUTTON_LID_INIT_METHOD	0x02
 
 #define _COMPONENT		ACPI_BUTTON_COMPONENT
 ACPI_MODULE_NAME("button");
@@ -112,7 +113,7 @@ struct acpi_button {
 
 static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
 static struct acpi_device *lid_device;
-static u8 lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
+static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
 
 static unsigned long lid_report_interval __read_mostly = 500;
 module_param(lid_report_interval, ulong, 0644);
@@ -216,7 +217,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state)
 	}
 
 	if (state)
-		pm_wakeup_hard_event(&device->dev);
+		pm_wakeup_event(&device->dev, 0);
 
 	ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
 	if (ret == NOTIFY_DONE)
@@ -376,6 +377,9 @@ static void acpi_lid_initialize_state(struct acpi_device *device)
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		(void)acpi_lid_notify_state(device, 1);
 		break;
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		(void)acpi_lid_update_state(device);
+		break;
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 	default:
 		break;
@@ -398,7 +402,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event)
 		} else {
 			int keycode;
 
-			pm_wakeup_hard_event(&device->dev);
+			pm_wakeup_event(&device->dev, 0);
 			if (button->suspended)
 				break;
 
@@ -530,7 +534,6 @@ static int acpi_button_add(struct acpi_device *device)
 		lid_device = device;
 	}
 
-	device_init_wakeup(&device->dev, true);
 	printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device));
 	return 0;
 
@@ -560,6 +563,9 @@ static int param_set_lid_init_state(const char *val, struct kernel_param *kp)
 	if (!strncmp(val, "open", sizeof("open") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_OPEN;
 		pr_info("Notify initial lid state as open\n");
+	} else if (!strncmp(val, "method", sizeof("method") - 1)) {
+		lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
+		pr_info("Notify initial lid state with _LID return value\n");
 	} else if (!strncmp(val, "ignore", sizeof("ignore") - 1)) {
 		lid_init_state = ACPI_BUTTON_LID_INIT_IGNORE;
 		pr_info("Do not notify initial lid state\n");
@@ -573,6 +579,8 @@ static int param_get_lid_init_state(char *buffer, struct kernel_param *kp)
 	switch (lid_init_state) {
 	case ACPI_BUTTON_LID_INIT_OPEN:
 		return sprintf(buffer, "open");
+	case ACPI_BUTTON_LID_INIT_METHOD:
+		return sprintf(buffer, "method");
 	case ACPI_BUTTON_LID_INIT_IGNORE:
 		return sprintf(buffer, "ignore");
 	default:
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 798d500..993fd31 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -24,7 +24,6 @@
 #include <linux/pm_qos.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
-#include <linux/suspend.h>
 
 #include "internal.h"
 
@@ -400,7 +399,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
 	mutex_lock(&acpi_pm_notifier_lock);
 
 	if (adev->wakeup.flags.notifier_present) {
-		pm_wakeup_ws_event(adev->wakeup.ws, 0, true);
+		__pm_wakeup_event(adev->wakeup.ws, 0);
 		if (adev->wakeup.context.work.func)
 			queue_pm_work(&adev->wakeup.context.work);
 	}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 656acb5..097eff0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -74,11 +74,11 @@ struct nfit_table_prev {
 	struct list_head flushes;
 };
 
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
+static guid_t nfit_uuid[NFIT_UUID_MAX];
 
-const u8 *to_nfit_uuid(enum nfit_uuids id)
+const guid_t *to_nfit_uuid(enum nfit_uuids id)
 {
-	return nfit_uuid[id];
+	return &nfit_uuid[id];
 }
 EXPORT_SYMBOL(to_nfit_uuid);
 
@@ -222,7 +222,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	u32 offset, fw_status = 0;
 	acpi_handle handle;
 	unsigned int func;
-	const u8 *uuid;
+	const guid_t *guid;
 	int rc, i;
 
 	func = cmd;
@@ -245,7 +245,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nvdimm_cmd_mask(nvdimm);
 		dsm_mask = nfit_mem->dsm_mask;
 		desc = nd_cmd_dimm_desc(cmd);
-		uuid = to_nfit_uuid(nfit_mem->family);
+		guid = to_nfit_uuid(nfit_mem->family);
 		handle = adev->handle;
 	} else {
 		struct acpi_device *adev = to_acpi_dev(acpi_desc);
@@ -254,7 +254,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		cmd_mask = nd_desc->cmd_mask;
 		dsm_mask = cmd_mask;
 		desc = nd_cmd_bus_desc(cmd);
-		uuid = to_nfit_uuid(NFIT_DEV_BUS);
+		guid = to_nfit_uuid(NFIT_DEV_BUS);
 		handle = adev->handle;
 		dimm_name = "bus";
 	}
@@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
-	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+	out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
 	if (!out_obj) {
 		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 				cmd_name);
@@ -409,7 +409,7 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
 	int i;
 
 	for (i = 0; i < NFIT_UUID_MAX; i++)
-		if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+		if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
 			return i;
 	return -1;
 }
@@ -1415,7 +1415,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
 	unsigned long dsm_mask;
-	const u8 *uuid;
+	const guid_t *guid;
 	int i;
 	int family = -1;
 
@@ -1444,7 +1444,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	/*
 	 * Until standardization materializes we need to consider 4
 	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * tells us if any commands are reachable through this GUID.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1474,9 +1474,9 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 	}
 
-	uuid = to_nfit_uuid(nfit_mem->family);
+	guid = to_nfit_uuid(nfit_mem->family);
 	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
 	return 0;
@@ -1611,7 +1611,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+	const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
 	struct acpi_device *adev;
 	int i;
 
@@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 		return;
 
 	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
 			set_bit(i, &nd_desc->cmd_mask);
 }
 
@@ -3051,19 +3051,19 @@ static __init int nfit_init(void)
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
 	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
 
-	acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-	acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-	acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-	acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-	acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+	guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
+	guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+	guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
+	guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
+	guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
+	guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
+	guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
+	guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 3ba1c34..fd86bec 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -26,7 +26,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 	struct nfit_spa *nfit_spa;
 
 	/* We only care about memory errors */
-	if (!(mce->status & MCACOD))
+	if (!mce_is_memory_error(mce))
 		return NOTIFY_DONE;
 
 	/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 58fb7d6..29bdd95 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -18,7 +18,6 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
-#include <linux/uuid.h>
 #include <linux/acpi.h>
 #include <acpi/acuuid.h>
 
@@ -237,7 +236,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
-const u8 *to_nfit_uuid(enum nfit_uuids id);
+const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 919be0a..2405442 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -523,7 +523,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 	struct acpi_pci_root *root;
 	acpi_handle handle = device->handle;
 	int no_aspm = 0;
-	bool hotadd = system_state != SYSTEM_BOOTING;
+	bool hotadd = system_state == SYSTEM_RUNNING;
 
 	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
 	if (!root)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e39ec7b..d531629 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1371,8 +1371,8 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 	iort_set_dma_mask(dev);
 
 	iommu = iort_iommu_configure(dev);
-	if (IS_ERR(iommu))
-		return PTR_ERR(iommu);
+	if (IS_ERR(iommu) && PTR_ERR(iommu) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
 	/*
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
 	adev->flags.coherent_dma = cca;
 }
 
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+	bool *is_spi_i2c_slave_p = data;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	/*
+	 * devices that are connected to UART still need to be enumerated to
+	 * platform bus
+	 */
+	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+		*is_spi_i2c_slave_p = true;
+
+	 /* no need to do more checking */
+	return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+	struct list_head resource_list;
+	bool is_spi_i2c_slave = false;
+
+	INIT_LIST_HEAD(&resource_list);
+	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+			       &is_spi_i2c_slave);
+	acpi_dev_free_resource_list(&resource_list);
+
+	return is_spi_i2c_slave;
+}
+
 void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 			     int type, unsigned long long sta)
 {
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	acpi_bus_get_flags(device);
 	device->flags.match_driver = false;
 	device->flags.initialized = true;
+	device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
 	acpi_device_clear_enumerated(device);
 	device_initialize(&device->dev);
 	dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
 	return AE_OK;
 }
 
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
-	bool *is_spi_i2c_slave_p = data;
-
-	if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-		return 1;
-
-	/*
-	 * devices that are connected to UART still need to be enumerated to
-	 * platform bus
-	 */
-	if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
-		*is_spi_i2c_slave_p = true;
-
-	 /* no need to do more checking */
-	return -1;
-}
-
 static void acpi_default_enumeration(struct acpi_device *device)
 {
-	struct list_head resource_list;
-	bool is_spi_i2c_slave = false;
-
 	/*
 	 * Do not enumerate SPI/I2C slaves as they will be enumerated by their
 	 * respective parents.
 	 */
-	INIT_LIST_HEAD(&resource_list);
-	acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
-			       &is_spi_i2c_slave);
-	acpi_dev_free_resource_list(&resource_list);
-	if (!is_spi_i2c_slave) {
+	if (!device->flags.spi_i2c_slave) {
 		acpi_create_platform_device(device, NULL);
 		acpi_device_set_enumerated(device);
 	} else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
 		return;
 
 	device->flags.match_driver = true;
-	if (ret > 0) {
+	if (ret > 0 && !device->flags.spi_i2c_slave) {
 		acpi_device_set_enumerated(device);
 		goto ok;
 	}
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
 	if (ret < 0)
 		return;
 
-	if (device->pnp.type.platform_id)
-		acpi_default_enumeration(device);
-	else
+	if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
 		acpi_device_set_enumerated(device);
+	else
+		acpi_default_enumeration(device);
 
  ok:
 	list_for_each_entry(child, &device->children, node)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index a6574d6..097d630 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -663,40 +663,14 @@ static int acpi_freeze_prepare(void)
 	acpi_os_wait_events_complete();
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
-
 	return 0;
 }
 
-static void acpi_freeze_wake(void)
-{
-	/*
-	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
-	 * that the SCI has triggered while suspended, so cancel the wakeup in
-	 * case it has not been a wakeup event (the GPEs will be checked later).
-	 */
-	if (acpi_sci_irq_valid() &&
-	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
-		pm_system_cancel_wakeup();
-}
-
-static void acpi_freeze_sync(void)
-{
-	/*
-	 * Process all pending events in case there are any wakeup ones.
-	 *
-	 * The EC driver uses the system workqueue, so that one needs to be
-	 * flushed too.
-	 */
-	acpi_os_wait_events_complete();
-	flush_scheduled_work();
-}
-
 static void acpi_freeze_restore(void)
 {
 	acpi_disable_wakeup_devices(ACPI_STATE_S0);
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
-
 	acpi_enable_all_runtime_gpes();
 }
 
@@ -708,8 +682,6 @@ static void acpi_freeze_end(void)
 static const struct platform_freeze_ops acpi_freeze_ops = {
 	.begin = acpi_freeze_begin,
 	.prepare = acpi_freeze_prepare,
-	.wake = acpi_freeze_wake,
-	.sync = acpi_freeze_sync,
 	.restore = acpi_freeze_restore,
 	.end = acpi_freeze_end,
 };
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 1b5ee1e..e414fab 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj,
 	    container_of(bin_attr, struct acpi_table_attr, attr);
 	struct acpi_table_header *table_header = NULL;
 	acpi_status status;
+	ssize_t rc;
 
 	status = acpi_get_table(table_attr->name, table_attr->instance,
 				&table_header);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	return memory_read_from_buffer(buf, count, &offset,
-				       table_header, table_header->length);
+	rc = memory_read_from_buffer(buf, count, &offset, table_header,
+			table_header->length);
+	acpi_put_table(table_header);
+	return rc;
 }
 
 static int acpi_table_attr_init(struct kobject *tables_obj,
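
The change above pairs acpi_get_table() with acpi_put_table() so that the reference (and any transient mapping) taken for the sysfs read is dropped once the buffer has been copied. The same pattern in isolation, as a sketch (table signature and usage are placeholders):

	struct acpi_table_header *hdr;
	acpi_status status;

	status = acpi_get_table(ACPI_SIG_FADT, 1, &hdr);
	if (ACPI_FAILURE(status))
		return -ENODEV;

	/* ... read whatever is needed from hdr ... */

	acpi_put_table(hdr);	/* release the reference from acpi_get_table() */
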
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 27d0dcf..b9d956c 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -613,19 +613,19 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
 /**
  * acpi_evaluate_dsm - evaluate device's _DSM method
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes
+ * @guid: GUID of requested functions, should be 16 bytes
  * @rev: revision number of requested function
  * @func: requested function number
  * @argv4: the function specific parameter
  *
- * Evaluate device's _DSM method with specified UUID, revision id and
+ * Evaluate device's _DSM method with specified GUID, revision id and
  * function number. Caller needs to free the returned object.
  *
  * Though ACPI defines the fourth parameter for _DSM should be a package,
  * some old BIOSes do expect a buffer or an integer etc.
  */
 union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
+acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
 		  union acpi_object *argv4)
 {
 	acpi_status ret;
@@ -638,7 +638,7 @@ acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
 
 	params[0].type = ACPI_TYPE_BUFFER;
 	params[0].buffer.length = 16;
-	params[0].buffer.pointer = (char *)uuid;
+	params[0].buffer.pointer = (u8 *)guid;
 	params[1].type = ACPI_TYPE_INTEGER;
 	params[1].integer.value = rev;
 	params[2].type = ACPI_TYPE_INTEGER;
@@ -666,7 +666,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
 /**
  * acpi_check_dsm - check if _DSM method supports requested functions.
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes at least
+ * @guid: GUID of requested functions, should be 16 bytes at least
  * @rev: revision number of requested functions
  * @funcs: bitmap of requested functions
  *
@@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
  * functions. Currently only support 64 functions at maximum, should be
  * enough for now.
  */
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
 {
 	int i;
 	u64 mask = 0;
@@ -683,7 +683,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	if (funcs == 0)
 		return false;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
+	obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL);
 	if (!obj)
 		return false;
 
@@ -697,7 +697,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
-	 * function 0 for the specified UUID and revision.
+	 * function 0 for the specified GUID and revision.
 	 */
 	if ((mask & 0x1) && (mask & funcs) == funcs)
 		return true;
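
With the signature changes above, _DSM callers pass a guid_t rather than a raw 16-byte array. A short usage sketch under those assumptions (the UUID string, revision and function number are placeholders):

	guid_t guid;
	union acpi_object *obj;

	if (guid_parse("00000000-0000-0000-0000-000000000000", &guid))
		return -EINVAL;

	if (!acpi_check_dsm(handle, &guid, 1 /* rev */, 1ULL << 2 /* func 2 */))
		return -ENODEV;

	obj = acpi_evaluate_dsm(handle, &guid, 1, 2, NULL);
	if (!obj)
		return -EIO;
	/* ... consume obj, then free it as the kerneldoc above requires ... */
	ACPI_FREE(obj);
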
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2fc5240..c699540 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1364,6 +1364,40 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
 {}
 #endif
 
+/*
+ * On the Acer Aspire Switch Alpha 12, sometimes all SATA ports are detected
+ * as DUMMY, or detected but eventually get a "link down" and never get up
+ * again. When this happens, CAP.NP may hold a value of 0x00 or 0x01, and the
+ * port_map may hold a value of 0x00.
+ *
+ * Overriding CAP.NP to 0x02 and the port_map to 0x7 will reveal all 3 ports
+ * and can significantly reduce the occurrence of the problem.
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=189471
+ */
+static void acer_sa5_271_workaround(struct ahci_host_priv *hpriv,
+				    struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		{
+			.ident = "Acer Switch Alpha 12",
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+				DMI_MATCH(DMI_PRODUCT_NAME, "Switch SA5-271")
+			},
+		},
+		{ }
+	};
+
+	if (dmi_check_system(sysids)) {
+		dev_info(&pdev->dev, "enabling Acer Switch Alpha 12 workaround\n");
+		if ((hpriv->saved_cap & 0xC734FF00) == 0xC734FF00) {
+			hpriv->port_map = 0x7;
+			hpriv->cap = 0xC734FF02;
+		}
+	}
+}
+
 #ifdef CONFIG_ARM64
 /*
  * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently.
@@ -1636,6 +1670,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			 "online status unreliable, applying workaround\n");
 	}
 
+
+	/* Acer SA5-271 workaround modifies private_data */
+	acer_sa5_271_workaround(hpriv, pdev);
+
 	/* CAP.NP sometimes indicates the index of the last enabled
 	 * port, at other times, that of the last possible port, so
 	 * determining the maximum port number requires looking at
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index aaa761b..cd2eab6 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -514,8 +514,9 @@ int ahci_platform_init_host(struct platform_device *pdev,
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq <= 0) {
-		dev_err(dev, "no irq\n");
-		return -EINVAL;
+		if (irq != -EPROBE_DEFER)
+			dev_err(dev, "no irq\n");
+		return irq;
 	}
 
 	hpriv->irq = irq;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2d83b8c..e157a0e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6800,7 +6800,7 @@ static int __init ata_parse_force_one(char **cur,
 	}
 
 	force_ent->port = simple_strtoul(id, &endp, 10);
-	if (p == endp || *endp != '\0') {
+	if (id == endp || *endp != '\0') {
 		*reason = "invalid port/link";
 		return -EINVAL;
 	}
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index b66bcda..3b2246d 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -4067,7 +4067,6 @@ static int mv_platform_probe(struct platform_device *pdev)
 	struct ata_host *host;
 	struct mv_host_priv *hpriv;
 	struct resource *res;
-	void __iomem *mmio;
 	int n_ports = 0, irq = 0;
 	int rc;
 	int port;
@@ -4086,9 +4085,8 @@ static int mv_platform_probe(struct platform_device *pdev)
 	 * Get the register base first
 	 */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mmio = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mmio))
-		return PTR_ERR(mmio);
+	if (res == NULL)
+		return -EINVAL;
 
 	/* allocate host */
 	if (pdev->dev.of_node) {
@@ -4132,7 +4130,12 @@ static int mv_platform_probe(struct platform_device *pdev)
 	hpriv->board_idx = chip_soc;
 
 	host->iomap = NULL;
-	hpriv->base = mmio - SATAHC0_REG_BASE;
+	hpriv->base = devm_ioremap(&pdev->dev, res->start,
+				   resource_size(res));
+	if (!hpriv->base)
+		return -ENOMEM;
+
+	hpriv->base -= SATAHC0_REG_BASE;
 
 	hpriv->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(hpriv->clk))
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 5d38245..b7939a2 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -890,7 +890,10 @@ static int sata_rcar_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to get access to sata clock\n");
 		return PTR_ERR(priv->clk);
 	}
-	clk_prepare_enable(priv->clk);
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	host = ata_host_alloc(&pdev->dev, 1);
 	if (!host) {
@@ -970,8 +973,11 @@ static int sata_rcar_resume(struct device *dev)
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct sata_rcar_priv *priv = host->private_data;
 	void __iomem *base = priv->base;
+	int ret;
 
-	clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	/* ack and mask */
 	iowrite32(0, base + SATAINTSTAT_REG);
@@ -988,8 +994,11 @@ static int sata_rcar_restore(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct sata_rcar_priv *priv = host->private_data;
+	int ret;
 
-	clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		return ret;
 
 	sata_rcar_setup_port(host);
 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5548f96..0440d95 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -377,7 +377,7 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 	if (!pfn_valid_within(pfn))
 		return -1;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-	if (system_state == SYSTEM_BOOTING)
+	if (system_state < SYSTEM_RUNNING)
 		return early_pfn_to_nid(pfn);
 #endif
 	page = pfn_to_page(pfn);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e987a6f..9faee1c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1091,6 +1091,11 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
+	if (pm_wakeup_pending()) {
+		async_error = -EBUSY;
+		goto Complete;
+	}
+
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index f62082f..c313b60 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly;
 /* First wakeup IRQ seen by the kernel in the last cycle. */
 unsigned int pm_wakeup_irq __read_mostly;
 
-/* If greater than 0 and the system is suspending, terminate the suspend. */
-static atomic_t pm_abort_suspend __read_mostly;
+/* If set and the system is suspending, terminate the suspend. */
+static bool pm_abort_suspend __read_mostly;
 
 /*
  * Combined counters of registered wakeup events and wakeup events in progress.
@@ -512,13 +512,12 @@ static bool wakeup_source_not_registered(struct wakeup_source *ws)
 /**
  * wakeup_source_activate - Mark given wakeup source as active.
  * @ws: Wakeup source to handle.
- * @hard: If set, abort suspends in progress and wake up from suspend-to-idle.
  *
  * Update the @ws' statistics and, if @ws has just been activated, notify the PM
  * core of the event by incrementing the counter of wakeup events being
  * processed.
  */
-static void wakeup_source_activate(struct wakeup_source *ws, bool hard)
+static void wakeup_source_activate(struct wakeup_source *ws)
 {
 	unsigned int cec;
 
@@ -526,9 +525,6 @@ static void wakeup_source_activate(struct wakeup_source *ws, bool hard)
 			"unregistered wakeup source\n"))
 		return;
 
-	if (hard)
-		pm_system_wakeup();
-
 	ws->active = true;
 	ws->active_count++;
 	ws->last_time = ktime_get();
@@ -554,7 +550,10 @@ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard)
 		ws->wakeup_count++;
 
 	if (!ws->active)
-		wakeup_source_activate(ws, hard);
+		wakeup_source_activate(ws);
+
+	if (hard)
+		pm_system_wakeup();
 }
 
 /**
@@ -856,26 +855,20 @@ bool pm_wakeup_pending(void)
 		pm_print_active_wakeup_sources();
 	}
 
-	return ret || atomic_read(&pm_abort_suspend) > 0;
+	return ret || pm_abort_suspend;
 }
 
 void pm_system_wakeup(void)
 {
-	atomic_inc(&pm_abort_suspend);
+	pm_abort_suspend = true;
 	freeze_wake();
 }
 EXPORT_SYMBOL_GPL(pm_system_wakeup);
 
-void pm_system_cancel_wakeup(void)
+void pm_wakeup_clear(void)
 {
-	atomic_dec(&pm_abort_suspend);
-}
-
-void pm_wakeup_clear(bool reset)
-{
+	pm_abort_suspend = false;
 	pm_wakeup_irq = 0;
-	if (reset)
-		atomic_set(&pm_abort_suspend, 0);
 }
 
 void pm_system_irq_wakeup(unsigned int irq_number)
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be..245a879 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
 						 bool SuccessfulIO)
 {
 	struct request *Request = Command->Request;
-	int Error = SuccessfulIO ? 0 : -EIO;
+	blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
 
 	pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
 		Command->SegmentCount, Command->DmaDirection);
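
This and the following block-driver hunks apply one mechanical conversion: completion paths now report a blk_status_t instead of a negative errno, and the generic converters are used where an errno still has to cross that boundary. A condensed sketch of the pattern (illustrative only, not taken from any single driver):

	#include <linux/blkdev.h>

	/* Driver-internal result -> block-layer status at completion time. */
	static blk_status_t example_complete_status(int err)
	{
		return err ? BLK_STS_IOERR : BLK_STS_OK;
	}

	/* Where an errno is still stored or logged, translate explicitly. */
	static int example_bio_errno(struct bio *bio)
	{
		return blk_status_to_errno(bio->bi_status);
	}
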
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f67..49908c7 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
 	struct amiga_floppy_struct *floppy;
 	char *data;
 	unsigned long flags;
-	int err;
+	blk_status_t err;
 
 next_req:
 	rq = set_next_request();
@@ -1392,7 +1392,7 @@ static void redo_fd_request(void)
 
 next_segment:
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+	for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
 		       blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ static void redo_fd_request(void)
 #endif
 		block = blk_rq_pos(rq) + cnt;
 		if ((int)block > floppy->blocks) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1413,7 +1413,7 @@ static void redo_fd_request(void)
 #endif
 
 		if (get_track(drive, track) == -1) {
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			break;
 		}
 
@@ -1424,7 +1424,7 @@ static void redo_fd_request(void)
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				err = -EIO;
+				err = BLK_STS_IOERR;
 				break;
 			}
 			/*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876..6797e6c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
 			d->aoemajor, d->aoeminor);
 		goto err_mempool;
 	}
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	spin_lock_irqsave(&d->lock, flags);
 	WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c0..dc43254 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
 		d->ip.rq = NULL;
 	do {
 		bio = rq->bio;
-		bok = !fastfail && !bio->bi_error;
-	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+		bok = !fastfail && !bio->bi_status;
+	} while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
 
 	/* cf. http://lkml.org/lkml/2006/10/31/28 */
 	if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
 noskb:		if (buf)
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1144,7 +1144,7 @@ noskb:		if (buf)
 				"aoe: runt data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 			       skb->len, n);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb:		if (buf)
 				"aoe: too-large data size in read from",
 				(long) d->aoemajor, d->aoeminor,
 				n, f->iter.bi_size);
-			buf->bio->bi_error = -EIO;
+			buf->bio->bi_status = BLK_STS_IOERR;
 			break;
 		}
 		bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
 	if (buf == NULL)
 		return;
 	buf->iter.bi_size = 0;
-	buf->bio->bi_error = -EIO;
+	buf->bio->bi_status = BLK_STS_IOERR;
 	if (buf->nframesout == 0)
 		aoe_end_buf(d, buf);
 }
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947..b28fefb 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
 	if (rq == NULL)
 		return;
 	while ((bio = d->ip.nxbio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		d->ip.nxbio = bio->bi_next;
 		n = (unsigned long) rq->special;
 		rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd..92da886 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
 	
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
 {
 	if (!__blk_end_request_cur(fd_request, err))
 		fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
 	fd_request->error_count++;
 	if (fd_request->error_count >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 	}
 	else if (fd_request->error_count == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
 		    }
 		    else {
 			/* all sectors finished */
-			fd_end_request_cur(0);
+			fd_end_request_cur(BLK_STS_OK);
 			redo_fd_request();
 			return;
 		    }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
 	}
 	else {
 		/* all sectors finished */
-		fd_end_request_cur(0);
+		fd_end_request_cur(BLK_STS_OK);
 		redo_fd_request();
 	}
 	return;
@@ -1445,7 +1445,7 @@ static void redo_fd_request(void)
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 		
@@ -1461,12 +1461,12 @@ static void redo_fd_request(void)
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			fd_end_request_cur(-EIO);
+			fd_end_request_cur(BLK_STS_IOERR);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ static void redo_fd_request(void)
 	}
 	
 	if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
-		fd_end_request_cur(-EIO);
+		fd_end_request_cur(BLK_STS_IOERR);
 		goto repeat;
 	}
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f..6112e99 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
 
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
-	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
 	/* This is so fdisk will align partitions on 4k, because of
 	 * direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd37550..02a6119 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
 	/* set the residual count for pc requests */
 	if (blk_rq_is_passthrough(rq))
 		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-	blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+	blk_end_request_all(rq, scsi_req(rq)->result ?
+			BLK_STS_IOERR : BLK_STS_OK);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
 	disk->queue->queue_lock = &h->lock;
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)
 		goto cleanup_queue;
 
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa..e02c45c 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 	else
 		submit_bio(bio);
 	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		err = device->md_io.error;
 
  out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a41..809fd24 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
 	    !bm_test_page_unchanged(b->bm_pages[idx]))
 		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* ctx error will hold the completed-last non-zero error code,
 		 * in case error codes differ. */
-		ctx->error = bio->bi_error;
+		ctx->error = blk_status_to_errno(bio->bi_status);
 		bm_set_page_io_err(b->bm_pages[idx]);
 		/* Not identical to on disk version of it.
 		 * Is BM_PAGE_IO_ERROR enough? */
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
-					bio->bi_error, idx);
+					bio->bi_status, idx);
 	} else {
 		bm_clear_page_io_err(b->bm_pages[idx]);
 		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45b..d17b6e6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
 extern struct mutex resources_mutex;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 	__release(local);
 	if (!bio->bi_bdev) {
 		drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
-		bio->bi_error = -ENODEV;
+		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return;
 	}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c3..5fb99e0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
 mempool_t *drbd_ee_mempool;
 mempool_t *drbd_md_io_page_pool;
 struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
 
 	/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
+	if (drbd_io_bio_set)
+		bioset_free(drbd_io_bio_set);
 	if (drbd_md_io_bio_set)
 		bioset_free(drbd_md_io_bio_set);
 	if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);
 
+	drbd_io_bio_set      = NULL;
 	drbd_md_io_bio_set   = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_ee_mempool      = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
 	drbd_pp_pool         = NULL;
 	drbd_md_io_page_pool = NULL;
 	drbd_md_io_bio_set   = NULL;
+	drbd_io_bio_set      = NULL;
 
 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
 		goto Enomem;
 
 	/* mempools */
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+	drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+	if (drbd_io_bio_set == NULL)
+		goto Enomem;
+
+	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER);
 	if (drbd_md_io_bio_set == NULL)
 		goto Enomem;
 
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 	/* Setting the max_hw_sectors to an odd value of 8kibyte here
 	   This triggers a max_bio_size message upon first attach or connect */
 	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	q->queue_lock = &resource->req_lock;
 
 	device->md_io.page = alloc_page(GFP_KERNEL);
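
The drbd changes here, together with the drbd_req.h hunk further below, replace bio_clone() on the request path with bio_clone_fast() backed by a dedicated bio_set; with the reworked bioset_create() the pool behaviour is selected through flags. A condensed sketch of the pairing (error handling trimmed; variable names are illustrative):

	#include <linux/bio.h>

	struct bio_set *bs;
	struct bio *clone;

	bs = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
	if (!bs)
		return -ENOMEM;

	/* On the I/O path: clone without duplicating the bvec table. */
	clone = bio_clone_fast(bio_src, GFP_NOIO, bs);
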
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0..ad0fcb4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
 static enum drbd_ret_code
 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
 {
-	static enum drbd_ret_code rv;
+	enum drbd_ret_code rv;
 	struct drbd_peer_device *peer_device;
 	int i;
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be..c7e95e6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
 	struct drbd_device *device = octx->device;
 	struct issue_flush_context *ctx = octx->ctx;
 
-	if (bio->bi_error) {
-		ctx->error = bio->bi_error;
-		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+	if (bio->bi_status) {
+		ctx->error = blk_status_to_errno(bio->bi_status);
+		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
 	}
 	kfree(octx);
 	bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 6566243..f6e865b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
 void complete_master_bio(struct drbd_device *device,
 		struct bio_and_error *m)
 {
-	m->bio->bi_error = m->error;
+	m->bio->bi_status = errno_to_blk_status(m->error);
 	bio_endio(m->bio);
 	dec_ap_bio(device);
 }
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
 
 	if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
 			GFP_NOIO, 0))
-		req->private_bio->bi_error = -EIO;
+		req->private_bio->bi_status = BLK_STS_IOERR;
 	bio_endio(req->private_bio);
 }
 
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 		/* only pass the error to the upper layers.
 		 * if user cannot handle io errors, that's not our business. */
 		drbd_err(device, "could not kmalloc() req\n");
-		bio->bi_error = -ENOMEM;
+		bio->bi_status = BLK_STS_RESOURCE;
 		bio_endio(bio);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
 	unsigned long start_jif;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	start_jif = jiffies;
 
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f..9e1866a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
 	struct bio *bio;
-	bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
 
 	req->private_bio = bio;
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e..1d8726a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
 	struct drbd_device *device;
 
 	device = bio->bi_private;
-	device->md_io.error = bio->bi_error;
+	device->md_io.error = blk_status_to_errno(bio->bi_status);
 
 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
 	 * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
 	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
 			  bio_op(bio) == REQ_OP_DISCARD;
 
-	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+	if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
 				is_write ? (is_discard ? "discard" : "write")
-					: "read", bio->bi_error,
+					: "read", bio->bi_status,
 				(unsigned long long)peer_req->i.sector);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
 
 	bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
 		if (__ratelimit(&drbd_ratelimit_state))
 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 
-		if (!bio->bi_error)
+		if (!bio->bi_status)
 			drbd_panic_after_delayed_completion_of_aborted_request(device);
 	}
 
 	/* to avoid recursion in __req_mod */
-	if (unlikely(bio->bi_error)) {
+	if (unlikely(bio->bi_status)) {
 		switch (bio_op(bio)) {
 		case REQ_OP_WRITE_ZEROES:
 		case REQ_OP_DISCARD:
-			if (bio->bi_error == -EOPNOTSUPP)
+			if (bio->bi_status == BLK_STS_NOTSUPP)
 				what = DISCARD_COMPLETED_NOTSUPP;
 			else
 				what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
 	}
 
 	bio_put(req->private_bio);
-	req->private_bio = ERR_PTR(bio->bi_error);
+	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
 
 	/* not req_mod(), we need irqsave here! */
 	spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c76..ce82364 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
  * =============================
  */
 
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
 {
 	unsigned int nr_sectors = current_count_sectors;
 	unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
-		floppy_end_request(req, -EIO);
+		floppy_end_request(req, BLK_STS_IOERR);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
 }
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
 	struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
 	int drive = cbdata->drive;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		pr_info("floppy: error %d while reading block 0\n",
-			bio->bi_error);
+			bio->bi_status);
 		set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
 	}
 	complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
 			goto out_put_disk;
 		}
 
+		blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
 		blk_queue_max_hw_sectors(disks[drive]->queue, 64);
 		disks[drive]->major = FLOPPY_MAJOR;
 		disks[drive]->first_minor = TOMINOR(drive);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28d9329..0de1144 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 }
 
 static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+		 loff_t logical_blocksize)
 {
 	loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
 	sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 		lo->lo_offset = offset;
 	if (lo->lo_sizelimit != sizelimit)
 		lo->lo_sizelimit = sizelimit;
+	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+		lo->lo_logical_blocksize = logical_blocksize;
+		blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+		blk_queue_logical_block_size(lo->lo_queue,
+					     lo->lo_logical_blocksize);
+	}
 	set_capacity(lo->lo_disk, x);
 	bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
 	/* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
 		zero_fill_bio(bio);
 	}
 
-	blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+	blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -608,6 +615,9 @@ static int loop_switch(struct loop_device *lo, struct file *file)
  */
 static int loop_flush(struct loop_device *lo)
 {
+	/* loop not yet configured, no running thread, nothing to flush */
+	if (lo->lo_state != Lo_bound)
+		return 0;
 	return loop_switch(lo, NULL);
 }
 
@@ -810,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
+	int lo_bits = 9;
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
@@ -829,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
 
 	q->limits.discard_granularity = inode->i_sb->s_blocksize;
 	q->limits.discard_alignment = 0;
-	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+		lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+	blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -840,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
 	kthread_stop(lo->worker_task);
 }
 
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+	current->flags |= PF_LESS_THROTTLE;
+	return kthread_worker_fn(worker_ptr);
+}
+
 static int loop_prepare_queue(struct loop_device *lo)
 {
 	kthread_init_worker(&lo->worker);
-	lo->worker_task = kthread_run(kthread_worker_fn,
+	lo->worker_task = kthread_run(loop_kthread_worker_fn,
 			&lo->worker, "loop%d", lo->lo_number);
 	if (IS_ERR(lo->worker_task))
 		return -ENOMEM;
@@ -918,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
 	lo->use_dio = false;
 	lo->lo_blocksize = lo_blocksize;
+	lo->lo_logical_blocksize = 512;
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
@@ -1083,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	int err;
 	struct loop_func_table *xfer;
 	kuid_t uid = current_uid();
+	int lo_flags = lo->lo_flags;
 
 	if (lo->lo_encrypt_key_size &&
 	    !uid_eq(lo->lo_key_owner, uid) &&
@@ -1115,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (err)
 		goto exit;
 
+	if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+		if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+			lo->lo_logical_blocksize = 512;
+		lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+		if (LO_INFO_BLOCKSIZE(info) != 512 &&
+		    LO_INFO_BLOCKSIZE(info) != 1024 &&
+		    LO_INFO_BLOCKSIZE(info) != 2048 &&
+		    LO_INFO_BLOCKSIZE(info) != 4096)
+			return -EINVAL;
+		if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+			return -EINVAL;
+	}
+
 	if (lo->lo_offset != info->lo_offset ||
-	    lo->lo_sizelimit != info->lo_sizelimit)
-		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+	    lo->lo_sizelimit != info->lo_sizelimit ||
+	    lo->lo_flags != lo_flags ||
+	    ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+	     lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+				     LO_INFO_BLOCKSIZE(info))) {
 			err = -EFBIG;
 			goto exit;
 		}
+	}
 
 	loop_config_discard(lo);
 
@@ -1303,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 	return err;
 }
 
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
 {
 	if (unlikely(lo->lo_state != Lo_bound))
 		return -ENXIO;
 
-	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+				lo->lo_logical_blocksize);
 }
 
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1366,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	case LOOP_SET_CAPACITY:
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
-			err = loop_set_capacity(lo, bdev);
+			err = loop_set_capacity(lo);
 		break;
 	case LOOP_SET_DIRECT_IO:
 		err = -EPERM;
@@ -1642,7 +1683,7 @@ int loop_unregister_transfer(int number)
 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);
 
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1651,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	if (lo->lo_state != Lo_bound)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	switch (req_op(cmd->rq)) {
 	case REQ_OP_FLUSH:
@@ -1666,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	kthread_queue_work(&lo->worker, &cmd->work);
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f9..2c096b9 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
 	struct file *	lo_backing_file;
 	struct block_device *lo_device;
 	unsigned	lo_blocksize;
+	unsigned	lo_logical_blocksize;
 	void		*key_data; 
 
 	gfp_t		old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4..61b046f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
 						struct smart_attr *attrib);
 
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
-		mtip_complete_command(cmd, -EIO);
+		mtip_complete_command(cmd, BLK_STS_IOERR);
 		return;
 	}
 
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 					tag,
 					fail_reason != NULL ?
 						fail_reason : "unknown");
-					mtip_complete_command(cmd, -ENODATA);
+					mtip_complete_command(cmd, BLK_STS_MEDIUM);
 					continue;
 				}
 			}
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 			dev_warn(&port->dd->pdev->dev,
 				"retiring tag %d\n", tag);
 
-			mtip_complete_command(cmd, -EIO);
+			mtip_complete_command(cmd, BLK_STS_IOERR);
 		}
 	}
 	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	/* insert request and run queue */
 	blk_execute_rq(rq->q, NULL, rq, true);
 
-	rv = int_cmd->status;
-	if (rv < 0) {
-		if (rv == -ERESTARTSYS) { /* interrupted */
-			dev_err(&dd->pdev->dev,
-				"Internal command [%02X] was interrupted after %u ms\n",
-				fis->command,
-				jiffies_to_msecs(jiffies - start));
-			rv = -EINTR;
-			goto exec_ic_exit;
-		} else if (rv == 0) /* timeout */
-			dev_err(&dd->pdev->dev,
-				"Internal command did not complete [%02X] within timeout of  %lu ms\n",
-				fis->command, timeout);
-		else
-			dev_err(&dd->pdev->dev,
-				"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
-				fis->command, rv, timeout);
+	if (int_cmd->status) {
+		dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+				fis->command, int_cmd->status);
+		rv = -EIO;
 
 		if (mtip_check_surprise_removal(dd->pdev) ||
 			test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	mtip_softirq_done_fn(req);
 }
 
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		int err;
 
 		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-		blk_mq_end_request(rq, err);
+		blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
 		return 0;
 	}
 
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
-				   struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+		struct request *rq)
 {
 	struct driver_data *dd = hctx->queue->queuedata;
 	struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 	struct mtip_cmd_sg *command_sg;
 
 	if (mtip_commands_active(dd->port))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	/* Populate the SG list */
 	cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(rq);
 	mtip_issue_non_ncq_command(dd->port, rq->tag);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return 0;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return mtip_issue_reserved_cmd(hctx, rq);
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	blk_mq_start_request(rq);
 
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
-		return BLK_MQ_RQ_QUEUE_OK;
-
-	return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_OK;
+	return BLK_STS_IOERR;
 }
 
 static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 	if (reserved) {
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-		cmd->status = -ETIME;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	cmd->status = -ENODEV;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(rq);
 }
 
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e..e8286af 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
-	int status;
+	blk_status_t status;
 };
 
 /* Structure used to describe a port. */
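The mtip32xx hunks above (and most of the drivers that follow) are part of the tree-wide switch from negative errnos and the BLK_MQ_RQ_QUEUE_* constants to the dedicated blk_status_t type: ->queue_rq() now returns BLK_STS_* values and per-command status fields carry the same type. A rough, hedged sketch of the converted pattern follows; the mydrv_* names and the stub hardware hooks are invented for illustration and are not taken from any of these drivers.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk_types.h>

/* Hypothetical per-request context, mirroring drivers that keep a status field. */
struct mydrv_cmd {
	blk_status_t status;		/* was "int status" holding -EIO and friends */
};

/* Placeholder hardware hooks, purely for illustration. */
static bool mydrv_hw_busy(void *hw) { return false; }
static int mydrv_hw_submit(void *hw, struct request *rq) { return 0; }

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct mydrv_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	if (mydrv_hw_busy(hctx->queue->queuedata))
		return BLK_STS_RESOURCE;	/* was BLK_MQ_RQ_QUEUE_BUSY */

	blk_mq_start_request(bd->rq);

	if (mydrv_hw_submit(hctx->queue->queuedata, bd->rq)) {
		cmd->status = BLK_STS_IOERR;	/* was cmd->status = -EIO */
		return BLK_STS_IOERR;		/* was BLK_MQ_RQ_QUEUE_ERROR */
	}
	return BLK_STS_OK;			/* was BLK_MQ_RQ_QUEUE_OK */
}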
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9a7bb2c..977ec96 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
 	int index;
 	int cookie;
 	struct completion send_complete;
-	int status;
+	blk_status_t status;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	struct nbd_config *config;
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_TIMEOUT;
 		return BLK_EH_HANDLED;
 	}
 
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 				    "Connection timed out\n");
 	}
 	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
 
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	unsigned long size = blk_rq_bytes(req);
 	struct bio *bio;
 	u32 type;
+	u32 nbd_cmd_flags = 0;
 	u32 tag = blk_mq_unique_tag(req);
 	int sent = nsock->sent, skip = 0;
 
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		return -EIO;
 	}
 
+	if (req->cmd_flags & REQ_FUA)
+		nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
 	/* We did a partial send previously, and we at least sent the whole
 	 * request struct, so just go and send the rest of the pages in the
 	 * request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	}
 	cmd->index = index;
 	cmd->cookie = nsock->cookie;
-	request.type = htonl(type);
+	request.type = htonl(type | nbd_cmd_flags);
 	if (type != NBD_CMD_FLUSH) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 				nsock->pending = req;
 				nsock->sent = sent;
 			}
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 					 */
 					nsock->pending = req;
 					nsock->sent = sent;
-					return BLK_MQ_RQ_QUEUE_BUSY;
+					return BLK_STS_RESOURCE;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	if (ntohl(reply.error)) {
 		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
 			ntohl(reply.error));
-		cmd->status = -EIO;
+		cmd->status = BLK_STS_IOERR;
 		return cmd;
 	}
 
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 				 */
 				if (nbd_disconnected(config) ||
 				    config->num_connections <= 1) {
-					cmd->status = -EIO;
+					cmd->status = BLK_STS_IOERR;
 					return cmd;
 				}
 				return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 	if (!blk_mq_request_started(req))
 		return;
 	cmd = blk_mq_rq_to_pdu(req);
-	cmd->status = -EIO;
+	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 }
 
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 		nbd_config_put(nbd);
 		return -EINVAL;
 	}
-	cmd->status = 0;
+	cmd->status = BLK_STS_OK;
 again:
 	nsock = config->socks[index];
 	mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 	return ret;
 }
 
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * appropriate.
 	 */
 	ret = nbd_handle_cmd(cmd, hctx->queue_num);
-	if (ret < 0)
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
-	if (!ret)
-		ret = BLK_MQ_RQ_QUEUE_OK;
 	complete(&cmd->send_complete);
 
-	return ret;
+	return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 			continue;
 		}
 		sk_set_memalloc(sock->sk);
+		sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
 		atomic_inc(&config->recv_threads);
 		refcount_inc(&nbd->config_refs);
 		old = nsock->sock;
@@ -937,14 +938,6 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 	return -ENOSPC;
 }
 
-/* Reset all properties of an NBD device */
-static void nbd_reset(struct nbd_device *nbd)
-{
-	nbd->config = NULL;
-	nbd->tag_set.timeout = 0;
-	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-}
-
 static void nbd_bdev_reset(struct block_device *bdev)
 {
 	if (bdev->bd_openers > 1)
@@ -965,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
 		set_disk_ro(nbd->disk, false);
 	if (config->flags & NBD_FLAG_SEND_TRIM)
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-	if (config->flags & NBD_FLAG_SEND_FLUSH)
-		blk_queue_write_cache(nbd->disk->queue, true, false);
+	if (config->flags & NBD_FLAG_SEND_FLUSH) {
+		if (config->flags & NBD_FLAG_SEND_FUA)
+			blk_queue_write_cache(nbd->disk->queue, true, true);
+		else
+			blk_queue_write_cache(nbd->disk->queue, true, false);
+	}
 	else
 		blk_queue_write_cache(nbd->disk->queue, false, false);
 }
@@ -1029,7 +1026,11 @@ static void nbd_config_put(struct nbd_device *nbd)
 			}
 			kfree(config->socks);
 		}
-		nbd_reset(nbd);
+		kfree(nbd->config);
+		nbd->config = NULL;
+
+		nbd->tag_set.timeout = 0;
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
@@ -1075,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
 			return -ENOMEM;
 		}
 		sk_set_memalloc(config->socks[i]->sock->sk);
+		config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
 		atomic_inc(&config->recv_threads);
 		refcount_inc(&nbd->config_refs);
 		INIT_WORK(&args->work, recv_work);
@@ -1309,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
 		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
 	if (flags & NBD_FLAG_SEND_FLUSH)
 		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+	if (flags & NBD_FLAG_SEND_FUA)
+		seq_puts(s, "NBD_FLAG_SEND_FUA\n");
 	if (flags & NBD_FLAG_SEND_TRIM)
 		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
 
@@ -1483,7 +1487,6 @@ static int nbd_dev_add(int index)
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
-	nbd_reset(nbd);
 	add_disk(disk);
 	nbd_total_devices++;
 	return index;
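Beyond the status-type conversion, the nbd hunks above add FUA pass-through: when the server advertises NBD_FLAG_SEND_FUA the queue is registered as write-back with FUA, and requests carrying REQ_FUA get NBD_CMD_FLAG_FUA set in the wire command. Below is a minimal sketch of that plumbing, assuming the flag definitions from <linux/nbd.h>; the nbd_sketch_* helper names are hypothetical.

#include <linux/blkdev.h>
#include <linux/nbd.h>

/* Advertise cache/FUA capability based on the server's transmission flags. */
static void nbd_sketch_setup_cache(struct request_queue *q, u64 server_flags)
{
	if (server_flags & NBD_FLAG_SEND_FLUSH)
		/* write cache present; FUA only if the server also supports it */
		blk_queue_write_cache(q, true, server_flags & NBD_FLAG_SEND_FUA);
	else
		blk_queue_write_cache(q, false, false);
}

/* Translate the request's FUA bit into the on-the-wire command flag. */
static u32 nbd_sketch_cmd_flags(struct request *req)
{
	u32 flags = 0;

	if (req->cmd_flags & REQ_FUA)
		flags |= NBD_CMD_FLAG_FUA;
	return flags;
}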
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1e..71f4422 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
 	struct request_queue *q;
 	struct gendisk *disk;
 	struct nvm_dev *ndev;
-	struct blk_mq_tag_set tag_set;
+	struct blk_mq_tag_set *tag_set;
+	struct blk_mq_tag_set __tag_set;
 	struct hrtimer timer;
 	unsigned int queue_depth;
 	spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
 static int null_major;
 static int nullb_indexes;
 static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
 
 enum {
 	NULL_IRQ_NONE		= 0,
@@ -109,7 +111,7 @@ static int bs = 512;
 module_param(bs, int, S_IRUGO);
 MODULE_PARM_DESC(bs, "Block size (in bytes)");
 
-static int nr_devices = 2;
+static int nr_devices = 1;
 module_param(nr_devices, int, S_IRUGO);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
@@ -121,6 +123,10 @@ static bool blocking;
 module_param(blocking, bool, S_IRUGO);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
 
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
-		blk_mq_end_request(cmd->rq, 0);
+		blk_mq_end_request(cmd->rq, BLK_STS_OK);
 		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
-		blk_end_request_all(cmd->rq, 0);
+		blk_end_request_all(cmd->rq, BLK_STS_OK);
 		break;
 	case NULL_Q_BIO:
 		bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
-	return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
-	BUG_ON(!nullb);
-	BUG_ON(!nq);
-
-	init_waitqueue_head(&nq->wait);
-	nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			  unsigned int index)
-{
-	struct nullb *nullb = data;
-	struct nullb_queue *nq = &nullb->queues[index];
-
-	hctx->driver_data = nq;
-	null_init_queue(nullb, nq);
-	nullb->nr_queues++;
-
-	return 0;
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
-	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
 };
 
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
 
 #ifdef CONFIG_NVM
 
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
-	rqd->error = error;
+	/* XXX: lightnvm core seems to expect NVM_RSP_* values here.. */
+	rqd->error = status ? -EIO : 0;
 	nvm_end_io(rqd);
 
 	blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
 	else
 		del_gendisk(nullb->disk);
 	blk_cleanup_queue(nullb->q);
-	if (queue_mode == NULL_Q_MQ)
-		blk_mq_free_tag_set(&nullb->tag_set);
+	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+		blk_mq_free_tag_set(nullb->tag_set);
 	if (!use_lightnvm)
 		put_disk(nullb->disk);
 	cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
 	.release =	null_release,
 };
 
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+	BUG_ON(!nullb);
+	BUG_ON(!nq);
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+	struct request_queue *q = nullb->q;
+	struct blk_mq_hw_ctx *hctx;
+	struct nullb_queue *nq;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!hctx->nr_ctx || !hctx->tags)
+			continue;
+		nq = &nullb->queues[i];
+		hctx->driver_data = nq;
+		null_init_queue(nullb, nq);
+		nullb->nr_queues++;
+	}
+}
+
 static int setup_commands(struct nullb_queue *nq)
 {
 	struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
 	return 0;
 }
 
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+	set->ops = &null_mq_ops;
+	set->nr_hw_queues = submit_queues;
+	set->queue_depth = hw_queue_depth;
+	set->numa_node = home_node;
+	set->cmd_size	= sizeof(struct nullb_cmd);
+	set->flags = BLK_MQ_F_SHOULD_MERGE;
+	set->driver_data = NULL;
+
+	if (blocking)
+		set->flags |= BLK_MQ_F_BLOCKING;
+
+	return blk_mq_alloc_tag_set(set);
+}
+
 static int null_add_dev(void)
 {
 	struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
 		goto out_free_nullb;
 
 	if (queue_mode == NULL_Q_MQ) {
-		nullb->tag_set.ops = &null_mq_ops;
-		nullb->tag_set.nr_hw_queues = submit_queues;
-		nullb->tag_set.queue_depth = hw_queue_depth;
-		nullb->tag_set.numa_node = home_node;
-		nullb->tag_set.cmd_size	= sizeof(struct nullb_cmd);
-		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-		nullb->tag_set.driver_data = nullb;
+		if (shared_tags) {
+			nullb->tag_set = &tag_set;
+			rv = 0;
+		} else {
+			nullb->tag_set = &nullb->__tag_set;
+			rv = null_init_tag_set(nullb->tag_set);
+		}
 
-		if (blocking)
-			nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
-		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
 		if (rv)
 			goto out_cleanup_queues;
 
-		nullb->q = blk_mq_init_queue(&nullb->tag_set);
+		nullb->q = blk_mq_init_queue(nullb->tag_set);
 		if (IS_ERR(nullb->q)) {
 			rv = -ENOMEM;
 			goto out_cleanup_tags;
 		}
+		null_init_queues(nullb);
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 		if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
 out_cleanup_blk_queue:
 	blk_cleanup_queue(nullb->q);
 out_cleanup_tags:
-	if (queue_mode == NULL_Q_MQ)
-		blk_mq_free_tag_set(&nullb->tag_set);
+	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+		blk_mq_free_tag_set(nullb->tag_set);
 out_cleanup_queues:
 	cleanup_queues(nullb);
 out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
 		queue_mode = NULL_Q_MQ;
 	}
 
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		null_init_tag_set(&tag_set);
+
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
 	}
 	mutex_unlock(&lock);
 
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		blk_mq_free_tag_set(&tag_set);
+
 	kmem_cache_destroy(ppa_cache);
 }
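The null_blk changes above introduce an optional module-wide tag set (shared_tags=1) that all instances attach to, instead of one tag set per device; because a shared set's driver_data can no longer identify a single nullb, the hctx->driver_data wiring moves out of ->init_hctx() and into null_init_queues() after the queue is created. A hedged sketch of the shared-versus-private choice follows; mydev/mydev_add are hypothetical and the tag-set fields (ops, queue_depth, cmd_size, ...) are assumed to be filled in elsewhere.

#include <linux/err.h>
#include <linux/blk-mq.h>

static struct blk_mq_tag_set shared_set;	/* allocated once at module init */

struct mydev {
	struct blk_mq_tag_set *tag_set;		/* points at shared_set or __tag_set */
	struct blk_mq_tag_set __tag_set;	/* per-device set when not sharing */
	struct request_queue *q;
};

static int mydev_add(struct mydev *dev, bool share_tags)
{
	int ret = 0;

	if (share_tags) {
		dev->tag_set = &shared_set;
	} else {
		dev->tag_set = &dev->__tag_set;
		ret = blk_mq_alloc_tag_set(dev->tag_set);
	}
	if (ret)
		return ret;

	dev->q = blk_mq_init_queue(dev->tag_set);
	if (IS_ERR(dev->q)) {
		/* only free what this device itself allocated */
		if (dev->tag_set == &dev->__tag_set)
			blk_mq_free_tag_set(dev->tag_set);
		return PTR_ERR(dev->q);
	}
	return 0;
}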
 
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef..7b8c636 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
 			put_disk(disk);
 			continue;
 		}
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		cd->disk = disk;
 		cd->pi = &cd->pia;
 		cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
 		} else {
-			__blk_end_request_all(pcd_req, -EIO);
+			__blk_end_request_all(pcd_req, BLK_STS_IOERR);
 			pcd_req = NULL;
 		}
 	}
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
 	pcd_request();
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -837,7 +838,7 @@ static void pcd_start(void)
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
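The blk_queue_bounce_limit(..., BLK_BOUNCE_HIGH) call added to pcd above, and to pd, pf, skd, swim and swim3 further down, restores highmem bouncing for drivers that access request data by virtual address (PIO), since the block layer no longer bounces highmem pages by default. A minimal sketch of the opt-in, placed right after the queue is allocated; mydrv_limit_to_lowmem is a made-up name.

#include <linux/blkdev.h>

/* Drivers that touch request pages through kernel mappings must now ask
 * for highmem pages to be bounced into lowmem explicitly. */
static void mydrv_limit_to_lowmem(struct request_queue *q)
{
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}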
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f..27a44b9 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
-						res == Ok ? 0 : -EIO)) {
+						res == Ok ? 0 : BLK_STS_IOERR)) {
 					if (!set_next_request())
 						stop = 1;
 				}
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
 		return;
 	}
 	blk_queue_max_hw_sectors(p->queue, cluster);
+	blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
 
 	if (disk->drive == -1) {
 		for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca73..eef7a91 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
 			return;
 		}
 		blk_queue_max_segments(disk->queue, cluster);
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		pf->disk = disk;
 		pf->pi = &pf->pia;
 		pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
 	return pf_req != NULL;
 }
 
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
 {
 	if (pf_req && !__blk_end_request_cur(pf_req, err))
 		pf_req = NULL;
@@ -821,7 +822,7 @@ static void pf_request(void)
 	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 
@@ -836,7 +837,7 @@ static void pf_request(void)
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(-EIO);
+		pf_end_request(BLK_STS_IOERR);
 		goto repeat;
 	}
 }
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
 	return 0;
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
 	unsigned long saved_flags;
 
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(-EIO);
+			next_request(BLK_STS_IOERR);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(-EIO);
+		next_request(BLK_STS_IOERR);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865..467beca 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
 static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
 
 static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	scsi_req_init(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
 
 	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
 		bio, (unsigned long long)pkt->sector,
-		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&pkt->io_errors);
 	if (atomic_dec_and_test(&pkt->io_wait)) {
 		atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
 	struct pktcdvd_device *pd = pkt->pd;
 	BUG_ON(!pd);
 
-	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
 
 	pd->stats.pkt_ended++;
 
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	pkt_queue_bio(pd, pkt->w_bio);
 }
 
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
 {
 	struct bio *bio;
 
-	if (error)
+	if (status)
 		pkt->cache_valid = 0;
 
 	/* Finish all bios corresponding to this packet */
 	while ((bio = bio_list_pop(&pkt->orig_bios))) {
-		bio->bi_error = error;
+		bio->bi_status = status;
 		bio_endio(bio);
 	}
 }
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			if (atomic_read(&pkt->io_wait) > 0)
 				return;
 
-			if (!pkt->w_bio->bi_error) {
+			if (!pkt->w_bio->bi_status) {
 				pkt_set_state(pkt, PACKET_FINISHED_STATE);
 			} else {
 				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 			break;
 
 		case PACKET_FINISHED_STATE:
-			pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+			pkt_finish_packet(pkt, pkt->w_bio->bi_status);
 			return;
 
 		default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 	struct packet_stacked_data *psd = bio->bi_private;
 	struct pktcdvd_device *pd = psd->pd;
 
-	psd->bio->bi_error = bio->bi_error;
+	psd->bio->bi_status = bio->bi_status;
 	bio_put(bio);
 	bio_endio(psd->bio);
 	mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
 	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 	char b[BDEVNAME_SIZE];
 	struct bio *split;
 
-	blk_queue_bounce(q, &bio);
-
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	pd = q->queuedata;
 	if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
 			split = bio_split(bio, last_zone -
 					  bio->bi_iter.bi_sector,
-					  GFP_NOIO, fs_bio_set);
+					  GFP_NOIO, pkt_bio_set);
 			bio_chain(split, bio);
 		} else {
 			split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	bdev = bdget(dev);
 	if (!bdev)
 		return -ENOMEM;
+	if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+		WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+		bdput(bdev);
+		return -EINVAL;
+	}
 	ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
 	if (ret)
 		return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
 					sizeof(struct packet_stacked_data));
 	if (!psd_pool)
 		return -ENOMEM;
+	pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!pkt_bio_set) {
+		mempool_destroy(psd_pool);
+		return -ENOMEM;
+	}
 
 	ret = register_blkdev(pktdev_major, DRIVER_NAME);
 	if (ret < 0) {
@@ -2951,6 +2959,7 @@ static int __init pkt_init(void)
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 	return ret;
 }
 
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
 
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 	mempool_destroy(psd_pool);
+	bioset_free(pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e..075662f 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 		return 0;
 	}
 
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 			break;
 		default:
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 	}
 }
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	struct ps3_storage_device *dev = data;
 	struct ps3disk_private *priv;
 	struct request *req;
-	int res, read, error;
+	int res, read;
+	blk_status_t error;
 	u64 tag, status;
 	const char *op;
 
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	if (status) {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
 			__LINE__, op, status);
-		error = -EIO;
+		error = BLK_STS_IOERR;
 	} else {
 		dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
 			__LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe..e0e81ca 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
 	kfree(priv->cache.tags);
 }
 
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 			size_t len, size_t *retlen, u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 		(unsigned int)from, len);
 
 	if (from >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - from)
 		len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
 	return 0;
 }
 
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
 			 size_t len, size_t *retlen, const u_char *buf)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 	unsigned int cached, count;
 
 	if (to >= priv->size)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	if (len > priv->size - to)
 		len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 	int write = bio_data_dir(bio) == WRITE;
 	const char *op = write ? "write" : "read";
 	loff_t offset = bio->bi_iter.bi_sector << 9;
-	int error = 0;
+	blk_status_t error = 0;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 
 		if (retlen != len) {
 			dev_err(&dev->core, "Short %s\n", op);
-			error = -EIO;
+			error = BLK_STS_IOERR;
 			goto out;
 		}
 
@@ -593,7 +593,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 	next = bio_list_peek(&priv->list);
 	spin_unlock_irq(&priv->lock);
 
-	bio->bi_error = error;
+	bio->bi_status = error;
 	bio_endio(bio);
 	return next;
 }
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 
 	dev_dbg(&dev->core, "%s\n", __func__);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&priv->lock);
 	busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 454bf9c..b008b6a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
 
+static struct bio_set		*rbd_bio_clone;
+
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
 {
 	struct bio *bio;
 
-	bio = bio_clone(bio_src, gfpmask);
+	bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
 	if (!bio)
 		return NULL;	/* ENOMEM */
 
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 		rbd_assert(img_request->obj_request != NULL);
 		more = obj_request->which < img_request->obj_request_count - 1;
 	} else {
+		blk_status_t status = errno_to_blk_status(result);
+
 		rbd_assert(img_request->rq != NULL);
 
-		more = blk_update_request(img_request->rq, result, xferred);
+		more = blk_update_request(img_request->rq, status, xferred);
 		if (!more)
-			__blk_mq_end_request(img_request->rq, result);
+			__blk_mq_end_request(img_request->rq, status);
 	}
 
 	return more;
@@ -4023,6 +4027,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 
 	switch (req_op(rq)) {
 	case REQ_OP_DISCARD:
+	case REQ_OP_WRITE_ZEROES:
 		op_type = OBJ_OP_DISCARD;
 		break;
 	case REQ_OP_WRITE:
@@ -4149,17 +4154,17 @@ static void rbd_queue_workfn(struct work_struct *work)
 			 obj_op_name(op_type), length, offset, result);
 	ceph_put_snap_context(snapc);
 err:
-	blk_mq_end_request(rq, result);
+	blk_mq_end_request(rq, errno_to_blk_status(result));
 }
 
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
 	struct work_struct *work = blk_mq_rq_to_pdu(rq);
 
 	queue_work(rbd_wq, work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -4420,6 +4425,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	q->limits.discard_granularity = segment_size;
 	q->limits.discard_alignment = segment_size;
 	blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
+	blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
 
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
@@ -6412,8 +6418,16 @@ static int rbd_slab_init(void)
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
+	rbd_assert(!rbd_bio_clone);
+	rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!rbd_bio_clone)
+		goto out_err_clone;
+
 	return 0;
 
+out_err_clone:
+	kmem_cache_destroy(rbd_obj_request_cache);
+	rbd_obj_request_cache = NULL;
 out_err:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
@@ -6429,6 +6443,10 @@ static void rbd_slab_exit(void)
 	rbd_assert(rbd_img_request_cache);
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
+
+	rbd_assert(rbd_bio_clone);
+	bioset_free(rbd_bio_clone);
+	rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
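rbd keeps its internal result codes as errnos and translates at the block-layer boundary with errno_to_blk_status(); the reverse helper blk_status_to_errno() appears where callers still want an errno (as in virtio_blk's ID request further down). Below is a short sketch of the boundary conversion, assuming the helpers introduced earlier in this series; mydrv_complete is a hypothetical completion path.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Internal code paths keep errnos; the completion path translates once. */
static void mydrv_complete(struct request *rq, int result, unsigned int bytes)
{
	blk_status_t status = errno_to_blk_status(result);

	/* blk_update_request() returns true while the request still has
	 * unfinished bytes; end it only when everything is accounted for. */
	if (!blk_update_request(rq, status, bytes))
		__blk_mq_end_request(rq, status);
}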
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c56636..7f4aceb 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct rsxx_cardinfo *card = q->queuedata;
 	struct rsxx_bio_meta *bio_meta;
-	int st = -EINVAL;
+	blk_status_t st = BLK_STS_IOERR;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	might_sleep();
 
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	if (bio_end_sector(bio) > get_capacity(card->gendisk))
 		goto req_err;
 
-	if (unlikely(card->halt)) {
-		st = -EFAULT;
+	if (unlikely(card->halt))
 		goto req_err;
-	}
 
-	if (unlikely(card->dma_fault)) {
-		st = (-EFAULT);
+	if (unlikely(card->dma_fault))
 		goto req_err;
-	}
 
 	if (bio->bi_iter.bi_size == 0) {
 		dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 
 	bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
 	if (!bio_meta) {
-		st = -ENOMEM;
+		st = BLK_STS_RESOURCE;
 		goto req_err;
 	}
 
@@ -205,7 +201,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 	kmem_cache_free(bio_meta_pool, bio_meta);
 req_err:
 	if (st)
-		bio->bi_error = st;
+		bio->bi_status = st;
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 	}
 
 	blk_queue_make_request(card->queue, rsxx_make_request);
-	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
 	blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
 	blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
 
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f8..6a1b217 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
 	mutex_unlock(&ctrl->work_lock);
 }
 
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
 				  struct list_head *q,
 				  unsigned int laddr,
 				  rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = HW_CMD_BLK_DISCARD;
 	dma->laddr        = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
 			      struct list_head *q,
 			      int dir,
 			      unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 
 	dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
 	dma->laddr        = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 	return 0;
 }
 
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	unsigned int dma_len;
 	int dma_cnt[RSXX_MAX_TARGETS];
 	int tgt;
-	int st;
+	blk_status_t st;
 	int i;
 
 	addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	for (i = 0; i < card->n_targets; i++)
 		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
 					FREE_DMA);
-
 	return st;
 }
 
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d..277f27e6 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   struct bio *bio,
 			   atomic_t *n_dmas,
 			   rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4..d036868 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 				    struct skd_special_context *skspcl);
 static void skd_request_fn(struct request_queue *rq);
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+		struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
 			       struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
 		if (req == NULL)
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 }
 
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
 	struct request *req = NULL;
 	struct skd_scsi_request *scsi_req;
 	unsigned long io_flags;
-	int error;
 	u32 lba;
 	u32 count;
 	int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
 		if (!req->bio)
 			goto skip_sg;
 
-		error = skd_preop_sg_list(skdev, skreq);
-
-		if (error != 0) {
+		if (!skd_preop_sg_list(skdev, skreq)) {
 			/*
 			 * Complete the native request with error.
 			 * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
 			 */
 			pr_debug("%s:%s:%d error Out\n",
 				 skdev->name, __func__, __LINE__);
-			skd_end_request(skdev, skreq, error);
+			skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -805,7 +802,7 @@ static void skd_request_fn(struct request_queue *q)
 }
 
 static void skd_end_request(struct skd_device *skdev,
-			    struct skd_request_context *skreq, int error)
+		struct skd_request_context *skreq, blk_status_t error)
 {
 	if (unlikely(error)) {
 		struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
 	__blk_end_request_all(skreq->req, error);
 }
 
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq)
 {
 	struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 
 	n_sg = blk_rq_map_sg(skdev->queue, req, sg);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	/*
 	 * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 	 */
 	n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
 	if (n_sg <= 0)
-		return -EINVAL;
+		return false;
 
 	SKD_ASSERT(n_sg <= skdev->sgs_per_request);
 
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 		}
 	}
 
-	return 0;
+	return true;
 }
 
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 	switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
 	case SKD_CHECK_STATUS_REPORT_GOOD:
 	case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
-		skd_end_request(skdev, skreq, 0);
+		skd_end_request(skdev, skreq, BLK_STS_OK);
 		break;
 
 	case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 
 	case SKD_CHECK_STATUS_REPORT_ERROR:
 	default:
-		skd_end_request(skdev, skreq, -EIO);
+		skd_end_request(skdev, skreq, BLK_STS_IOERR);
 		break;
 	}
 }
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 			 * native request.
 			 */
 			if (likely(cmp_status == SAM_STAT_GOOD))
-				skd_end_request(skdev, skreq, 0);
+				skd_end_request(skdev, skreq, BLK_STS_OK);
 			else
 				skd_resolve_req_exception(skdev, skreq);
 		}
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
 			    SKD_MAX_RETRIES)
 				blk_requeue_request(skdev->queue, skreq->req);
 			else
-				skd_end_request(skdev, skreq, -EIO);
+				skd_end_request(skdev, skreq, BLK_STS_IOERR);
 
 			skreq->req = NULL;
 
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
 		rc = -ENOMEM;
 		goto err_out;
 	}
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	skdev->queue = q;
 	disk->queue = q;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab..6b16ead 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	rqe->req = NULL;
 
-	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+	__blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
 
 	vdc_blk_queue_start(port);
 }
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
 	struct request *req;
 
 	while ((req = blk_fetch_request(port->disk->queue)) != NULL)
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 }
 
 static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6..84434d3 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
 	return ret;
 }
 
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
 			       int req_sector, int sectors_nb,
 			       unsigned char *buffer)
 {
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
 			ret = swim_read_sector(fs, side, track, sector,
 						buffer);
 			if (try-- == 0)
-				return -EIO;
+				return BLK_STS_IOERR;
 		} while (ret != 512);
 
 		buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
 
 	req = swim_next_request(swd);
 	while (req) {
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		fs = req->rq_disk->private_data;
 		if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
 			put_disk(swd->unit[drive].disk);
 			goto exit_put_disks;
 		}
+		blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+				BLK_BOUNCE_HIGH);
 		swd->unit[drive].disk->queue->queuedata = swd;
 		swd->unit[drive].swd = swd;
 	}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c..9f931f8 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
 					unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
 {
 	struct request *req = fs->cur_req;
 	int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
 		if (fs->mdev->media_bay &&
 		    check_media_bay(fs->mdev->media_bay) != MB_FD) {
 			swim3_dbg("%s", "  media bay absent, dropping req\n");
-			swim3_end_request(fs, -ENODEV, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
 		if (blk_rq_pos(req) >= fs->total_secs) {
 			swim3_dbg("  pos out of bounds (%ld, max is %ld)\n",
 				  (long)blk_rq_pos(req), (long)fs->total_secs);
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 		if (fs->ejected) {
 			swim3_dbg("%s", "  disk ejected\n");
-			swim3_end_request(fs, -EIO, 0);
+			swim3_end_request(fs, BLK_STS_IOERR, 0);
 			continue;
 		}
 
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
 				swim3_dbg("%s", "  try to write, disk write protected\n");
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				continue;
 			}
 		}
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
 				if (fs->retries > 5) {
 					swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
 						  fs->req_cyl, fs->cur_cyl);
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					return;
 				}
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		swim3_end_request(fs, -EIO, 0);
+		swim3_end_request(fs, BLK_STS_IOERR, 0);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	swim3_err("%s", "Seek timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
 		goto unlock;
 	}
 	swim3_err("%s", "Seek settle timeout\n");
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
  unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
 	swim3_err("Timeout %sing sector %ld\n",
 	       (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
 	       (long)blk_rq_pos(fs->cur_req));
-	swim3_end_request(fs, -EIO, 0);
+	swim3_end_request(fs, BLK_STS_IOERR, 0);
 	fs->state = idle;
 	start_request(fs);
 	spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("%s", "Seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					swim3_end_request(fs, -EIO, 0);
+					swim3_end_request(fs, BLK_STS_IOERR, 0);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("Error %sing block %ld (err=%x)\n",
 				       rq_data_dir(req) == WRITE? "writ": "read",
 				       (long)blk_rq_pos(req), err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 			}
 		} else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
 				swim3_err("  state=%d, dir=%x, intr=%x, err=%x\n",
 					  fs->state, rq_data_dir(req), intr, err);
-				swim3_end_request(fs, -EIO, 0);
+				swim3_end_request(fs, BLK_STS_IOERR, 0);
 				fs->state = idle;
 				start_request(fs);
 				break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
 		put_disk(disk);
 		return -ENOMEM;
 	}
+	blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 	disk->queue->queuedata = &floppy_states[index];
 
 	if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
 	return 0;
 }
 
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
 {
 	{
 	.name		= "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072c..08586dc 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
 
 static inline void carm_end_request_queued(struct carm_host *host,
 					   struct carm_request *crq,
-					   int error)
+					   blk_status_t error)
 {
 	struct request *req = crq->rq;
 	int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
 }
 
 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
-			       int error)
+			       blk_status_t error)
 {
 	carm_end_request_queued(host, crq, error);
 	if (max_queue == 1)
@@ -869,14 +869,14 @@ static void carm_rq_fn(struct request_queue *q)
 	sg = &crq->sg[0];
 	n_elem = blk_rq_map_sg(q, rq, sg);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 
 	/* map scatterlist to PCI bus addresses */
 	n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
 	if (n_elem <= 0) {
-		carm_end_rq(host, crq, -EIO);
+		carm_end_rq(host, crq, BLK_STS_IOERR);
 		return;		/* request with no s/g entries? */
 	}
 	crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ static void carm_rq_fn(struct request_queue *q)
 
 static void carm_handle_array_info(struct carm_host *host,
 				   struct carm_request *crq, u8 *mem,
-				   int error)
+				   blk_status_t error)
 {
 	struct carm_port *port;
 	u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ static void carm_handle_array_info(struct carm_host *host,
 
 static void carm_handle_scan_chan(struct carm_host *host,
 				  struct carm_request *crq, u8 *mem,
-				  int error)
+				  blk_status_t error)
 {
 	u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
 	unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ static void carm_handle_scan_chan(struct carm_host *host,
 }
 
 static void carm_handle_generic(struct carm_host *host,
-				struct carm_request *crq, int error,
+				struct carm_request *crq, blk_status_t error,
 				int cur_state, int next_state)
 {
 	DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
 }
 
 static inline void carm_handle_rw(struct carm_host *host,
-				  struct carm_request *crq, int error)
+				  struct carm_request *crq, blk_status_t error)
 {
 	int pci_dir;
 
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 	u32 handle = le32_to_cpu(ret_handle_le);
 	unsigned int msg_idx;
 	struct carm_request *crq;
-	int error = (status == RMSG_OK) ? 0 : -EIO;
+	blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
 	u8 *mem;
 
 	VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 err_out:
 	printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
 	       pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
-	carm_end_rq(host, crq, -EIO);
+	carm_end_rq(host, crq, BLK_STS_IOERR);
 }
 
 static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3..0677d25 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
 				PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 		if (control & DMASCR_HARD_ERROR) {
 			/* error */
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			dev_printk(KERN_WARNING, &card->dev->dev,
 				"I/O error on sector %d/%d\n",
 				le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
 		 (unsigned long long)bio->bi_iter.bi_sector,
 		 bio->bi_iter.bi_size);
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	spin_lock_irq(&card->lock);
 	*card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c..0297ad7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
 	struct scatterlist sg[];
 };
 
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
 	case VIRTIO_BLK_S_OK:
-		return 0;
+		return BLK_STS_OK;
 	case VIRTIO_BLK_S_UNSUPP:
-		return -ENOTTY;
+		return BLK_STS_NOTSUPP;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
-			return BLK_MQ_RQ_QUEUE_BUSY;
-		return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_RESOURCE;
+		return BLK_STS_IOERR;
 	}
 
 	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (notify)
 		virtqueue_notify(vblk->vqs[qid].vq);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 		goto out;
 
 	blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
-	err = virtblk_result(blk_mq_rq_to_pdu(req));
+	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
 out:
 	blk_put_request(req);
 	return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	/* We can handle whatever the host told us to handle. */
 	blk_queue_max_segments(q, vblk->sg_elems-2);
 
-	/* No need to bounce any requests */
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
 	/* No real sector limit. */
 	blk_queue_max_hw_sectors(q, -1U);
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 726c32e..fe7cd58 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
 	unsigned long timeout;
 	int ret;
 
-	xen_blkif_get(blkif);
-
 	set_freezable();
 	while (!kthread_should_stop()) {
 		if (try_to_freeze())
@@ -665,7 +663,6 @@ int xen_blkif_schedule(void *arg)
 		print_stats(ring);
 
 	ring->xenblkd = NULL;
-	xen_blkif_put(blkif);
 
 	return 0;
 }
@@ -1069,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
 	atomic_set(&blkif->drain, 0);
 }
 
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+		blk_status_t error)
 {
 	/* An error fails the entire request. */
-	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
-	    (error == -EOPNOTSUPP)) {
+	if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+	    error == BLK_STS_NOTSUPP) {
 		pr_debug("flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
-	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
-		    (error == -EOPNOTSUPP)) {
+	} else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+		   error == BLK_STS_NOTSUPP) {
 		pr_debug("write barrier op failed, not supported\n");
 		xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1106,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
  */
 static void end_block_io_op(struct bio *bio)
 {
-	__end_block_io_op(bio->bi_private, bio->bi_error);
+	__end_block_io_op(bio->bi_private, bio->bi_status);
 	bio_put(bio);
 }
 
@@ -1423,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 	for (i = 0; i < nbio; i++)
 		bio_put(biolist[i]);
 	atomic_set(&pending_req->pendcnt, 1);
-	__end_block_io_op(pending_req, -EINVAL);
+	__end_block_io_op(pending_req, BLK_STS_RESOURCE);
 	msleep(1); /* back off a bit */
 	return -EIO;
 }
@@ -1436,34 +1430,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 static void make_response(struct xen_blkif_ring *ring, u64 id,
 			  unsigned short op, int st)
 {
-	struct blkif_response  resp;
+	struct blkif_response *resp;
 	unsigned long     flags;
 	union blkif_back_rings *blk_rings;
 	int notify;
 
-	resp.id        = id;
-	resp.operation = op;
-	resp.status    = st;
-
 	spin_lock_irqsave(&ring->blk_ring_lock, flags);
 	blk_rings = &ring->blk_rings;
 	/* Place on the response ring for the relevant domain. */
 	switch (ring->blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
-		memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->native,
+					 blk_rings->native.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_32:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+					 blk_rings->x86_32.rsp_prod_pvt);
 		break;
 	case BLKIF_PROTOCOL_X86_64:
-		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-		       &resp, sizeof(resp));
+		resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+					 blk_rings->x86_64.rsp_prod_pvt);
 		break;
 	default:
 		BUG();
 	}
+
+	resp->id        = id;
+	resp->operation = op;
+	resp->status    = st;
+
 	blk_rings->common.rsp_prod_pvt++;
 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
 	spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
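For reference, a minimal sketch of the pattern make_response() now follows: obtain a pointer to the response slot in the shared ring and fill the fields in place, rather than assembling the response on the stack and memcpy()ing it across. The struct and helper below are illustrative stand-ins, not the Xen ring API.

	#include <linux/types.h>

	struct demo_response {
		u64 id;		/* copied from the request */
		u8  operation;	/* copied from the request */
		s16 status;	/* BLKIF_RSP_*-style code */
	};

	/* 'slot' points directly into the shared ring page. */
	static void demo_fill_response(struct demo_response *slot,
				       u64 id, u8 op, s16 st)
	{
		slot->id        = id;
		slot->operation = op;
		slot->status    = st;
	}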
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dea61f6..ecb35fe 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
 struct blkif_common_request {
 	char dummy;
 };
-struct blkif_common_response {
-	char dummy;
-};
+
+/* i386 protocol version */
 
 struct blkif_x86_32_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
 	} u;
 } __attribute__((__packed__));
 
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-	uint64_t        id;              /* copied from request */
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-#pragma pack(pop)
 /* x86_64 protocol version */
 
 struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
 	} u;
 } __attribute__((__packed__));
 
-struct blkif_x86_64_response {
-	uint64_t       __attribute__((__aligned__(8))) id;
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
-};
-
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-		  struct blkif_common_response);
+		  struct blkif_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-		  struct blkif_x86_32_response);
+		  struct blkif_response __packed);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-		  struct blkif_x86_64_response);
+		  struct blkif_response);
 
 union blkif_back_rings {
 	struct blkif_back_ring        native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
 
 	wait_queue_head_t	wq;
 	atomic_t		inflight;
+	bool			active;
 	/* One thread per blkif ring. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 1f3dfaa..792da68 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
 		init_waitqueue_head(&ring->shutdown_wq);
 		ring->blkif = blkif;
 		ring->st_print = jiffies;
-		xen_blkif_get(blkif);
+		ring->active = true;
 	}
 
 	return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		struct xen_blkif_ring *ring = &blkif->rings[r];
 		unsigned int i = 0;
 
+		if (!ring->active)
+			continue;
+
 		if (ring->xenblkd) {
 			kthread_stop(ring->xenblkd);
 			wake_up(&ring->shutdown_wq);
-			ring->xenblkd = NULL;
 		}
 
 		/* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 		BUG_ON(ring->free_pages_num != 0);
 		BUG_ON(ring->persistent_gnt_c != 0);
 		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-		xen_blkif_put(blkif);
+		ring->active = false;
 	}
 	blkif->nr_ring_pages = 0;
 	/*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-
-	xen_blkif_disconnect(blkif);
+	WARN_ON(xen_blkif_disconnect(blkif));
 	xen_vbd_free(&blkif->vbd);
+	kfree(blkif->be->mode);
+	kfree(blkif->be);
 
 	/* Make sure everything is drained before shutting down */
 	kmem_cache_free(xen_blkif_cachep, blkif);
@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		xen_blkif_put(be->blkif);
 	}
 
-	kfree(be->mode);
-	kfree(be);
 	return 0;
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3945963..c852ed3 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
 	unsigned long associated_id;
 };
 
-struct split_bio {
-	struct bio *bio;
-	atomic_t pending;
-};
-
 struct blkif_req {
 	int	error;
 };
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
 		 !info->feature_fua));
 }
 
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	flush_requests(rinfo);
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_err:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_MQ_RQ_QUEUE_ERROR;
+	return BLK_STS_IOERR;
 
 out_busy:
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 	blk_mq_stop_hw_queue(hctx);
-	return BLK_MQ_RQ_QUEUE_BUSY;
+	return BLK_STS_RESOURCE;
 }
 
 static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
-
-	/* Make sure we don't use bounce buffers. */
-	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
 }
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			continue;
 		}
 
-		blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+		if (bret->status == BLKIF_RSP_OKAY)
+			blkif_req(req)->error = BLK_STS_OK;
+		else
+			blkif_req(req)->error = BLK_STS_IOERR;
+
 		switch (bret->operation) {
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
 				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret->operation));
-				blkif_req(req)->error = -EOPNOTSUPP;
+				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
-				if (blkif_req(req)->error == -EOPNOTSUPP)
-					blkif_req(req)->error = 0;
+				if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+					blkif_req(req)->error = BLK_STS_OK;
 				info->feature_fua = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-static void split_bio_end(struct bio *bio)
-{
-	struct split_bio *split_bio = bio->bi_private;
-
-	if (atomic_dec_and_test(&split_bio->pending)) {
-		split_bio->bio->bi_phys_segments = 0;
-		split_bio->bio->bi_error = bio->bi_error;
-		bio_endio(split_bio->bio);
-		kfree(split_bio);
-	}
-	bio_put(bio);
-}
-
 static int blkif_recover(struct blkfront_info *info)
 {
-	unsigned int i, r_index;
+	unsigned int r_index;
 	struct request *req, *n;
 	int rc;
-	struct bio *bio, *cloned_bio;
-	unsigned int segs, offset;
-	int pending, size;
-	struct split_bio *split_bio;
+	struct bio *bio;
+	unsigned int segs;
 
 	blkfront_gather_backend_features(info);
 	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
 
 	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
-		if (bio_segments(bio) > segs) {
-			/*
-			 * This bio has more segments than what we can
-			 * handle, we have to split it.
-			 */
-			pending = (bio_segments(bio) + segs - 1) / segs;
-			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
-			BUG_ON(split_bio == NULL);
-			atomic_set(&split_bio->pending, pending);
-			split_bio->bio = bio;
-			for (i = 0; i < pending; i++) {
-				offset = (i * segs * XEN_PAGE_SIZE) >> 9;
-				size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
-					   (unsigned int)bio_sectors(bio) - offset);
-				cloned_bio = bio_clone(bio, GFP_NOIO);
-				BUG_ON(cloned_bio == NULL);
-				bio_trim(cloned_bio, offset, size);
-				cloned_bio->bi_private = split_bio;
-				cloned_bio->bi_end_io = split_bio_end;
-				submit_bio(cloned_bio);
-			}
-			/*
-			 * Now we have to wait for all those smaller bios to
-			 * end, so we can also end the "parent" bio.
-			 */
-			continue;
-		}
-		/* We don't need to split this bio */
 		submit_bio(bio);
 	}
 
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
 			merge_bio.tail = shadow[j].request->biotail;
 			bio_list_merge(&info->bio_list, &merge_bio);
 			shadow[j].request->bio = NULL;
-			blk_mq_end_request(shadow[j].request, 0);
+			blk_mq_end_request(shadow[j].request, BLK_STS_OK);
 		}
 	}
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2..14459d6 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
 		if (!blk_rq_is_passthrough(req))
 			break;
 		blk_start_request(req);
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 	return req;
 }
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Drop all in-flight and pending requests */
 		if (ace->req) {
-			__blk_end_request_all(ace->req, -EIO);
+			__blk_end_request_all(ace->req, BLK_STS_IOERR);
 			ace->req = NULL;
 		}
 		while ((req = blk_fetch_request(ace->queue)) != NULL)
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		}
 
 		/* bio finished; is there another one? */
-		if (__blk_end_request_cur(ace->req, 0)) {
+		if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
 			 *      blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
 	if (ace->queue == NULL)
 		goto err_blk_initq;
 	blk_queue_logical_block_size(ace->queue, 512);
+	blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
 
 	/*
 	 * Allocate and initialize GD structure
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e5..41c95c9 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
 	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
-		int err = 0;
+		blk_status_t err = BLK_STS_OK;
 
 		if (start + len > z2ram_size) {
 			pr_err(DEVICE_NAME ": bad access: block=%llu, "
 			       "count=%u\n",
 			       (unsigned long long)blk_rq_pos(req),
 			       blk_rq_cur_sectors(req));
-			err = -EIO;
+			err = BLK_STS_IOERR;
 			goto done;
 		}
 		while (len) {
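The errno-to-blk_status_t switch seen here recurs throughout this series: completion paths now report block-layer status codes instead of negative errnos. A rough sketch of the mapping, with made-up parameter names:

	#include <linux/blk_types.h>
	#include <linux/types.h>

	/* Translate a driver-internal result into the status codes passed to
	 * __blk_end_request_all()/blk_mq_end_request() in the hunks above. */
	static blk_status_t demo_result_to_status(bool ok, bool unsupported)
	{
		if (unsupported)
			return BLK_STS_NOTSUPP;
		return ok ? BLK_STS_OK : BLK_STS_IOERR;
	}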
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index c38cb5b..fe850f0 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -602,7 +602,7 @@ static int btmrvl_service_main_thread(void *data)
 	struct btmrvl_thread *thread = data;
 	struct btmrvl_private *priv = thread->priv;
 	struct btmrvl_adapter *adapter = priv->adapter;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct sk_buff *skb;
 	ulong flags;
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952f..e36d160 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 	if (!q)
 		return -ENXIO;
 
+	if (!blk_queue_scsi_passthrough(q)) {
+		WARN_ONCE(true,
+			  "Attempt to read CDDA info through a non-SCSI queue\n");
+		return -EINVAL;
+	}
+
 	cdi->last_sense = 0;
 
 	while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			break;
 		}
 		req = scsi_req(rq);
-		scsi_req_init(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763..6495b03f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
  */
 static void gdrom_readdisk_dma(struct work_struct *work)
 {
-	int err, block, block_cnt;
+	int block, block_cnt;
+	blk_status_t err;
 	struct packet_command *read_command;
 	struct list_head *elem, *next;
 	struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		__raw_writeb(1, GDROM_DMA_STATUS_REG);
 		wait_event_interruptible_timeout(request_queue,
 			gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-		err = gd.transfer ? -EIO : 0;
+		err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
 		gd.transfer = 0;
 		gd.pending = 0;
 		/* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
 			break;
 		case REQ_OP_WRITE:
 			pr_notice("Read only device - write request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		default:
 			printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			break;
 		}
 	}
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
 		err = -ENOMEM;
 		goto probe_fail_requestq;
 	}
+	blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
 
 	err = probe_gdrom_setupqueue();
 	if (err)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index d165af8..a5c6cfe 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -821,7 +821,7 @@ static ssize_t ipmi_read(struct file *file,
 			 loff_t      *ppos)
 {
 	int          rv = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (count <= 0)
 		return 0;
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6e0cbe0..593a881 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -343,7 +343,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 
 	/* It's illegal to wrap around the end of the physical address space. */
-	if (offset + (phys_addr_t)size < offset)
+	if (offset + (phys_addr_t)size - 1 < offset)
 		return -EINVAL;
 
 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
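A standalone illustration of why the wrap-around test above now checks the last byte of the region (uint64_t stands in for phys_addr_t; the values are made up): a mapping ending exactly at the top of the physical address space used to be rejected, while a genuine wrap is still caught.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t offset = UINT64_MAX - 0xfff;	/* last 4 KiB of the space */
		uint64_t size   = 0x1000;

		/* old test: offset + size wraps to 0, so the mapping is refused */
		printf("old check rejects: %d\n", offset + size < offset);
		/* new test: offset + size - 1 == UINT64_MAX, no wrap, accepted */
		printf("new check rejects: %d\n", offset + size - 1 < offset);
		return 0;
	}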
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index d4dbd8d..382c864 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 
 	rc = write_sync_reg(SCR_HOST_TO_READER_START, dev);
 	if (rc <= 0) {
-		DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+		DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
 		DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 		if (rc == -ERESTARTSYS)
 			return rc;
@@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 	for (i = 0; i < bytes_to_write; i++) {
 		rc = wait_for_bulk_out_ready(dev);
 		if (rc <= 0) {
-			DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n",
+			DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n",
 			       rc);
 			DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 			if (rc == -ERESTARTSYS)
@@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf,
 	rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev);
 
 	if (rc <= 0) {
-		DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc);
+		DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc);
 		DEBUGP(2, dev, "<- cm4040_write (failed)\n");
 		if (rc == -ERESTARTSYS)
 			return rc;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0ab0249..01a260f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1,6 +1,9 @@
 /*
  * random.c -- A strong random number generator
  *
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All
+ * Rights Reserved.
+ *
  * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005
  *
  * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999.  All
@@ -762,6 +765,8 @@ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 static struct crng_state **crng_node_pool __read_mostly;
 #endif
 
+static void invalidate_batched_entropy(void);
+
 static void crng_initialize(struct crng_state *crng)
 {
 	int		i;
@@ -798,12 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
 		p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
 		cp++; crng_init_cnt++; len--;
 	}
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
+		invalidate_batched_entropy();
 		crng_init = 1;
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: fast init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	return 1;
 }
 
@@ -835,13 +841,14 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	}
 	memzero_explicit(&buf, sizeof(buf));
 	crng->init_time = jiffies;
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
 	if (crng == &primary_crng && crng_init < 2) {
+		invalidate_batched_entropy();
 		crng_init = 2;
 		process_random_ready_list();
 		wake_up_interruptible(&crng_init_wait);
 		pr_notice("random: crng init done\n");
 	}
-	spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 static inline void crng_wait_ready(void)
@@ -1097,12 +1104,16 @@ static void add_interrupt_bench(cycles_t start)
 static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
 {
 	__u32 *ptr = (__u32 *) regs;
+	unsigned int idx;
 
 	if (regs == NULL)
 		return 0;
-	if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32))
-		f->reg_idx = 0;
-	return *(ptr + f->reg_idx++);
+	idx = READ_ONCE(f->reg_idx);
+	if (idx >= sizeof(struct pt_regs) / sizeof(__u32))
+		idx = 0;
+	ptr += idx++;
+	WRITE_ONCE(f->reg_idx, idx);
+	return *ptr;
 }
 
 void add_interrupt_randomness(int irq, int irq_flags)
@@ -2019,6 +2030,7 @@ struct batched_entropy {
 	};
 	unsigned int position;
 };
+static rwlock_t batched_entropy_reset_lock = __RW_LOCK_UNLOCKED(batched_entropy_reset_lock);
 
 /*
  * Get a random word for internal kernel use only. The quality of the random
@@ -2029,6 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
 {
 	u64 ret;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 #if BITS_PER_LONG == 64
@@ -2041,11 +2055,15 @@ u64 get_random_u64(void)
 #endif
 
 	batch = &get_cpu_var(batched_entropy_u64);
+	if (use_lock)
+		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
 		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
+	if (use_lock)
+		read_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 	put_cpu_var(batched_entropy_u64);
 	return ret;
 }
@@ -2055,22 +2073,45 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
 	u32 ret;
+	bool use_lock = READ_ONCE(crng_init) < 2;
+	unsigned long flags = 0;
 	struct batched_entropy *batch;
 
 	if (arch_get_random_int(&ret))
 		return ret;
 
 	batch = &get_cpu_var(batched_entropy_u32);
+	if (use_lock)
+		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
 		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
+	if (use_lock)
+		read_unlock_irqrestore(&batched_entropy_reset_lock, flags);
 	put_cpu_var(batched_entropy_u32);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u32);
 
+/* It's important to invalidate all potential batched entropy that might
+ * be stored before the crng is initialized, which we can do lazily by
+ * simply resetting the counter to zero so that it's re-extracted on the
+ * next usage. */
+static void invalidate_batched_entropy(void)
+{
+	int cpu;
+	unsigned long flags;
+
+	write_lock_irqsave(&batched_entropy_reset_lock, flags);
+	for_each_possible_cpu (cpu) {
+		per_cpu_ptr(&batched_entropy_u32, cpu)->position = 0;
+		per_cpu_ptr(&batched_entropy_u64, cpu)->position = 0;
+	}
+	write_unlock_irqrestore(&batched_entropy_reset_lock, flags);
+}
+
 /**
  * randomize_page - Generate a random, page aligned address
  * @start:	The smallest acceptable address the caller will take.
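The get_reg() change above is an instance of a general pattern for an index that may be updated concurrently: snapshot it once, bound-check the local copy, then publish the increment. A minimal sketch with illustrative names (kernel headers assumed):

	#include <linux/compiler.h>
	#include <linux/types.h>

	struct demo_cursor { unsigned int idx; };

	/* A racing writer can no longer push the array access past 'len'. */
	static u32 demo_next(struct demo_cursor *c, const u32 *array,
			     unsigned int len)
	{
		unsigned int idx = READ_ONCE(c->idx);

		if (idx >= len)
			idx = 0;
		WRITE_ONCE(c->idx, idx + 1);
		return array[idx];
	}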
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index b917b9d..c378c7b 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -27,10 +27,9 @@
 
 #define ACPI_SIG_TPM2 "TPM2"
 
-static const u8 CRB_ACPI_START_UUID[] = {
-	/* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
-	/* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
-};
+static const guid_t crb_acpi_start_guid =
+	GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
+		  0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4);
 
 enum crb_defaults {
 	CRB_ACPI_START_REVISION_ID = 1,
@@ -266,7 +265,7 @@ static int crb_do_acpi_start(struct tpm_chip *chip)
 	int rc;
 
 	obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
-				CRB_ACPI_START_UUID,
+				&crb_acpi_start_guid,
 				CRB_ACPI_START_REVISION_ID,
 				CRB_ACPI_START_INDEX,
 				NULL);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 692a2c6..86dd852 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -32,20 +32,16 @@
 #define PPI_VS_REQ_START	128
 #define PPI_VS_REQ_END		255
 
-static const u8 tpm_ppi_uuid[] = {
-	0xA6, 0xFA, 0xDD, 0x3D,
-	0x1B, 0x36,
-	0xB4, 0x4E,
-	0xA4, 0x24,
-	0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
-};
+static const guid_t tpm_ppi_guid =
+	GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4,
+		  0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53);
 
 static inline union acpi_object *
 tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
 	     union acpi_object *argv4)
 {
 	BUG_ON(!ppi_handle);
-	return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+	return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid,
 				       TPM_PPI_REVISION_ID,
 				       func, argv4, type);
 }
@@ -107,7 +103,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 	 * is updated with function index from SUBREQ to SUBREQ2 since PPI
 	 * version 1.1
 	 */
-	if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			   TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
 		func = TPM_PPI_FN_SUBREQ2;
 
@@ -268,7 +264,7 @@ static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start,
 		"User not required",
 	};
 
-	if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+	if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID,
 			    1 << TPM_PPI_FN_GETOPR))
 		return -EPERM;
 
@@ -341,12 +337,12 @@ void tpm_add_ppi(struct tpm_chip *chip)
 	if (!chip->acpi_dev_handle)
 		return;
 
-	if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+	if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
 			    TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
 		return;
 
 	/* Cache PPI version string. */
-	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid,
 				      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
 				      NULL, ACPI_TYPE_STRING);
 	if (obj) {
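Both TPM files above move from hand-packed UUID byte arrays to guid_t. The same shape, sketched with a placeholder UUID and revision (not a real _DSM):

	#include <linux/acpi.h>
	#include <linux/uuid.h>

	static const guid_t demo_dsm_guid =
		GUID_INIT(0x12345678, 0x1234, 0x5678,
			  0x9a, 0xbc, 0xde, 0xf0, 0x12, 0x34, 0x56, 0x78);

	/* Ask whether function 0 of the (made-up) _DSM is implemented. */
	static bool demo_dsm_supported(acpi_handle handle)
	{
		return acpi_check_dsm(handle, &demo_dsm_guid, 1, 1 << 0);
	}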
diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig
index 19480bc..2f29ee1 100644
--- a/drivers/clk/meson/Kconfig
+++ b/drivers/clk/meson/Kconfig
@@ -14,6 +14,7 @@
 config COMMON_CLK_GXBB
 	bool
 	depends on COMMON_CLK_AMLOGIC
+	select RESET_CONTROLLER
 	help
 	  Support for the clock controller on AmLogic S905 devices, aka gxbb.
 	  Say Y if you want peripherals and CPU frequency scaling to work.
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index b0d551a..eb89c78 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -156,6 +156,7 @@
 	bool "Support for Allwinner SoCs' PRCM CCUs"
 	select SUNXI_CCU_DIV
 	select SUNXI_CCU_GATE
+	select SUNXI_CCU_MP
 	default MACH_SUN8I || (ARCH_SUNXI && ARM64)
 
 endif
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
index 9b3cd24..061b6fb 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.h
@@ -31,7 +31,9 @@
 #define CLK_PLL_VIDEO0_2X		8
 #define CLK_PLL_VE			9
 #define CLK_PLL_DDR0			10
-#define CLK_PLL_PERIPH0			11
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X		12
 #define CLK_PLL_PERIPH1			13
 #define CLK_PLL_PERIPH1_2X		14
diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
index 5c476f9..5372bf8 100644
--- a/drivers/clk/sunxi-ng/ccu-sun5i.c
+++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
@@ -243,7 +243,7 @@ static SUNXI_CCU_GATE(ahb_ss_clk,	"ahb-ss",	"ahb",
 static SUNXI_CCU_GATE(ahb_dma_clk,	"ahb-dma",	"ahb",
 		      0x060, BIT(6), 0);
 static SUNXI_CCU_GATE(ahb_bist_clk,	"ahb-bist",	"ahb",
-		      0x060, BIT(6), 0);
+		      0x060, BIT(7), 0);
 static SUNXI_CCU_GATE(ahb_mmc0_clk,	"ahb-mmc0",	"ahb",
 		      0x060, BIT(8), 0);
 static SUNXI_CCU_GATE(ahb_mmc1_clk,	"ahb-mmc1",	"ahb",
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
index 89e68d2..df97e25 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -556,7 +556,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch1_clk, "lcd0-ch1", lcd_ch1_parents,
 				 0x12c, 0, 4, 24, 3, BIT(31),
 				 CLK_SET_RATE_PARENT);
 static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents,
-				 0x12c, 0, 4, 24, 3, BIT(31),
+				 0x130, 0, 4, 24, 3, BIT(31),
 				 CLK_SET_RATE_PARENT);
 
 static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1",
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
index 85973d1..1b4baea 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
@@ -29,7 +29,9 @@
 #define CLK_PLL_VIDEO		6
 #define CLK_PLL_VE		7
 #define CLK_PLL_DDR		8
-#define CLK_PLL_PERIPH0		9
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X	10
 #define CLK_PLL_GPU		11
 #define CLK_PLL_PERIPH1		12
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index e58706b..6297add 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -537,7 +537,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
 	[RST_BUS_EMAC]		=  { 0x2c0, BIT(17) },
 	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
 	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
-	[RST_BUS_OTG]		=  { 0x2c0, BIT(23) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(24) },
 	[RST_BUS_EHCI0]		=  { 0x2c0, BIT(26) },
 	[RST_BUS_OHCI0]		=  { 0x2c0, BIT(29) },
 
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 4bed671e..8b5c300 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
 		return 0;
 	}
 
-	rate = readl_relaxed(frame + CNTFRQ);
+	rate = readl_relaxed(base + CNTFRQ);
 
-	iounmap(frame);
+	iounmap(base);
 
 	return rate;
 }
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e95..8e64b84 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index 2e9c830..c4656c4 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 74ed7e9..2011fec 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -71,6 +71,15 @@
 
 	  If in doubt, say N.
 
+config ARM_DB8500_CPUFREQ
+	tristate "ST-Ericsson DB8500 cpufreq" if COMPILE_TEST && !ARCH_U8500
+	default ARCH_U8500
+	depends on HAS_IOMEM
+	depends on !CPU_THERMAL || THERMAL
+	help
+	  This adds the CPUFreq driver for ST-Ericsson Ux500 (DB8500) SoC
+	  series.
+
 config ARM_IMX6Q_CPUFREQ
 	tristate "Freescale i.MX6 cpufreq support"
 	depends on ARCH_MXC
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index b7e78f0..ab3a42c 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -53,7 +53,7 @@
 
 obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ)	+= brcmstb-avs-cpufreq.o
 obj-$(CONFIG_ARCH_DAVINCI)		+= davinci-cpufreq.o
-obj-$(CONFIG_UX500_SOC_DB8500)		+= dbx500-cpufreq.o
+obj-$(CONFIG_ARM_DB8500_CPUFREQ)	+= dbx500-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ)	+= exynos5440-cpufreq.o
 obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ)	+= highbank-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0e3f649..26b643d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2468,6 +2468,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	if (!(cpufreq_driver->flags & CPUFREQ_STICKY) &&
 	    list_empty(&cpufreq_policy_list)) {
 		/* if all ->init() calls failed, unregister */
+		ret = -ENODEV;
 		pr_debug("%s: No CPU initialized for driver %s\n", __func__,
 			 driver_data->name);
 		goto err_if_unreg;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 992f7c2..88220ff 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -185,8 +185,8 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
 	int ret;
 	ret = sscanf(buf, "%u", &input);
 
-	/* cannot be lower than 11 otherwise freq will not fall */
-	if (ret != 1 || input < 11 || input > 100 ||
+	/* cannot be lower than 1 otherwise freq will not fall */
+	if (ret != 1 || input < 1 || input > 100 ||
 			input >= dbs_data->up_threshold)
 		return -EINVAL;
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b7de5bd..eb11585 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -571,9 +571,10 @@ static inline void update_turbo_state(void)
 static int min_perf_pct_min(void)
 {
 	struct cpudata *cpu = all_cpu_data[0];
+	int turbo_pstate = cpu->pstate.turbo_pstate;
 
-	return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
-			    cpu->pstate.turbo_pstate);
+	return turbo_pstate ?
+		DIV_ROUND_UP(cpu->pstate.min_pstate * 100, turbo_pstate) : 0;
 }
 
 static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index 1b9bcd7..c2dd43f 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -127,7 +127,12 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		return PTR_ERR(priv.cpu_clk);
 	}
 
-	clk_prepare_enable(priv.cpu_clk);
+	err = clk_prepare_enable(priv.cpu_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare cpuclk\n");
+		return err;
+	}
+
 	kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000;
 
 	priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
@@ -137,7 +142,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		goto out_cpu;
 	}
 
-	clk_prepare_enable(priv.ddr_clk);
+	err = clk_prepare_enable(priv.ddr_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare ddrclk\n");
+		goto out_cpu;
+	}
 	kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000;
 
 	priv.powersave_clk = of_clk_get_by_name(np, "powersave");
@@ -146,7 +155,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 		err = PTR_ERR(priv.powersave_clk);
 		goto out_ddr;
 	}
-	clk_prepare_enable(priv.powersave_clk);
+	err = clk_prepare_enable(priv.powersave_clk);
+	if (err) {
+		dev_err(priv.dev, "Unable to prepare powersave clk\n");
+		goto out_ddr;
+	}
 
 	of_node_put(np);
 	np = NULL;
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 35dd4d7..b257fc7 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -226,7 +226,7 @@ static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	 * We don't support CPU hotplug. Don't unmap after the system
 	 * has already made it to a running state.
 	 */
-	if (system_state != SYSTEM_BOOTING)
+	if (system_state >= SYSTEM_RUNNING)
 		return 0;
 
 	if (sdcasr_mapbase)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 2706be7..60bb64f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -220,6 +220,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	entered_state = target_state->enter(dev, drv, index);
 	start_critical_timings();
 
+	sched_clock_idle_wakeup_event();
 	time_end = ns_to_ktime(local_clock());
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
index ffca4fc..ae8eb03 100644
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -180,8 +180,10 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
 		if (!state_node)
 			break;
 
-		if (!of_device_is_available(state_node))
+		if (!of_device_is_available(state_node)) {
+			of_node_put(state_node);
 			continue;
+		}
 
 		if (!idle_state_valid(state_node, i, cpumask)) {
 			pr_warn("%s idle state not valid, bailing out\n",
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 6ed32aa..922d082 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -210,9 +210,12 @@ EXPORT_SYMBOL_GPL(kill_dax);
 static struct inode *dax_alloc_inode(struct super_block *sb)
 {
 	struct dax_device *dax_dev;
+	struct inode *inode;
 
 	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
-	return &dax_dev->inode;
+	inode = &dax_dev->inode;
+	inode->i_rdev = 0;
+	return inode;
 }
 
 static struct dax_device *to_dax_dev(struct inode *inode)
@@ -227,7 +230,8 @@ static void dax_i_callback(struct rcu_head *head)
 
 	kfree(dax_dev->host);
 	dax_dev->host = NULL;
-	ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
+	if (inode->i_rdev)
+		ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
 	kmem_cache_free(dax_cache, dax_dev);
 }
 
@@ -423,6 +427,7 @@ static void init_once(void *_dax_dev)
 	struct dax_device *dax_dev = _dax_dev;
 	struct inode *inode = &dax_dev->inode;
 
+	memset(dax_dev, 0, sizeof(*dax_dev));
 	inode_init_once(inode);
 }
 
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 5c3e7b1..f6e7956 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -267,7 +267,11 @@ static int exynos_nocp_probe(struct platform_device *pdev)
 	}
 	platform_set_drvdata(pdev, nocp);
 
-	clk_prepare_enable(nocp->clk);
+	ret = clk_prepare_enable(nocp->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare nocp clock\n");
+		return ret;
+	}
 
 	pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
 			dev_name(dev));
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 9b73509..d96e3dc 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -44,7 +44,7 @@ struct exynos_ppmu {
 	{ "ppmu-event2-"#name, PPMU_PMNCNT2 },	\
 	{ "ppmu-event3-"#name, PPMU_PMNCNT3 }
 
-struct __exynos_ppmu_events {
+static struct __exynos_ppmu_events {
 	char *name;
 	int id;
 } ppmu_events[] = {
@@ -648,7 +648,11 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
 			dev_name(&pdev->dev), desc[i].name);
 	}
 
-	clk_prepare_enable(info->ppmu.clk);
+	ret = clk_prepare_enable(info->ppmu.clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+		return ret;
+	}
 
 	return 0;
 }
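Several probe paths in this series (kirkwood-cpufreq, exynos-nocp, exynos-ppmu above) gain the same treatment: check the return value of clk_prepare_enable() instead of ignoring it. The general shape, with made-up names:

	#include <linux/clk.h>
	#include <linux/device.h>

	static int demo_enable_clock(struct device *dev, struct clk *clk)
	{
		int ret;

		ret = clk_prepare_enable(clk);
		if (ret)
			dev_err(dev, "failed to enable clock: %d\n", ret);
		return ret;	/* propagate the failure to the caller */
	}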
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index d37e8dd..ec24059 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -201,6 +201,7 @@ struct ep93xx_dma_engine {
 	struct dma_device	dma_dev;
 	bool			m2m;
 	int			(*hw_setup)(struct ep93xx_dma_chan *);
+	void			(*hw_synchronize)(struct ep93xx_dma_chan *);
 	void			(*hw_shutdown)(struct ep93xx_dma_chan *);
 	void			(*hw_submit)(struct ep93xx_dma_chan *);
 	int			(*hw_interrupt)(struct ep93xx_dma_chan *);
@@ -323,6 +324,8 @@ static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
 		| M2P_CONTROL_ENABLE;
 	m2p_set_control(edmac, control);
 
+	edmac->buffer = 0;
+
 	return 0;
 }
 
@@ -331,21 +334,27 @@ static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
 	return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
 }
 
-static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac)
 {
+	unsigned long flags;
 	u32 control;
 
+	spin_lock_irqsave(&edmac->lock, flags);
 	control = readl(edmac->regs + M2P_CONTROL);
 	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
 	m2p_set_control(edmac, control);
+	spin_unlock_irqrestore(&edmac->lock, flags);
 
 	while (m2p_channel_state(edmac) >= M2P_STATE_ON)
-		cpu_relax();
+		schedule();
+}
 
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
 	m2p_set_control(edmac, 0);
 
-	while (m2p_channel_state(edmac) == M2P_STATE_STALL)
-		cpu_relax();
+	while (m2p_channel_state(edmac) != M2P_STATE_IDLE)
+		dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n");
 }
 
 static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
@@ -1161,6 +1170,26 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 }
 
 /**
+ * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the
+ * current context.
+ * @chan: channel
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ */
+static void ep93xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+
+	if (edmac->edma->hw_synchronize)
+		edmac->edma->hw_synchronize(edmac);
+}
+
+/**
  * ep93xx_dma_terminate_all - terminate all transactions
  * @chan: channel
  *
@@ -1323,6 +1352,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
 	dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
 	dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
 	dma_dev->device_config = ep93xx_dma_slave_config;
+	dma_dev->device_synchronize = ep93xx_dma_synchronize;
 	dma_dev->device_terminate_all = ep93xx_dma_terminate_all;
 	dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
 	dma_dev->device_tx_status = ep93xx_dma_tx_status;
@@ -1340,6 +1370,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev)
 	} else {
 		dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
 
+		edma->hw_synchronize = m2p_hw_synchronize;
 		edma->hw_setup = m2p_hw_setup;
 		edma->hw_shutdown = m2p_hw_shutdown;
 		edma->hw_submit = m2p_hw_submit;
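A hypothetical consumer of the new device_synchronize hook, using the generic dmaengine wrappers; this is a usage sketch, not part of the driver:

	#include <linux/dmaengine.h>

	static void demo_shutdown_channel(struct dma_chan *chan)
	{
		/* Request termination without sleeping... */
		dmaengine_terminate_async(chan);
		/* ...then wait until no completion callback can still run, so
		 * the buffers those callbacks reference may be freed safely. */
		dmaengine_synchronize(chan);
	}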
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index a28a01f..f3e211f 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -161,6 +161,7 @@ struct mv_xor_v2_device {
 	struct mv_xor_v2_sw_desc *sw_desq;
 	int desc_size;
 	unsigned int npendings;
+	unsigned int hw_queue_idx;
 };
 
 /**
@@ -214,18 +215,6 @@ static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
 }
 
 /*
- * Return the next available index in the DESQ.
- */
-static int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)
-{
-	/* read the index for the next available descriptor in the DESQ */
-	u32 reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ALLOC_OFF);
-
-	return ((reg >> MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT)
-		& MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK);
-}
-
-/*
  * notify the engine of new descriptors, and update the available index.
  */
 static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
@@ -257,22 +246,6 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
 	return MV_XOR_V2_EXT_DESC_SIZE;
 }
 
-/*
- * Set the IMSG threshold
- */
-static inline
-void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val)
-{
-	u32 reg;
-
-	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
-
-	reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
-	reg |= (thrd_val << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
-
-	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
-}
-
 static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 {
 	struct mv_xor_v2_device *xor_dev = data;
@@ -288,12 +261,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 	if (!ndescs)
 		return IRQ_NONE;
 
-	/*
-	 * Update IMSG threshold, to disable new IMSG interrupts until
-	 * end of the tasklet
-	 */
-	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_DESC_NUM);
-
 	/* schedule a tasklet to handle descriptors callbacks */
 	tasklet_schedule(&xor_dev->irq_tasklet);
 
@@ -306,7 +273,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
 static dma_cookie_t
 mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	int desq_ptr;
 	void *dest_hw_desc;
 	dma_cookie_t cookie;
 	struct mv_xor_v2_sw_desc *sw_desc =
@@ -322,15 +288,15 @@ mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_bh(&xor_dev->lock);
 	cookie = dma_cookie_assign(tx);
 
-	/* get the next available slot in the DESQ */
-	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
-
 	/* copy the HW descriptor from the SW descriptor to the DESQ */
-	dest_hw_desc = xor_dev->hw_desq_virt + desq_ptr;
+	dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx;
 
 	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
 
 	xor_dev->npendings++;
+	xor_dev->hw_queue_idx++;
+	if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM)
+		xor_dev->hw_queue_idx = 0;
 
 	spin_unlock_bh(&xor_dev->lock);
 
@@ -344,6 +310,7 @@ static struct mv_xor_v2_sw_desc	*
 mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
 {
 	struct mv_xor_v2_sw_desc *sw_desc;
+	bool found = false;
 
 	/* Lock the channel */
 	spin_lock_bh(&xor_dev->lock);
@@ -355,19 +322,23 @@ mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
 		return NULL;
 	}
 
-	/* get a free SW descriptor from the SW DESQ */
-	sw_desc = list_first_entry(&xor_dev->free_sw_desc,
-				   struct mv_xor_v2_sw_desc, free_list);
+	list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) {
+		if (async_tx_test_ack(&sw_desc->async_tx)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		spin_unlock_bh(&xor_dev->lock);
+		return NULL;
+	}
+
 	list_del(&sw_desc->free_list);
 
 	/* Release the channel */
 	spin_unlock_bh(&xor_dev->lock);
 
-	/* set the async tx descriptor */
-	dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan);
-	sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
-	async_tx_ack(&sw_desc->async_tx);
-
 	return sw_desc;
 }
 
@@ -389,6 +360,8 @@ mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
 		__func__, len, &src, &dest, flags);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	sw_desc->async_tx.flags = flags;
 
@@ -443,6 +416,8 @@ mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
 		__func__, src_cnt, len, &dest, flags);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	sw_desc->async_tx.flags = flags;
 
@@ -491,6 +466,8 @@ mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
 		container_of(chan, struct mv_xor_v2_device, dmachan);
 
 	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
 
 	/* set the HW descriptor */
 	hw_descriptor = &sw_desc->hw_desc;
@@ -554,7 +531,6 @@ static void mv_xor_v2_tasklet(unsigned long data)
 {
 	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
 	int pending_ptr, num_of_pending, i;
-	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
 	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
 
 	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
@@ -562,17 +538,10 @@ static void mv_xor_v2_tasklet(unsigned long data)
 	/* get the pending descriptors parameters */
 	num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
 
-	/* next HW descriptor */
-	next_pending_hw_desc = xor_dev->hw_desq_virt + pending_ptr;
-
 	/* loop over free descriptors */
 	for (i = 0; i < num_of_pending; i++) {
-
-		if (pending_ptr > MV_XOR_V2_DESC_NUM)
-			pending_ptr = 0;
-
-		if (next_pending_sw_desc != NULL)
-			next_pending_hw_desc++;
+		struct mv_xor_v2_descriptor *next_pending_hw_desc =
+			xor_dev->hw_desq_virt + pending_ptr;
 
 		/* get the SW descriptor related to the HW descriptor */
 		next_pending_sw_desc =
@@ -608,15 +577,14 @@ static void mv_xor_v2_tasklet(unsigned long data)
 
 		/* increment the next descriptor */
 		pending_ptr++;
+		if (pending_ptr >= MV_XOR_V2_DESC_NUM)
+			pending_ptr = 0;
 	}
 
 	if (num_of_pending != 0) {
 		/* free the descriptors */
 		mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
 	}
-
-	/* Update IMSG threshold, to enable new IMSG interrupts */
-	mv_xor_v2_set_imsg_thrd(xor_dev, 0);
 }
 
 /*
@@ -648,9 +616,6 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
 	writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32,
 	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
 
-	/* enable the DMA engine */
-	writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
-
 	/*
 	 * This is a temporary solution, until we activate the
 	 * SMMU. Set the attributes for reading & writing data buffers
@@ -694,6 +659,9 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
 	reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL;
 	writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
 
+	/* enable the DMA engine */
+	writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
 	return 0;
 }
 
@@ -725,6 +693,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, xor_dev);
 
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+	if (ret)
+		return ret;
+
 	xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER)
 		return -EPROBE_DEFER;
@@ -785,8 +757,15 @@ static int mv_xor_v2_probe(struct platform_device *pdev)
 
 	/* add all SW descriptors to the free list */
 	for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) {
-		xor_dev->sw_desq[i].idx = i;
-		list_add(&xor_dev->sw_desq[i].free_list,
+		struct mv_xor_v2_sw_desc *sw_desc =
+			xor_dev->sw_desq + i;
+		sw_desc->idx = i;
+		dma_async_tx_descriptor_init(&sw_desc->async_tx,
+					     &xor_dev->dmachan);
+		sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+		async_tx_ack(&sw_desc->async_tx);
+
+		list_add(&sw_desc->free_list,
 			 &xor_dev->free_sw_desc);
 	}
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 8b0da7f..e90a7a0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -3008,7 +3008,8 @@ static int pl330_remove(struct amba_device *adev)
 
 	for (i = 0; i < AMBA_NR_IRQS; i++) {
 		irq = adev->irq[i];
-		devm_free_irq(&adev->dev, irq, pl330);
+		if (irq)
+			devm_free_irq(&adev->dev, irq, pl330);
 	}
 
 	dma_async_device_unregister(&pl330->ddma);
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index db41795..bd261c9 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1287,6 +1287,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 	if (desc->hwdescs.use) {
 		dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
 			RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+		if (dptr == 0)
+			dptr = desc->nchunks;
+		dptr--;
 		WARN_ON(dptr >= desc->nchunks);
 	} else {
 		running = desc->running;
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 72c6497..31a1451 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -117,7 +117,7 @@ struct usb_dmac {
 #define USB_DMASWR			0x0008
 #define USB_DMASWR_SWR			(1 << 0)
 #define USB_DMAOR			0x0060
-#define USB_DMAOR_AE			(1 << 2)
+#define USB_DMAOR_AE			(1 << 1)
 #define USB_DMAOR_DME			(1 << 0)
 
 #define USB_DMASAR			0x0000
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 7717b09..db75d4b 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -214,24 +214,16 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
 static unsigned long get_total_mem(void)
 {
 	struct device_node *np = NULL;
-	const unsigned int *reg, *reg_end;
-	int len, sw, aw;
-	unsigned long start, size, total_mem = 0;
+	struct resource res;
+	int ret;
+	unsigned long total_mem = 0;
 
 	for_each_node_by_type(np, "memory") {
-		aw = of_n_addr_cells(np);
-		sw = of_n_size_cells(np);
-		reg = (const unsigned int *)of_get_property(np, "reg", &len);
-		reg_end = reg + (len / sizeof(u32));
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			continue;
 
-		total_mem = 0;
-		do {
-			start = of_read_number(reg, aw);
-			reg += aw;
-			size = of_read_number(reg, sw);
-			reg += sw;
-			total_mem += size;
-		} while (reg < reg_end);
+		total_mem += resource_size(&res);
 	}
 	edac_dbg(0, "total_mem 0x%lx\n", total_mem);
 	return total_mem;
@@ -1839,7 +1831,7 @@ static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-static struct irq_domain_ops a10_eccmgr_ic_ops = {
+static const struct irq_domain_ops a10_eccmgr_ic_ops = {
 	.map = a10_eccmgr_irqdomain_map,
 	.xlate = irq_domain_xlate_twocell,
 };
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index f683919..8f5a56e 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -227,7 +227,7 @@
 #define			NREC_RDWR(x)		(((x)>>11) & 1)
 #define			NREC_RANK(x)		(((x)>>8) & 0x7)
 #define		NRECMEMB		0xC0
-#define			NREC_CAS(x)		(((x)>>16) & 0xFFFFFF)
+#define			NREC_CAS(x)		(((x)>>16) & 0xFFF)
 #define			NREC_RAS(x)		((x) & 0x7FFF)
 #define		NRECFGLOG		0xC4
 #define		NREEECFBDA		0xC8
@@ -371,7 +371,7 @@ struct i5000_error_info {
 	/* These registers are input ONLY if there was a
 	 * Non-Recoverable Error */
 	u16 nrecmema;		/* Non-Recoverable Mem log A */
-	u16 nrecmemb;		/* Non-Recoverable Mem log B */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B */
 
 };
 
@@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci,
 				NERR_FAT_FBD, &info->nerr_fat_fbd);
 		pci_read_config_word(pvt->branchmap_werrors,
 				NRECMEMA, &info->nrecmema);
-		pci_read_config_word(pvt->branchmap_werrors,
+		pci_read_config_dword(pvt->branchmap_werrors,
 				NRECMEMB, &info->nrecmemb);
 
 		/* Clear the error bits, by writing them back */
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 37a9ba7..cd889ed 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -368,7 +368,7 @@ struct i5400_error_info {
 
 	/* These registers are input ONLY if there was a Non-Rec Error */
 	u16 nrecmema;		/* Non-Recoverable Mem log A */
-	u16 nrecmemb;		/* Non-Recoverable Mem log B */
+	u32 nrecmemb;		/* Non-Recoverable Mem log B */
 
 };
 
@@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci,
 				NERR_FAT_FBD, &info->nerr_fat_fbd);
 		pci_read_config_word(pvt->branchmap_werrors,
 				NRECMEMA, &info->nrecmema);
-		pci_read_config_word(pvt->branchmap_werrors,
+		pci_read_config_dword(pvt->branchmap_werrors,
 				NRECMEMB, &info->nrecmemb);
 
 		/* Clear the error bits, by writing them back */
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 2733fb5..4260579 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -18,10 +18,12 @@
  * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
  * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
+ * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
  * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
+ * http://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
  *
  * According to the above datasheet (p.16):
  * "
@@ -57,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
 
 #define IE31200_DIMMS			4
 #define IE31200_RANKS			8
@@ -376,7 +379,12 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 	void __iomem *window;
 	struct ie31200_priv *priv;
 	u32 addr_decode, mad_offset;
-	bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
+
+	/*
+	 * Kaby Lake seems to work like Skylake. Please re-visit this logic
+	 * when adding new CPU support.
+	 */
+	bool skl = (pdev->device >= PCI_DEVICE_ID_INTEL_IE31200_HB_8);
 
 	edac_dbg(0, "MC:\n");
 
@@ -560,6 +568,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
 		PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		IE31200},
 	{
+		PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		IE31200},
+	{
 		0,
 	}            /* 0 terminated list. */
 };
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ba35b7e..9a2658a 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -161,7 +161,7 @@ static const char * const smca_ls_mce_desc[] = {
 	"Sys Read data error thread 0",
 	"Sys read data error thread 1",
 	"DC tag error type 2",
-	"DC data error type 1 (poison comsumption)",
+	"DC data error type 1 (poison consumption)",
 	"DC data error type 2",
 	"DC data error type 3",
 	"DC tag error type 4",
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 14b7e7b..d3650df 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -32,21 +32,21 @@ static void mv64x60_pci_check(struct edac_pci_ctl_info *pci)
 	struct mv64x60_pci_pdata *pdata = pci->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	cause = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 	if (!cause)
 		return;
 
 	printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose);
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
 	printk(KERN_ERR "Attribute: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
 	printk(KERN_ERR "Command: 0x%08x\n",
-	       in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause);
+	       readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
+	writel(~cause, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 
 	if (cause & MV64X60_PCI_PE_MASK)
 		edac_pci_handle_pe(pci, pci->ctl_name);
@@ -61,7 +61,7 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
 	struct mv64x60_pci_pdata *pdata = pci->pvt_info;
 	u32 val;
 
-	val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	val = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 	if (!val)
 		return IRQ_NONE;
 
@@ -93,7 +93,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
 	if (!pci_serr)
 		return -ENOMEM;
 
-	out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+	writel(readl(pci_serr) & ~0x1, pci_serr);
 	iounmap(pci_serr);
 
 	return 0;
@@ -116,7 +116,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
 	pdata = pci->pvt_info;
 
 	pdata->pci_hose = pdev->id;
-	pdata->name = "mpc85xx_pci_err";
+	pdata->name = "mv64x60_pci_err";
 	platform_set_drvdata(pdev, pci);
 	pci->dev = &pdev->dev;
 	pci->dev_name = dev_name(&pdev->dev);
@@ -161,10 +161,10 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
-	out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
-		 MV64X60_PCIx_ERR_MASK_VAL);
+	writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+	writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
+	writel(MV64X60_PCIx_ERR_MASK_VAL,
+		  pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
 
 	if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
 		edac_dbg(3, "failed edac_pci_add_device()\n");
@@ -233,23 +233,23 @@ static void mv64x60_sram_check(struct edac_device_ctl_info *edac_dev)
 	struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+	cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 	if (!cause)
 		return;
 
 	printk(KERN_ERR "Error in internal SRAM\n");
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
 	printk(KERN_ERR "Data Low: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
 	printk(KERN_ERR "Data High: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
 	printk(KERN_ERR "Parity: 0x%08x\n",
-	       in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
-	out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+	       readl(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
+	writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
 	edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -260,7 +260,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id)
 	struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+	cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 	if (!cause)
 		return IRQ_NONE;
 
@@ -322,7 +322,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev)
 	}
 
 	/* setup SRAM err registers */
-	out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+	writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
 	edac_dev->mod_name = EDAC_MOD_STR;
 	edac_dev->ctl_name = pdata->name;
@@ -398,7 +398,7 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
 	struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+	cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
 	    MV64x60_CPU_CAUSE_MASK;
 	if (!cause)
 		return;
@@ -406,16 +406,16 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
 	printk(KERN_ERR "Error on CPU interface\n");
 	printk(KERN_ERR "Cause register: 0x%08x\n", cause);
 	printk(KERN_ERR "Address Low: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
+	       readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
 	printk(KERN_ERR "Address High: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
+	       readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
 	printk(KERN_ERR "Data Low: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
 	printk(KERN_ERR "Data High: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
 	printk(KERN_ERR "Parity: 0x%08x\n",
-	       in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
+	       readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
 
 	edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -426,7 +426,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id)
 	struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
 	u32 cause;
 
-	cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+	cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
 	    MV64x60_CPU_CAUSE_MASK;
 	if (!cause)
 		return IRQ_NONE;
@@ -515,9 +515,9 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev)
 	}
 
 	/* setup CPU err registers */
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0);
-	out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff);
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
+	writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
+	writel(0x000000ff, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
 
 	edac_dev->mod_name = EDAC_MOD_STR;
 	edac_dev->ctl_name = pdata->name;
@@ -596,13 +596,13 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
 	u32 comp_ecc;
 	u32 syndrome;
 
-	reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 	if (!reg)
 		return;
 
 	err_addr = reg & ~0x3;
-	sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
-	comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
+	sdram_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
+	comp_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
 	syndrome = sdram_ecc ^ comp_ecc;
 
 	/* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
@@ -620,7 +620,7 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
 				     mci->ctl_name, "");
 
 	/* clear the error */
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
+	writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 }
 
 static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
@@ -629,7 +629,7 @@ static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
 	struct mv64x60_mc_pdata *pdata = mci->pvt_info;
 	u32 reg;
 
-	reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 	if (!reg)
 		return IRQ_NONE;
 
@@ -664,7 +664,7 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 
 	get_total_mem(pdata);
 
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 
 	csrow = mci->csrows[0];
 	dimm = csrow->channels[0]->dimm;
@@ -753,7 +753,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 	if (!(ctl & MV64X60_SDRAM_ECC)) {
 		/* Non-ECC RAM? */
 		printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
@@ -779,10 +779,10 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
 	mv64x60_init_csrows(mci, pdata);
 
 	/* setup MC registers */
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
-	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
+	writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+	ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
 	ctl = (ctl & 0xff00ffff) | 0x10000;
-	out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
+	writel(ctl, pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
 
 	res = edac_mc_add_mc(mci);
 	if (res) {
@@ -853,10 +853,10 @@ static struct platform_driver * const drivers[] = {
 
 static int __init mv64x60_edac_init(void)
 {
-	int ret = 0;
 
 	printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n");
 	printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n");
+
 	/* make sure error reporting method is sane */
 	switch (edac_op_state) {
 	case EDAC_OPSTATE_POLL:
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 1cad5a9..8e59949 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -131,7 +131,7 @@ static struct mem_ctl_info *pnd2_mci;
 
 #ifdef CONFIG_X86_INTEL_SBI_APL
 #include "linux/platform_data/sbi_apl.h"
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
 	struct sbi_apl_message sbi_arg;
 	int ret, read = 0;
@@ -160,7 +160,7 @@ int sbi_send(int port, int off, int op, u32 *data)
 	return ret;
 }
 #else
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
 	return -EUNATCH;
 }
@@ -168,14 +168,15 @@ int sbi_send(int port, int off, int op, u32 *data)
 
 static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
 {
-	int	ret = 0;
+	int ret = 0;
 
 	edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op);
 	switch (sz) {
 	case 8:
 		ret = sbi_send(port, off + 4, op, (u32 *)(data + 4));
+		/* fall through */
 	case 4:
-		ret = sbi_send(port, off, op, (u32 *)data);
+		ret |= sbi_send(port, off, op, (u32 *)data);
 		pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name,
 					sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret);
 		break;
@@ -423,16 +424,21 @@ static void dnv_mk_region(char *name, struct region *rp, void *asym)
 
 static int apl_get_registers(void)
 {
+	int ret = -ENODEV;
 	int i;
 
 	if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar))
 		return -ENODEV;
 
+	/*
+	 * RD_REGP() will fail for unpopulated or non-existent
+	 * DIMM slots. Return success if we find at least one DIMM.
+	 */
 	for (i = 0; i < APL_NUM_CHANNELS; i++)
-		if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
-			return -ENODEV;
+		if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
+			ret = 0;
 
-	return 0;
+	return ret;
 }
 
 static int dnv_get_registers(void)
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index ea21cb6..80d860c 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -35,7 +35,7 @@ static LIST_HEAD(sbridge_edac_list);
 /*
  * Alter this version for the module when modifications are made
  */
-#define SBRIDGE_REVISION    " Ver: 1.1.1 "
+#define SBRIDGE_REVISION    " Ver: 1.1.2 "
 #define EDAC_MOD_STR      "sbridge_edac"
 
 /*
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
  * sbridge structs
  */
 
-#define NUM_CHANNELS		8	/* 2MC per socket, four chan per MC */
+#define NUM_CHANNELS		4	/* Max channels per MC */
 #define MAX_DIMMS		3	/* Max DIMMS per channel */
 #define KNL_MAX_CHAS		38	/* KNL max num. of Cache Home Agents */
 #define KNL_MAX_CHANNELS	6	/* KNL max num. of PCI channels */
@@ -294,6 +294,12 @@ enum type {
 	KNIGHTS_LANDING,
 };
 
+enum domain {
+	IMC0 = 0,
+	IMC1,
+	SOCK,
+};
+
 struct sbridge_pvt;
 struct sbridge_info {
 	enum type	type;
@@ -324,11 +330,14 @@ struct sbridge_channel {
 struct pci_id_descr {
 	int			dev_id;
 	int			optional;
+	enum domain		dom;
 };
 
 struct pci_id_table {
 	const struct pci_id_descr	*descr;
-	int				n_devs;
+	int				n_devs_per_imc;
+	int				n_devs_per_sock;
+	int				n_imcs_per_sock;
 	enum type			type;
 };
 
@@ -337,7 +346,9 @@ struct sbridge_dev {
 	u8			bus, mc;
 	u8			node_id, source_id;
 	struct pci_dev		**pdev;
+	enum domain		dom;
 	int			n_devs;
+	int			i_devs;
 	struct mem_ctl_info	*mci;
 };
 
@@ -352,11 +363,12 @@ struct knl_pvt {
 };
 
 struct sbridge_pvt {
-	struct pci_dev		*pci_ta, *pci_ddrio, *pci_ras;
+	/* Devices per socket */
+	struct pci_dev		*pci_ddrio;
 	struct pci_dev		*pci_sad0, *pci_sad1;
-	struct pci_dev		*pci_ha0, *pci_ha1;
 	struct pci_dev		*pci_br0, *pci_br1;
-	struct pci_dev		*pci_ha1_ta;
+	/* Devices per memory controller */
+	struct pci_dev		*pci_ha, *pci_ta, *pci_ras;
 	struct pci_dev		*pci_tad[NUM_CHANNELS];
 
 	struct sbridge_dev	*sbridge_dev;
@@ -373,39 +385,42 @@ struct sbridge_pvt {
 	struct knl_pvt knl;
 };
 
-#define PCI_DESCR(device_id, opt)	\
+#define PCI_DESCR(device_id, opt, domain)	\
 	.dev_id = (device_id),		\
-	.optional = opt
+	.optional = opt,	\
+	.dom = domain
 
 static const struct pci_id_descr pci_dev_descr_sbridge[] = {
 		/* Processor Home Agent */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0,   0, IMC0) },
 
 		/* Memory controller */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA,    0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3,  0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1, SOCK) },
 
 		/* System Address Decoder */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0,      0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1,      0, SOCK) },
 
 		/* Broadcast Registers */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR,        0, SOCK) },
 };
 
-#define PCI_ID_TABLE_ENTRY(A, T) {	\
+#define PCI_ID_TABLE_ENTRY(A, N, M, T) {	\
 	.descr = A,			\
-	.n_devs = ARRAY_SIZE(A),	\
+	.n_devs_per_imc = N,	\
+	.n_devs_per_sock = ARRAY_SIZE(A),	\
+	.n_imcs_per_sock = M,	\
 	.type = T			\
 }
 
 static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -439,40 +454,39 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_ibridge[] = {
 		/* Processor Home Agent */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0,        0, IMC0) },
 
 		/* Memory controller */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0)	},
-
-		/* System Address Decoder */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0)			},
-
-		/* Broadcast Registers */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1)			},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0)			},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA,     0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS,    0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3,   0, IMC0) },
 
 		/* Optional, mode 2HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1)		},
-#if 0
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1)	},
-#endif
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1,        1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA,     1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS,    1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3,   1, IMC1) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1, SOCK) },
+
+		/* System Address Decoder */
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD,            0, SOCK) },
+
+		/* Broadcast Registers */
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0,            1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1,            0, SOCK) },
+
 };
 
 static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -498,9 +512,9 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0	0x2fa0
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1	0x2f60
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA	0x2fa8
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM	0x2f71
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA	0x2f68
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM	0x2f79
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa
@@ -517,35 +531,33 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
 static const struct pci_id_descr pci_dev_descr_haswell[] = {
 	/* first item must be the HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0,      0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1,      1, IMC1) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1, IMC0) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1, IMC1) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1)		},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1)		},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2,   1, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_haswell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -559,7 +571,7 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
 /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
 /* DRAM channel stuff; bank addrs, dimmmtr, etc.. 2-8-2 - 2-9-4 (6 of these) */
-#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL  0x7843
+#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN     0x7843
 /* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_TA       0x7844
 /* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */
@@ -579,17 +591,17 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
  */
 
 static const struct pci_id_descr pci_dev_descr_knl[] = {
-	[0]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) },
-	[1]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) },
-	[2 ... 3]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)},
-	[4 ... 41]  = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) },
-	[42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) },
-	[48]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) },
-	[49]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) },
+	[0 ... 1]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC,    0, IMC0)},
+	[2 ... 7]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN,  0, IMC0) },
+	[8]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA,    0, IMC0) },
+	[9]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0, IMC0) },
+	[10]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0,  0, SOCK) },
+	[11]	    = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1,  0, SOCK) },
+	[12 ... 49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA,   0, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_knl_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING),
 	{0,}
 };
 
@@ -615,9 +627,9 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0	0x6fa0
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1	0x6f60
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA	0x6fa8
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM	0x6f71
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA	0x6f68
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM	0x6f79
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa
@@ -632,32 +644,30 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_broadwell[] = {
 	/* first item must be the HA */
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0,      0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1,      1, IMC1) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM,   0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1, IMC0) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1, IMC0) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1)		},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM,   1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1, IMC1) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1, IMC1) },
 
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1)	},
-
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1)	},
-	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1)	},
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0, SOCK) },
+	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -709,7 +719,8 @@ static inline int numcol(u32 mtr)
 	return 1 << cols;
 }
 
-static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
+static struct sbridge_dev *get_sbridge_dev(u8 bus, enum domain dom, int multi_bus,
+					   struct sbridge_dev *prev)
 {
 	struct sbridge_dev *sbridge_dev;
 
@@ -722,16 +733,19 @@ static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
 				struct sbridge_dev, list);
 	}
 
-	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-		if (sbridge_dev->bus == bus)
+	sbridge_dev = list_entry(prev ? prev->list.next
+				      : sbridge_edac_list.next, struct sbridge_dev, list);
+
+	list_for_each_entry_from(sbridge_dev, &sbridge_edac_list, list) {
+		if (sbridge_dev->bus == bus && (dom == SOCK || dom == sbridge_dev->dom))
 			return sbridge_dev;
 	}
 
 	return NULL;
 }
 
-static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
-					   const struct pci_id_table *table)
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus, enum domain dom,
+					     const struct pci_id_table *table)
 {
 	struct sbridge_dev *sbridge_dev;
 
@@ -739,15 +753,17 @@ static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
 	if (!sbridge_dev)
 		return NULL;
 
-	sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
-				   GFP_KERNEL);
+	sbridge_dev->pdev = kcalloc(table->n_devs_per_imc,
+				    sizeof(*sbridge_dev->pdev),
+				    GFP_KERNEL);
 	if (!sbridge_dev->pdev) {
 		kfree(sbridge_dev);
 		return NULL;
 	}
 
 	sbridge_dev->bus = bus;
-	sbridge_dev->n_devs = table->n_devs;
+	sbridge_dev->dom = dom;
+	sbridge_dev->n_devs = table->n_devs_per_imc;
 	list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
 
 	return sbridge_dev;
@@ -1044,79 +1060,6 @@ static int haswell_chan_hash(int idx, u64 addr)
 	return idx;
 }
 
-/****************************************************************************
-			Memory check routines
- ****************************************************************************/
-static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id)
-{
-	struct pci_dev *pdev = NULL;
-
-	do {
-		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev);
-		if (pdev && pdev->bus->number == bus)
-			break;
-	} while (pdev);
-
-	return pdev;
-}
-
-/**
- * check_if_ecc_is_active() - Checks if ECC is active
- * @bus:	Device bus
- * @type:	Memory controller type
- * returns: 0 in case ECC is active, -ENODEV if it can't be determined or
- *	    disabled
- */
-static int check_if_ecc_is_active(const u8 bus, enum type type)
-{
-	struct pci_dev *pdev = NULL;
-	u32 mcmtr, id;
-
-	switch (type) {
-	case IVY_BRIDGE:
-		id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA;
-		break;
-	case HASWELL:
-		id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA;
-		break;
-	case SANDY_BRIDGE:
-		id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA;
-		break;
-	case BROADWELL:
-		id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
-		break;
-	case KNIGHTS_LANDING:
-		/*
-		 * KNL doesn't group things by bus the same way
-		 * SB/IB/Haswell does.
-		 */
-		id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	if (type != KNIGHTS_LANDING)
-		pdev = get_pdev_same_bus(bus, id);
-	else
-		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0);
-
-	if (!pdev) {
-		sbridge_printk(KERN_ERR, "Couldn't find PCI device "
-					"%04x:%04x! on bus %02d\n",
-					PCI_VENDOR_ID_INTEL, id, bus);
-		return -ENODEV;
-	}
-
-	pci_read_config_dword(pdev,
-			type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr);
-	if (!IS_ECC_ENABLED(mcmtr)) {
-		sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
-		return -ENODEV;
-	}
-	return 0;
-}
-
 /* Low bits of TAD limit, and some metadata. */
 static const u32 knl_tad_dram_limit_lo[] = {
 	0x400, 0x500, 0x600, 0x700,
@@ -1587,25 +1530,13 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
 	return 0;
 }
 
-static int get_dimm_config(struct mem_ctl_info *mci)
+static void get_source_id(struct mem_ctl_info *mci)
 {
 	struct sbridge_pvt *pvt = mci->pvt_info;
-	struct dimm_info *dimm;
-	unsigned i, j, banks, ranks, rows, cols, npages;
-	u64 size;
 	u32 reg;
-	enum edac_type mode;
-	enum mem_type mtype;
-	int channels = pvt->info.type == KNIGHTS_LANDING ?
-		KNL_MAX_CHANNELS : NUM_CHANNELS;
-	u64 knl_mc_sizes[KNL_MAX_CHANNELS];
 
-	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
-		pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, &reg);
-		pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
-	}
 	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
-			pvt->info.type == KNIGHTS_LANDING)
+	    pvt->info.type == KNIGHTS_LANDING)
 		pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
 	else
 		pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
@@ -1614,50 +1545,19 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 		pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg);
 	else
 		pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+}
 
-	pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
-	edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
-		 pvt->sbridge_dev->mc,
-		 pvt->sbridge_dev->node_id,
-		 pvt->sbridge_dev->source_id);
-
-	/* KNL doesn't support mirroring or lockstep,
-	 * and is always closed page
-	 */
-	if (pvt->info.type == KNIGHTS_LANDING) {
-		mode = EDAC_S4ECD4ED;
-		pvt->is_mirrored = false;
-
-		if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
-			return -1;
-	} else {
-		pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
-		if (IS_MIRROR_ENABLED(reg)) {
-			edac_dbg(0, "Memory mirror is enabled\n");
-			pvt->is_mirrored = true;
-		} else {
-			edac_dbg(0, "Memory mirror is disabled\n");
-			pvt->is_mirrored = false;
-		}
-
-		pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
-		if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
-			edac_dbg(0, "Lockstep is enabled\n");
-			mode = EDAC_S8ECD8ED;
-			pvt->is_lockstep = true;
-		} else {
-			edac_dbg(0, "Lockstep is disabled\n");
-			mode = EDAC_S4ECD4ED;
-			pvt->is_lockstep = false;
-		}
-		if (IS_CLOSE_PG(pvt->info.mcmtr)) {
-			edac_dbg(0, "address map is on closed page mode\n");
-			pvt->is_close_pg = true;
-		} else {
-			edac_dbg(0, "address map is on open page mode\n");
-			pvt->is_close_pg = false;
-		}
-	}
+static int __populate_dimms(struct mem_ctl_info *mci,
+			    u64 knl_mc_sizes[KNL_MAX_CHANNELS],
+			    enum edac_type mode)
+{
+	struct sbridge_pvt *pvt = mci->pvt_info;
+	int channels = pvt->info.type == KNIGHTS_LANDING ? KNL_MAX_CHANNELS
+							 : NUM_CHANNELS;
+	unsigned int i, j, banks, ranks, rows, cols, npages;
+	struct dimm_info *dimm;
+	enum mem_type mtype;
+	u64 size;
 
 	mtype = pvt->info.get_memory_type(pvt);
 	if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4)
@@ -1688,8 +1588,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 		}
 
 		for (j = 0; j < max_dimms_per_channel; j++) {
-			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-				       i, j, 0);
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
 			if (pvt->info.type == KNIGHTS_LANDING) {
 				pci_read_config_dword(pvt->knl.pci_channel[i],
 					knl_mtr_reg, &mtr);
@@ -1699,6 +1598,12 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			}
 			edac_dbg(4, "Channel #%d  MTR%d = %x\n", i, j, mtr);
 			if (IS_DIMM_PRESENT(mtr)) {
+				if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
+					sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
+						       pvt->sbridge_dev->source_id,
+						       pvt->sbridge_dev->dom, i);
+					return -ENODEV;
+				}
 				pvt->channel[i].dimms++;
 
 				ranks = numrank(pvt->info.type, mtr);
@@ -1717,7 +1622,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 				npages = MiB_TO_PAGES(size);
 
 				edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
-					 pvt->sbridge_dev->mc, i/4, i%4, j,
+					 pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
 					 size, npages,
 					 banks, ranks, rows, cols);
 
@@ -1727,8 +1632,8 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 				dimm->mtype = mtype;
 				dimm->edac_mode = mode;
 				snprintf(dimm->label, sizeof(dimm->label),
-					 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
-					 pvt->sbridge_dev->source_id, i/4, i%4, j);
+						 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
+						 pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
 			}
 		}
 	}
@@ -1736,6 +1641,65 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 	return 0;
 }
 
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct sbridge_pvt *pvt = mci->pvt_info;
+	u64 knl_mc_sizes[KNL_MAX_CHANNELS];
+	enum edac_type mode;
+	u32 reg;
+
+	if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+		pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
+		pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+	}
+	pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
+	edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+		 pvt->sbridge_dev->mc,
+		 pvt->sbridge_dev->node_id,
+		 pvt->sbridge_dev->source_id);
+
+	/* KNL doesn't support mirroring or lockstep,
+	 * and is always closed page
+	 */
+	if (pvt->info.type == KNIGHTS_LANDING) {
+		mode = EDAC_S4ECD4ED;
+		pvt->is_mirrored = false;
+
+		if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
+			return -1;
+		pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr);
+	} else {
+		pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
+		if (IS_MIRROR_ENABLED(reg)) {
+			edac_dbg(0, "Memory mirror is enabled\n");
+			pvt->is_mirrored = true;
+		} else {
+			edac_dbg(0, "Memory mirror is disabled\n");
+			pvt->is_mirrored = false;
+		}
+
+		pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+		if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+			edac_dbg(0, "Lockstep is enabled\n");
+			mode = EDAC_S8ECD8ED;
+			pvt->is_lockstep = true;
+		} else {
+			edac_dbg(0, "Lockstep is disabled\n");
+			mode = EDAC_S4ECD4ED;
+			pvt->is_lockstep = false;
+		}
+		if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+			edac_dbg(0, "address map is on closed page mode\n");
+			pvt->is_close_pg = true;
+		} else {
+			edac_dbg(0, "address map is on open page mode\n");
+			pvt->is_close_pg = false;
+		}
+	}
+
+	return __populate_dimms(mci, knl_mc_sizes, mode);
+}
+
 static void get_memory_layout(const struct mem_ctl_info *mci)
 {
 	struct sbridge_pvt *pvt = mci->pvt_info;
@@ -1816,8 +1780,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	 */
 	prv = 0;
 	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
-		pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
-				      &reg);
+		pci_read_config_dword(pvt->pci_ha, tad_dram_rule[n_tads], &reg);
 		limit = TAD_LIMIT(reg);
 		if (limit <= prv)
 			break;
@@ -1899,12 +1862,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	}
 }
 
-static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
 {
 	struct sbridge_dev *sbridge_dev;
 
 	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-		if (sbridge_dev->node_id == node_id)
+		if (sbridge_dev->node_id == node_id && sbridge_dev->dom == ha)
 			return sbridge_dev->mci;
 	}
 	return NULL;
@@ -1925,7 +1888,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	int			interleave_mode, shiftup = 0;
 	unsigned		sad_interleave[pvt->info.max_interleave];
 	u32			reg, dram_rule;
-	u8			ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0;
+	u8			ch_way, sck_way, pkg, sad_ha = 0;
 	u32			tad_offset;
 	u32			rir_way;
 	u32			mb, gb;
@@ -2038,13 +2001,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
 		*socket = sad_pkg_socket(pkg);
 		sad_ha = sad_pkg_ha(pkg);
-		if (sad_ha)
-			ch_add = 4;
 
 		if (a7mode) {
 			/* MCChanShiftUpEnable */
-			pci_read_config_dword(pvt->pci_ha0,
-					      HASWELL_HASYSDEFEATURE2, &reg);
+			pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
 			shiftup = GET_BITFIELD(reg, 22, 22);
 		}
 
@@ -2056,8 +2016,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
 		*socket = sad_pkg_socket(pkg);
 		sad_ha = sad_pkg_ha(pkg);
-		if (sad_ha)
-			ch_add = 4;
 		edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
 			 idx, *socket, sad_ha);
 	}
@@ -2068,7 +2026,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Move to the proper node structure, in order to access the
 	 * right PCI registers
 	 */
-	new_mci = get_mci_for_node_id(*socket);
+	new_mci = get_mci_for_node_id(*socket, sad_ha);
 	if (!new_mci) {
 		sprintf(msg, "Struct for socket #%u wasn't initialized",
 			*socket);
@@ -2081,14 +2039,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Step 2) Get memory channel
 	 */
 	prv = 0;
-	if (pvt->info.type == SANDY_BRIDGE)
-		pci_ha = pvt->pci_ha0;
-	else {
-		if (sad_ha)
-			pci_ha = pvt->pci_ha1;
-		else
-			pci_ha = pvt->pci_ha0;
-	}
+	pci_ha = pvt->pci_ha;
 	for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
 		pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], &reg);
 		limit = TAD_LIMIT(reg);
@@ -2139,9 +2090,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 	*channel_mask = 1 << base_ch;
 
-	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-				tad_ch_nilv_offset[n_tads],
-				&tad_offset);
+	pci_read_config_dword(pvt->pci_tad[base_ch], tad_ch_nilv_offset[n_tads], &tad_offset);
 
 	if (pvt->is_mirrored) {
 		*channel_mask |= 1 << ((base_ch + 2) % 4);
@@ -2192,9 +2141,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	 * Step 3) Decode rank
 	 */
 	for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
-		pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-				      rir_way_limit[n_rir],
-				      &reg);
+		pci_read_config_dword(pvt->pci_tad[base_ch], rir_way_limit[n_rir], &reg);
 
 		if (!IS_RIR_VALID(reg))
 			continue;
@@ -2222,9 +2169,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		idx = (ch_addr >> 13);	/* FIXME: Datasheet says to shift by 15 */
 	idx %= 1 << rir_way;
 
-	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-			      rir_offset[n_rir][idx],
-			      &reg);
+	pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], &reg);
 	*rank = RIR_RNK_TGT(pvt->info.type, reg);
 
 	edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
@@ -2277,10 +2222,11 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 				 const unsigned devno,
 				 const int multi_bus)
 {
-	struct sbridge_dev *sbridge_dev;
+	struct sbridge_dev *sbridge_dev = NULL;
 	const struct pci_id_descr *dev_descr = &table->descr[devno];
 	struct pci_dev *pdev = NULL;
 	u8 bus = 0;
+	int i = 0;
 
 	sbridge_printk(KERN_DEBUG,
 		"Seeking for: PCI ID %04x:%04x\n",
@@ -2311,9 +2257,14 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 	}
 	bus = pdev->bus->number;
 
-	sbridge_dev = get_sbridge_dev(bus, multi_bus);
+next_imc:
+	sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
 	if (!sbridge_dev) {
-		sbridge_dev = alloc_sbridge_dev(bus, table);
+
+		if (dev_descr->dom == SOCK)
+			goto out_imc;
+
+		sbridge_dev = alloc_sbridge_dev(bus, dev_descr->dom, table);
 		if (!sbridge_dev) {
 			pci_dev_put(pdev);
 			return -ENOMEM;
@@ -2321,7 +2272,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 		(*num_mc)++;
 	}
 
-	if (sbridge_dev->pdev[devno]) {
+	if (sbridge_dev->pdev[sbridge_dev->i_devs]) {
 		sbridge_printk(KERN_ERR,
 			"Duplicated device for %04x:%04x\n",
 			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
@@ -2329,8 +2280,16 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
 		return -ENODEV;
 	}
 
-	sbridge_dev->pdev[devno] = pdev;
+	sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev;
 
+	/* pdev belongs to more than one IMC, do extra gets */
+	if (++i > 1)
+		pci_dev_get(pdev);
+
+	if (dev_descr->dom == SOCK && i < table->n_imcs_per_sock)
+		goto next_imc;
+
+out_imc:
 	/* Be sure that the device is enabled */
 	if (unlikely(pci_enable_device(pdev) < 0)) {
 		sbridge_printk(KERN_ERR,
@@ -2374,7 +2333,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
 	if (table->type == KNIGHTS_LANDING)
 		allow_dups = multi_bus = 1;
 	while (table && table->descr) {
-		for (i = 0; i < table->n_devs; i++) {
+		for (i = 0; i < table->n_devs_per_sock; i++) {
 			if (!allow_dups || i == 0 ||
 					table->descr[i].dev_id !=
 						table->descr[i-1].dev_id) {
@@ -2385,7 +2344,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
 							   table, i, multi_bus);
 				if (rc < 0) {
 					if (i == 0) {
-						i = table->n_devs;
+						i = table->n_devs_per_sock;
 						break;
 					}
 					sbridge_put_all_devices();
@@ -2399,6 +2358,13 @@ static int sbridge_get_all_devices(u8 *num_mc,
 	return 0;
 }
 
+/*
+ * Device IDs for {SBRIDGE,IBRIDGE,HASWELL,BROADWELL}_IMC_HA0_TAD0 are in
+ * the format: XXXa. So we can convert from a device to the corresponding
+ * channel like this
+ */
+#define TAD_DEV_TO_CHAN(dev) (((dev) & 0xf) - 0xa)
+
 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 				 struct sbridge_dev *sbridge_dev)
 {
@@ -2423,7 +2389,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_br0 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA:
 			pvt->pci_ta = pdev;
@@ -2436,7 +2402,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2:
 		case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2455,7 +2421,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha ||
 	    !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
@@ -2488,19 +2454,26 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 
 		switch (pdev->device) {
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
+		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2520,19 +2493,6 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1:
 			pvt->pci_br1 = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
-		case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4;
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		default:
 			goto error;
 		}
@@ -2544,13 +2504,12 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_br0 ||
 	    !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2593,32 +2552,27 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_sad1 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM:
+		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0;
-
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2:
 		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4;
-
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2630,12 +2584,6 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 			if (!pvt->pci_ddrio)
 				pvt->pci_ddrio = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
-			pvt->pci_ha1_ta = pdev;
-			break;
 		default:
 			break;
 		}
@@ -2647,13 +2595,12 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
 	    !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2690,30 +2637,27 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->pci_sad1 = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
-			pvt->pci_ha0 = pdev;
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
+			pvt->pci_ha = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
 			pvt->pci_ta = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM:
+		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM:
 			pvt->pci_ras = pdev;
 			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3:
-		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0;
-			pvt->pci_tad[id] = pdev;
-			saw_chan_mask |= 1 << id;
-		}
-			break;
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2:
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3:
 		{
-			int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4;
+			int id = TAD_DEV_TO_CHAN(pdev->device);
 			pvt->pci_tad[id] = pdev;
 			saw_chan_mask |= 1 << id;
 		}
@@ -2721,12 +2665,6 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0:
 			pvt->pci_ddrio = pdev;
 			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
-			pvt->pci_ha1 = pdev;
-			break;
-		case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
-			pvt->pci_ha1_ta = pdev;
-			break;
 		default:
 			break;
 		}
@@ -2738,13 +2676,12 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
 	}
 
 	/* Check if everything were registered */
-	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+	if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
 	    !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
 		goto enodev;
 
-	if (saw_chan_mask != 0x0f && /* -EN */
-	    saw_chan_mask != 0x33 && /* -EP */
-	    saw_chan_mask != 0xff)   /* -EX */
+	if (saw_chan_mask != 0x0f && /* -EN/-EX */
+	    saw_chan_mask != 0x03)   /* -EP */
 		goto enodev;
 	return 0;
 
@@ -2812,7 +2749,7 @@ static int knl_mci_bind_devs(struct mem_ctl_info *mci,
 			pvt->knl.pci_cha[devidx] = pdev;
 			break;
 
-		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL:
+		case PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN:
 			devidx = -1;
 
 			/*
@@ -3006,7 +2943,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 
 	if (rc < 0)
 		goto err_parsing;
-	new_mci = get_mci_for_node_id(socket);
+	new_mci = get_mci_for_node_id(socket, ha);
 	if (!new_mci) {
 		strcpy(msg, "Error: socket got corrupted!");
 		goto err_parsing;
@@ -3053,7 +2990,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	/* Call the helper to output message */
 	edac_mc_handle_error(tp_event, mci, core_err_cnt,
 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
-			     4*ha+channel, dimm, -1,
+			     channel, dimm, -1,
 			     optype, msg);
 	return;
 err_parsing:
@@ -3078,7 +3015,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
-	mci = get_mci_for_node_id(mce->socketid);
+	mci = get_mci_for_node_id(mce->socketid, IMC0);
 	if (!mci)
 		return NOTIFY_DONE;
 	pvt = mci->pvt_info;
@@ -3159,11 +3096,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 	struct pci_dev *pdev = sbridge_dev->pdev[0];
 	int rc;
 
-	/* Check the number of active and not disabled channels */
-	rc = check_if_ecc_is_active(sbridge_dev->bus, type);
-	if (unlikely(rc < 0))
-		return rc;
-
 	/* allocate a new MC control structure */
 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 	layers[0].size = type == KNIGHTS_LANDING ?
@@ -3192,7 +3124,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		MEM_FLAG_DDR4 : MEM_FLAG_DDR3;
 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
 	mci->edac_cap = EDAC_FLAG_NONE;
-	mci->mod_name = "sbridge_edac.c";
+	mci->mod_name = "sb_edac.c";
 	mci->mod_ver = SBRIDGE_REVISION;
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
@@ -3215,12 +3147,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = ibridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = ibridge_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case SANDY_BRIDGE:
 		pvt->info.rankcfgr = SB_RANK_CFG_A;
@@ -3238,12 +3172,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
 		pvt->info.interleave_pkg = sbridge_interleave_pkg;
 		pvt->info.get_width = sbridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = sbridge_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case HASWELL:
 		/* rankcfgr isn't used */
@@ -3261,12 +3197,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = ibridge_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = haswell_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case BROADWELL:
 		/* rankcfgr isn't used */
@@ -3284,12 +3222,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = broadwell_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
 
 		/* Store pci devices at mci for faster access */
 		rc = broadwell_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	case KNIGHTS_LANDING:
 		/* pvt->info.rankcfgr == ??? */
@@ -3307,17 +3247,22 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 		pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list);
 		pvt->info.interleave_pkg = ibridge_interleave_pkg;
 		pvt->info.get_width = knl_get_width;
-		mci->ctl_name = kasprintf(GFP_KERNEL,
-			"Knights Landing Socket#%d", mci->mc_idx);
 
 		rc = knl_mci_bind_devs(mci, sbridge_dev);
 		if (unlikely(rc < 0))
 			goto fail0;
+		get_source_id(mci);
+		mci->ctl_name = kasprintf(GFP_KERNEL, "Knights Landing SrcID#%d_Ha#%d",
+			pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
 		break;
 	}
 
 	/* Get dimm basic config and the memory layout */
-	get_dimm_config(mci);
+	rc = get_dimm_config(mci);
+	if (rc < 0) {
+		edac_dbg(0, "MC: failed to get_dimm_config()\n");
+		goto fail;
+	}
 	get_memory_layout(mci);
 
 	/* record ptr to the generic device */
@@ -3327,13 +3272,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
 	if (unlikely(edac_mc_add_mc(mci))) {
 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
 		rc = -EINVAL;
-		goto fail0;
+		goto fail;
 	}
 
 	return 0;
 
-fail0:
+fail:
 	kfree(mci->ctl_name);
+fail0:
 	edac_mc_free(mci);
 	sbridge_dev->mci = NULL;
 	return rc;
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 86d585c..2d352b4 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -2080,7 +2080,7 @@ static int thunderx_l2c_probe(struct pci_dev *pdev,
 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
 		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
 
-		thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
 					      l2c, dfs_entries);
 
 		if (ret != dfs_entries) {
diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 44c0139..951b6c7 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name,		0444, DMI_PRODUCT_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(product_version,	0444, DMI_PRODUCT_VERSION);
 DEFINE_DMI_ATTR_WITH_SHOW(product_serial,	0400, DMI_PRODUCT_SERIAL);
 DEFINE_DMI_ATTR_WITH_SHOW(product_uuid,		0400, DMI_PRODUCT_UUID);
+DEFINE_DMI_ATTR_WITH_SHOW(product_family,	0444, DMI_PRODUCT_FAMILY);
 DEFINE_DMI_ATTR_WITH_SHOW(board_vendor,		0444, DMI_BOARD_VENDOR);
 DEFINE_DMI_ATTR_WITH_SHOW(board_name,		0444, DMI_BOARD_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(board_version,	0444, DMI_BOARD_VERSION);
@@ -191,6 +192,7 @@ static void __init dmi_id_init_attr_table(void)
 	ADD_DMI_ATTR(product_version,   DMI_PRODUCT_VERSION);
 	ADD_DMI_ATTR(product_serial,    DMI_PRODUCT_SERIAL);
 	ADD_DMI_ATTR(product_uuid,      DMI_PRODUCT_UUID);
+	ADD_DMI_ATTR(product_family,    DMI_PRODUCT_FAMILY);
 	ADD_DMI_ATTR(board_vendor,      DMI_BOARD_VENDOR);
 	ADD_DMI_ATTR(board_name,        DMI_BOARD_NAME);
 	ADD_DMI_ATTR(board_version,     DMI_BOARD_VERSION);
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 54be60e..7830419 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -144,7 +144,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 
 	buf = dmi_early_remap(dmi_base, orig_dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, NULL);
 
@@ -178,7 +178,7 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
 	const char *d = (const char *) dm;
 	const char *p;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= string)
 		return;
 
 	p = dmi_string(dm, d[string]);
@@ -191,13 +191,14 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
 static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 		int index)
 {
-	const u8 *d = (u8 *) dm + index;
+	const u8 *d;
 	char *s;
 	int is_ff = 1, is_00 = 1, i;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= index + 16)
 		return;
 
+	d = (u8 *) dm + index;
 	for (i = 0; i < 16 && (is_ff || is_00); i++) {
 		if (d[i] != 0x00)
 			is_00 = 0;
@@ -228,16 +229,17 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 static void __init dmi_save_type(const struct dmi_header *dm, int slot,
 		int index)
 {
-	const u8 *d = (u8 *) dm + index;
+	const u8 *d;
 	char *s;
 
-	if (dmi_ident[slot])
+	if (dmi_ident[slot] || dm->length <= index)
 		return;
 
 	s = dmi_alloc(4);
 	if (!s)
 		return;
 
+	d = (u8 *) dm + index;
 	sprintf(s, "%u", *d & 0x7F);
 	dmi_ident[slot] = s;
 }
@@ -278,9 +280,13 @@ static void __init dmi_save_devices(const struct dmi_header *dm)
 
 static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 {
-	int i, count = *(u8 *)(dm + 1);
+	int i, count;
 	struct dmi_device *dev;
 
+	if (dm->length < 0x05)
+		return;
+
+	count = *(u8 *)(dm + 1);
 	for (i = 1; i <= count; i++) {
 		const char *devname = dmi_string(dm, i);
 
@@ -353,6 +359,9 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 	const char *name;
 	const u8 *d = (u8 *)dm;
 
+	if (dm->length < 0x0B)
+		return;
+
 	/* Skip disabled device */
 	if ((d[0x5] & 0x80) == 0)
 		return;
@@ -387,7 +396,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
 	const char *d = (const char *)dm;
 	static int nr;
 
-	if (dm->type != DMI_ENTRY_MEM_DEVICE)
+	if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x12)
 		return;
 	if (nr >= dmi_memdev_nr) {
 		pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
@@ -430,6 +439,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy)
 		dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
 		dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
 		dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8);
+		dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26);
 		break;
 	case 2:		/* Base Board Information */
 		dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
@@ -649,6 +659,21 @@ void __init dmi_scan_machine(void)
 			goto error;
 
 		/*
+		 * Same logic as above, look for a 64-bit entry point
+		 * first, and if not found, fall back to 32-bit entry point.
+		 */
+		memcpy_fromio(buf, p, 16);
+		for (q = p + 16; q < p + 0x10000; q += 16) {
+			memcpy_fromio(buf + 16, q, 16);
+			if (!dmi_smbios3_present(buf)) {
+				dmi_available = 1;
+				dmi_early_unmap(p, 0x10000);
+				goto out;
+			}
+			memcpy(buf, buf + 16, 16);
+		}
+
+		/*
 		 * Iterate over all possible DMI header addresses q.
 		 * Maintain the 32 bytes around q in buf.  On the
 		 * first iteration, substitute zero for the
@@ -658,7 +683,7 @@ void __init dmi_scan_machine(void)
 		memset(buf, 0, 16);
 		for (q = p; q < p + 0x10000; q += 16) {
 			memcpy_fromio(buf + 16, q, 16);
-			if (!dmi_smbios3_present(buf) || !dmi_present(buf)) {
+			if (!dmi_present(buf)) {
 				dmi_available = 1;
 				dmi_early_unmap(p, 0x10000);
 				goto out;
@@ -992,7 +1017,8 @@ EXPORT_SYMBOL(dmi_get_date);
  *	@decode: Callback function
  *	@private_data: Private data to be passed to the callback function
  *
- *	Returns -1 when the DMI table can't be reached, 0 on success.
+ *	Returns 0 on success, -ENXIO if DMI is not selected or not present,
+ *	or a different negative error code if DMI walking fails.
  */
 int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	     void *private_data)
@@ -1000,11 +1026,11 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	u8 *buf;
 
 	if (!dmi_available)
-		return -1;
+		return -ENXIO;
 
 	buf = dmi_remap(dmi_base, dmi_len);
 	if (buf == NULL)
-		return -1;
+		return -ENOMEM;
 
 	dmi_decode_table(buf, decode, private_data);
 
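With the dmi_walk() change above, callers now get distinct errno values instead of a bare -1. A minimal, hypothetical caller illustrating the new contract (the callback and probe function below are illustrative only, not part of this series):

#include <linux/dmi.h>
#include <linux/errno.h>
#include <linux/printk.h>

static void example_count_entries(const struct dmi_header *dh, void *priv)
{
	(*(int *)priv)++;		/* count every DMI structure */
}

static int example_dmi_probe(void)
{
	int count = 0;
	int ret = dmi_walk(example_count_entries, &count);

	if (ret == -ENXIO)		/* DMI not configured or not present */
		return 0;
	if (ret)			/* e.g. -ENOMEM from dmi_remap() */
		return ret;

	pr_info("found %d DMI structures\n", count);
	return 0;
}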
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 2e78b0b..394db40 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -112,6 +112,15 @@
 
 	  Most users should say N.
 
+config EFI_CAPSULE_QUIRK_QUARK_CSH
+	bool "Add support for Quark capsules with non-standard headers"
+	depends on X86 && !64BIT
+	select EFI_CAPSULE_LOADER
+	default y
+	help
+	  Add support for processing Quark X1000 EFI capsules, whose header
+	  layout deviates from the layout mandated by the UEFI specification.
+
 config EFI_TEST
 	tristate "EFI Runtime Service Tests Support"
 	depends on EFI
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 974c5a3..1cc41c3 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include <linux/dmi.h>
 #include <linux/efi.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
@@ -166,3 +167,18 @@ void efi_virtmap_unload(void)
 	efi_set_pgd(current->active_mm);
 	preempt_enable();
 }
+
+
+static int __init arm_dmi_init(void)
+{
+	/*
+	 * On arm64/ARM, DMI depends on UEFI, and dmi_scan_machine() needs to
+	 * be called early because dmi_id_init(), which is an arch_initcall
+	 * itself, depends on dmi_scan_machine() having been called already.
+	 */
+	dmi_scan_machine();
+	if (dmi_available)
+		dmi_set_dump_stack_arch_desc();
+	return 0;
+}
+core_initcall(arm_dmi_init);
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index 9ae6c11..ec8ac5c 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -20,15 +20,9 @@
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-struct capsule_info {
-	bool		header_obtained;
-	int		reset_type;
-	long		index;
-	size_t		count;
-	size_t		total_size;
-	struct page	**pages;
-	size_t		page_bytes_remain;
-};
+#ifndef phys_to_page
+#define phys_to_page(x)		pfn_to_page((x) >> PAGE_SHIFT)
+#endif
 
 /**
  * efi_free_all_buff_pages - free all previous allocated buffer pages
@@ -41,62 +35,67 @@ struct capsule_info {
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
 	while (cap_info->index > 0)
-		__free_page(cap_info->pages[--cap_info->index]);
+		__free_page(phys_to_page(cap_info->pages[--cap_info->index]));
 
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
 
+int __efi_capsule_setup_info(struct capsule_info *cap_info)
+{
+	size_t pages_needed;
+	int ret;
+	void *temp_page;
+
+	pages_needed = ALIGN(cap_info->total_size, PAGE_SIZE) / PAGE_SIZE;
+
+	if (pages_needed == 0) {
+		pr_err("invalid capsule size\n");
+		return -EINVAL;
+	}
+
+	/* Check if the capsule binary is supported */
+	ret = efi_capsule_supported(cap_info->header.guid,
+				    cap_info->header.flags,
+				    cap_info->header.imagesize,
+				    &cap_info->reset_type);
+	if (ret) {
+		pr_err("capsule not supported\n");
+		return ret;
+	}
+
+	temp_page = krealloc(cap_info->pages,
+			     pages_needed * sizeof(void *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!temp_page)
+		return -ENOMEM;
+
+	cap_info->pages = temp_page;
+
+	return 0;
+}
+
 /**
  * efi_capsule_setup_info - obtain the efi capsule header in the binary and
  *			    setup capsule_info structure
  * @cap_info: pointer to current instance of capsule_info structure
  * @kbuff: a mapped first page buffer pointer
  * @hdr_bytes: the total received number of bytes for efi header
+ *
+ * Platforms with non-standard capsule update mechanisms can override
+ * this __weak function so they can perform any required capsule
+ * image munging. See quark_quirk_function() for an example.
  **/
-static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
-				      void *kbuff, size_t hdr_bytes)
+int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+				  size_t hdr_bytes)
 {
-	efi_capsule_header_t *cap_hdr;
-	size_t pages_needed;
-	int ret;
-	void *temp_page;
-
 	/* Only process data block that is larger than efi header size */
 	if (hdr_bytes < sizeof(efi_capsule_header_t))
 		return 0;
 
-	/* Reset back to the correct offset of header */
-	cap_hdr = kbuff - cap_info->count;
-	pages_needed = ALIGN(cap_hdr->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+	memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+	cap_info->total_size = cap_info->header.imagesize;
 
-	if (pages_needed == 0) {
-		pr_err("%s: pages count invalid\n", __func__);
-		return -EINVAL;
-	}
-
-	/* Check if the capsule binary supported */
-	ret = efi_capsule_supported(cap_hdr->guid, cap_hdr->flags,
-				    cap_hdr->imagesize,
-				    &cap_info->reset_type);
-	if (ret) {
-		pr_err("%s: efi_capsule_supported() failed\n",
-		       __func__);
-		return ret;
-	}
-
-	cap_info->total_size = cap_hdr->imagesize;
-	temp_page = krealloc(cap_info->pages,
-			     pages_needed * sizeof(void *),
-			     GFP_KERNEL | __GFP_ZERO);
-	if (!temp_page) {
-		pr_debug("%s: krealloc() failed\n", __func__);
-		return -ENOMEM;
-	}
-
-	cap_info->pages = temp_page;
-	cap_info->header_obtained = true;
-
-	return 0;
+	return __efi_capsule_setup_info(cap_info);
 }
 
 /**
@@ -107,26 +106,17 @@ static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
 static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
 {
 	int ret;
-	void *cap_hdr_temp;
 
-	cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
-			VM_MAP, PAGE_KERNEL);
-	if (!cap_hdr_temp) {
-		pr_debug("%s: vmap() failed\n", __func__);
-		return -EFAULT;
-	}
-
-	ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-	vunmap(cap_hdr_temp);
+	ret = efi_capsule_update(&cap_info->header, cap_info->pages);
 	if (ret) {
-		pr_err("%s: efi_capsule_update() failed\n", __func__);
+		pr_err("capsule update failed\n");
 		return ret;
 	}
 
 	/* Indicate capsule binary uploading is done */
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
-	pr_info("%s: Successfully upload capsule file with reboot type '%s'\n",
-		__func__, !cap_info->reset_type ? "RESET_COLD" :
+	pr_info("Successfully uploaded capsule file with reboot type '%s'\n",
+		!cap_info->reset_type ? "RESET_COLD" :
 		cap_info->reset_type == 1 ? "RESET_WARM" :
 		"RESET_SHUTDOWN");
 	return 0;
@@ -171,37 +161,30 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 	if (!cap_info->page_bytes_remain) {
 		page = alloc_page(GFP_KERNEL);
 		if (!page) {
-			pr_debug("%s: alloc_page() failed\n", __func__);
 			ret = -ENOMEM;
 			goto failed;
 		}
 
-		cap_info->pages[cap_info->index++] = page;
+		cap_info->pages[cap_info->index++] = page_to_phys(page);
 		cap_info->page_bytes_remain = PAGE_SIZE;
+	} else {
+		page = phys_to_page(cap_info->pages[cap_info->index - 1]);
 	}
 
-	page = cap_info->pages[cap_info->index - 1];
-
 	kbuff = kmap(page);
-	if (!kbuff) {
-		pr_debug("%s: kmap() failed\n", __func__);
-		ret = -EFAULT;
-		goto failed;
-	}
 	kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
 
 	/* Copy capsule binary data from user space to kernel space buffer */
 	write_byte = min_t(size_t, count, cap_info->page_bytes_remain);
 	if (copy_from_user(kbuff, buff, write_byte)) {
-		pr_debug("%s: copy_from_user() failed\n", __func__);
 		ret = -EFAULT;
 		goto fail_unmap;
 	}
 	cap_info->page_bytes_remain -= write_byte;
 
 	/* Setup capsule binary info structure */
-	if (!cap_info->header_obtained) {
-		ret = efi_capsule_setup_info(cap_info, kbuff,
+	if (cap_info->header.headersize == 0) {
+		ret = efi_capsule_setup_info(cap_info, kbuff - cap_info->count,
 					     cap_info->count + write_byte);
 		if (ret)
 			goto fail_unmap;
@@ -211,11 +194,10 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
 	kunmap(page);
 
 	/* Submit the full binary to efi_capsule_update() API */
-	if (cap_info->header_obtained &&
+	if (cap_info->header.headersize > 0 &&
 	    cap_info->count >= cap_info->total_size) {
 		if (cap_info->count > cap_info->total_size) {
-			pr_err("%s: upload size exceeded header defined size\n",
-			       __func__);
+			pr_err("capsule upload size exceeded header defined size\n");
 			ret = -EINVAL;
 			goto failed;
 		}
@@ -249,7 +231,7 @@ static int efi_capsule_flush(struct file *file, fl_owner_t id)
 	struct capsule_info *cap_info = file->private_data;
 
 	if (cap_info->index > 0) {
-		pr_err("%s: capsule upload not complete\n", __func__);
+		pr_err("capsule upload not complete\n");
 		efi_free_all_buff_pages(cap_info);
 		ret = -ECANCELED;
 	}
@@ -328,8 +310,7 @@ static int __init efi_capsule_loader_init(void)
 
 	ret = misc_register(&efi_capsule_misc);
 	if (ret)
-		pr_err("%s: Failed to register misc char file note\n",
-		       __func__);
+		pr_err("Unable to register capsule loader device\n");
 
 	return ret;
 }
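With efi_capsule_setup_info() now a __weak symbol and the shared page bookkeeping factored into __efi_capsule_setup_info(), a platform with a non-standard capsule layout only needs to override the header parsing. A rough sketch of such an override, assuming a hypothetical vendor prefix in front of the standard header (this is not the actual Quark quirk, which lives in the x86 platform code):

#include <linux/efi.h>
#include <linux/string.h>

/* Hypothetical vendor prefix placed before the standard capsule header. */
struct vendor_capsule_prefix {
	u32 magic;
	u32 prefix_size;
};

int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
			   size_t hdr_bytes)
{
	struct vendor_capsule_prefix *pfx = kbuff;

	/* Wait until the prefix and the standard header have both arrived. */
	if (hdr_bytes < sizeof(*pfx) + sizeof(efi_capsule_header_t))
		return 0;

	/* Copy the standard header from behind the vendor prefix. */
	memcpy(&cap_info->header, kbuff + sizeof(*pfx),
	       sizeof(cap_info->header));
	cap_info->total_size = sizeof(*pfx) + cap_info->header.imagesize;

	return __efi_capsule_setup_info(cap_info);
}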
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 6eedff4..901b930 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -214,7 +214,7 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
  *
  * Return 0 on success, a converted EFI status code on failure.
  */
-int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+int efi_capsule_update(efi_capsule_header_t *capsule, phys_addr_t *pages)
 {
 	u32 imagesize = capsule->imagesize;
 	efi_guid_t guid = capsule->guid;
@@ -247,16 +247,13 @@ int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
 		efi_capsule_block_desc_t *sglist;
 
 		sglist = kmap(sg_pages[i]);
-		if (!sglist) {
-			rv = -ENOMEM;
-			goto out;
-		}
 
 		for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
-			u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+			u64 sz = min_t(u64, imagesize,
+				       PAGE_SIZE - (u64)*pages % PAGE_SIZE);
 
 			sglist[j].length = sz;
-			sglist[j].data = page_to_phys(*pages++);
+			sglist[j].data = *pages++;
 
 			imagesize -= sz;
 			count--;
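Since the scatter-gather entries are now built from raw physical addresses, the first data block is no longer guaranteed to start on a page boundary, so each entry's length is clipped at the next page boundary: sz = min(imagesize, PAGE_SIZE - addr % PAGE_SIZE). With 4 KiB pages, an address of 0x10000800 leaves 4096 - 2048 = 2048 bytes before the boundary, while a page-aligned address keeps the full 4096 bytes, matching the previous behaviour.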
diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c
index 04ca876..b58233e 100644
--- a/drivers/firmware/efi/efi-bgrt.c
+++ b/drivers/firmware/efi/efi-bgrt.c
@@ -27,6 +27,26 @@ struct bmp_header {
 	u32 size;
 } __packed;
 
+static bool efi_bgrt_addr_valid(u64 addr)
+{
+	efi_memory_desc_t *md;
+
+	for_each_efi_memory_desc(md) {
+		u64 size;
+		u64 end;
+
+		if (md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+
+		size = md->num_pages << EFI_PAGE_SHIFT;
+		end = md->phys_addr + size;
+		if (addr >= md->phys_addr && addr < end)
+			return true;
+	}
+
+	return false;
+}
+
 void __init efi_bgrt_init(struct acpi_table_header *table)
 {
 	void *image;
@@ -36,6 +56,9 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
 	if (acpi_disabled)
 		return;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	if (table->length < sizeof(bgrt_tab)) {
 		pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n",
 		       table->length, sizeof(bgrt_tab));
@@ -62,6 +85,10 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
 		goto out;
 	}
 
+	if (!efi_bgrt_addr_valid(bgrt->image_address)) {
+		pr_notice("Ignoring BGRT: invalid image address\n");
+		goto out;
+	}
 	image = early_memremap(bgrt->image_address, sizeof(bmp_header));
 	if (!image) {
 		pr_notice("Ignoring BGRT: failed to map image header memory\n");
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index ab3a951a..ef1fafd 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -53,6 +53,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	if (sscanf(name, "dump-type%u-%u-%d-%lu-%c",
 		   &record->type, &part, &cnt, &time, &data_type) == 5) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -64,6 +65,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 	} else if (sscanf(name, "dump-type%u-%u-%d-%lu",
 		   &record->type, &part, &cnt, &time) == 4) {
 		record->id = generic_id(time, part, cnt);
+		record->part = part;
 		record->count = cnt;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -77,6 +79,7 @@ static int efi_pstore_read_func(struct efivar_entry *entry,
 		 * multiple logs, remains.
 		 */
 		record->id = generic_id(time, part, 0);
+		record->part = part;
 		record->count = 0;
 		record->time.tv_sec = time;
 		record->time.tv_nsec = 0;
@@ -241,9 +244,15 @@ static int efi_pstore_write(struct pstore_record *record)
 	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
 	int i, ret = 0;
 
+	record->time.tv_sec = get_seconds();
+	record->time.tv_nsec = 0;
+
+	record->id = generic_id(record->time.tv_sec, record->part,
+				record->count);
+
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu-%c",
 		 record->type, record->part, record->count,
-		 get_seconds(), record->compressed ? 'C' : 'D');
+		 record->time.tv_sec, record->compressed ? 'C' : 'D');
 
 	for (i = 0; i < DUMP_NAME_LEN; i++)
 		efi_name[i] = name[i];
@@ -255,7 +264,6 @@ static int efi_pstore_write(struct pstore_record *record)
 	if (record->reason == KMSG_DUMP_OOPS)
 		efivar_run_worker();
 
-	record->id = record->part;
 	return ret;
 };
 
@@ -287,7 +295,7 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
 		 * holding multiple logs, remains.
 		 */
 		snprintf(name_old, sizeof(name_old), "dump-type%u-%u-%lu",
-			ed->record->type, (unsigned int)ed->record->id,
+			ed->record->type, ed->record->part,
 			ed->record->time.tv_sec);
 
 		for (i = 0; i < DUMP_NAME_LEN; i++)
@@ -320,10 +328,7 @@ static int efi_pstore_erase(struct pstore_record *record)
 	char name[DUMP_NAME_LEN];
 	efi_char16_t efi_name[DUMP_NAME_LEN];
 	int found, i;
-	unsigned int part;
 
-	do_div(record->id, 1000);
-	part = do_div(record->id, 100);
 	snprintf(name, sizeof(name), "dump-type%u-%u-%d-%lu",
 		 record->type, record->part, record->count,
 		 record->time.tv_sec);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index b372aad..045d6d3 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -528,7 +528,8 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
 		}
 	}
 
-	efi_memattr_init();
+	if (efi_enabled(EFI_MEMMAP))
+		efi_memattr_init();
 
 	/* Parse the EFI Properties table if it exists */
 	if (efi.properties_table != EFI_INVALID_TABLE_ADDR) {
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 8c34d50..959777e 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -16,10 +16,10 @@
 
 /* BIOS variables */
 static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
-static const efi_char16_t const efi_SecureBoot_name[] = {
+static const efi_char16_t efi_SecureBoot_name[] = {
 	'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0
 };
-static const efi_char16_t const efi_SetupMode_name[] = {
+static const efi_char16_t efi_SetupMode_name[] = {
 	'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0
 };
 
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index 8cd578f..08129b7 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -71,18 +71,13 @@ copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
 	if (!access_ok(VERIFY_READ, src, 1))
 		return -EFAULT;
 
-	buf = kmalloc(len, GFP_KERNEL);
-	if (!buf) {
+	buf = memdup_user(src, len);
+	if (IS_ERR(buf)) {
 		*dst = NULL;
-		return -ENOMEM;
+		return PTR_ERR(buf);
 	}
 	*dst = buf;
 
-	if (copy_from_user(*dst, src, len)) {
-		kfree(buf);
-		return -EFAULT;
-	}
-
 	return 0;
 }
 
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index 1e7860f..31058d40 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -136,12 +136,12 @@ static int vpd_section_attrib_add(const u8 *key, s32 key_len,
 	info->value = value;
 
 	INIT_LIST_HEAD(&info->list);
-	list_add_tail(&info->list, &sec->attribs);
 
 	ret = sysfs_create_bin_file(sec->kobj, &info->bin_attr);
 	if (ret)
 		goto free_info_key;
 
+	list_add_tail(&info->list, &sec->attribs);
 	return 0;
 
 free_info_key:
@@ -158,8 +158,8 @@ static void vpd_section_attrib_destroy(struct vpd_section *sec)
 	struct vpd_attrib_info *temp;
 
 	list_for_each_entry_safe(info, temp, &sec->attribs, list) {
-		kfree(info->key);
 		sysfs_remove_bin_file(sec->kobj, &info->bin_attr);
+		kfree(info->key);
 		kfree(info);
 	}
 }
@@ -244,7 +244,7 @@ static int vpd_section_destroy(struct vpd_section *sec)
 {
 	if (sec->enabled) {
 		vpd_section_attrib_destroy(sec);
-		kobject_del(sec->kobj);
+		kobject_put(sec->kobj);
 		sysfs_remove_bin_file(vpd_kobj, &sec->bin_attr);
 		kfree(sec->raw_name);
 		iounmap(sec->baseaddr);
@@ -331,7 +331,7 @@ static void __exit vpd_platform_exit(void)
 {
 	vpd_section_destroy(&ro_vpd);
 	vpd_section_destroy(&rw_vpd);
-	kobject_del(vpd_kobj);
+	kobject_put(vpd_kobj);
 }
 
 module_init(vpd_platform_init);
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index ccea609..4ca436e 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -646,6 +646,9 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset,
 	int rc;
 	int i;
 
+	if (!gpio->clk)
+		return -EINVAL;
+
 	rc = usecs_to_cycles(gpio, usecs, &requested_cycles);
 	if (rc < 0) {
 		dev_warn(chip->parent, "Failed to convert %luus to cycles at %luHz: %d\n",
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 2197368..e60156e 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -90,8 +90,18 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
 {
 	int reg;
 
-	if (gpio == 94)
-		return GPIOPANELCTL;
+	if (gpio >= CRYSTALCOVE_GPIO_NUM) {
+		/*
+		 * Virtual GPIO called from ACPI, for now we only support
+		 * the panel ctl.
+		 */
+		switch (gpio) {
+		case 0x5e:
+			return GPIOPANELCTL;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
 
 	if (reg_type == CTRL_IN) {
 		if (gpio < 8)
@@ -130,36 +140,36 @@ static void crystalcove_update_irq_ctrl(struct crystalcove_gpio *cg, int gpio)
 static int crystalcove_gpio_dir_in(struct gpio_chip *chip, unsigned gpio)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	return regmap_write(cg->regmap, to_reg(gpio, CTRL_OUT),
-			    CTLO_INPUT_SET);
+	return regmap_write(cg->regmap, reg, CTLO_INPUT_SET);
 }
 
 static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned gpio,
 				    int value)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	return regmap_write(cg->regmap, to_reg(gpio, CTRL_OUT),
-			    CTLO_OUTPUT_SET | value);
+	return regmap_write(cg->regmap, reg, CTLO_OUTPUT_SET | value);
 }
 
 static int crystalcove_gpio_get(struct gpio_chip *chip, unsigned gpio)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
-	int ret;
 	unsigned int val;
+	int ret, reg = to_reg(gpio, CTRL_IN);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return 0;
 
-	ret = regmap_read(cg->regmap, to_reg(gpio, CTRL_IN), &val);
+	ret = regmap_read(cg->regmap, reg, &val);
 	if (ret)
 		return ret;
 
@@ -170,14 +180,15 @@ static void crystalcove_gpio_set(struct gpio_chip *chip,
 				 unsigned gpio, int value)
 {
 	struct crystalcove_gpio *cg = gpiochip_get_data(chip);
+	int reg = to_reg(gpio, CTRL_OUT);
 
-	if (gpio > CRYSTALCOVE_VGPIO_NUM)
+	if (reg < 0)
 		return;
 
 	if (value)
-		regmap_update_bits(cg->regmap, to_reg(gpio, CTRL_OUT), 1, 1);
+		regmap_update_bits(cg->regmap, reg, 1, 1);
 	else
-		regmap_update_bits(cg->regmap, to_reg(gpio, CTRL_OUT), 1, 0);
+		regmap_update_bits(cg->regmap, reg, 1, 0);
 }
 
 static int crystalcove_irq_type(struct irq_data *data, unsigned type)
@@ -185,6 +196,9 @@ static int crystalcove_irq_type(struct irq_data *data, unsigned type)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
+	if (data->hwirq >= CRYSTALCOVE_GPIO_NUM)
+		return 0;
+
 	switch (type) {
 	case IRQ_TYPE_NONE:
 		cg->intcnt_value = CTLI_INTCNT_DIS;
@@ -235,8 +249,10 @@ static void crystalcove_irq_unmask(struct irq_data *data)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
-	cg->set_irq_mask = false;
-	cg->update |= UPDATE_IRQ_MASK;
+	if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
+		cg->set_irq_mask = false;
+		cg->update |= UPDATE_IRQ_MASK;
+	}
 }
 
 static void crystalcove_irq_mask(struct irq_data *data)
@@ -244,8 +260,10 @@ static void crystalcove_irq_mask(struct irq_data *data)
 	struct crystalcove_gpio *cg =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
 
-	cg->set_irq_mask = true;
-	cg->update |= UPDATE_IRQ_MASK;
+	if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
+		cg->set_irq_mask = true;
+		cg->update |= UPDATE_IRQ_MASK;
+	}
 }
 
 static struct irq_chip crystalcove_irqchip = {
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 19a92ef..c83ea68 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	u32 set;
 
 	if (!of_device_is_compatible(mvchip->chip.of_node,
-				     "marvell,armada-370-xp-gpio"))
+				     "marvell,armada-370-gpio"))
 		return 0;
 
 	if (IS_ERR(mvchip->clk))
@@ -747,7 +747,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 		set = U32_MAX;
 	else
 		return -EINVAL;
-	writel_relaxed(0, mvebu_gpioreg_blink_counter_select(mvchip));
+	writel_relaxed(set, mvebu_gpioreg_blink_counter_select(mvchip));
 
 	mvpwm = devm_kzalloc(dev, sizeof(struct mvebu_pwm), GFP_KERNEL);
 	if (!mvpwm)
@@ -768,6 +768,13 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 	mvpwm->chip.dev = dev;
 	mvpwm->chip.ops = &mvebu_pwm_ops;
 	mvpwm->chip.npwm = mvchip->chip.ngpio;
+	/*
+	 * There may already be some PWMs allocated, so we can't force
+	 * mvpwm->chip.base to a fixed value like mvchip->chip.base.
+	 * Instead, let pwmchip_add() do the numbering and take the next
+	 * free region.
+	 */
+	mvpwm->chip.base = -1;
 
 	spin_lock_init(&mvpwm->lock);
 
@@ -845,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
 	},
 	{
-		.compatible = "marvell,armada-370-xp-gpio",
+		.compatible = "marvell,armada-370-gpio",
 		.data	    = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
 	},
 	{
@@ -1121,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
 						 mvchip);
 	}
 
-	/* Armada 370/XP has simple PWM support for GPIO lines */
+	/* Some MVEBU SoCs have simple PWM support for GPIO lines */
 	if (IS_ENABLED(CONFIG_PWM))
 		return mvebu_pwm_probe(pdev, mvchip, id);
 
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 2185232..8fa5fcd 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -201,7 +201,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 			handler = acpi_gpio_irq_handler_evt;
 	}
 	if (!handler)
-		return AE_BAD_PARAMETER;
+		return AE_OK;
 
 	pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin);
 	if (pin < 0)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 5db4413..a42a1ee 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -708,7 +708,8 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p)
 
 	ge.timestamp = ktime_get_real_ns();
 
-	if (le->eflags & GPIOEVENT_REQUEST_BOTH_EDGES) {
+	if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE
+	    && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) {
 		int level = gpiod_get_value_cansleep(le->desc);
 
 		if (level)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1cf78f4..1e8e112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 			DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
 				 adev->clock.default_dispclk / 100);
 			adev->clock.default_dispclk = 60000;
+		} else if (adev->clock.default_dispclk <= 60000) {
+			DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+				 adev->clock.default_dispclk / 100);
+			adev->clock.default_dispclk = 62500;
 		}
 		adev->clock.dp_extclk =
 			le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2d705e..ab6b0d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 236d995..c0d8c6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
 
 void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev)
 {
-	struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev;
+	struct amdgpu_fbdev *afbdev;
 	struct drm_fb_helper *fb_helper;
 	int ret;
 
+	if (!adev)
+		return;
+
+	afbdev = adev->mode_info.rfbdev;
+
 	if (!afbdev)
 		return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 07ff3b1..8ecf82c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -634,7 +634,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		mutex_unlock(&id_mgr->lock);
 	}
 
-	if (gds_switch_needed) {
+	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
 		id->gds_base = job->gds_base;
 		id->gds_size = job->gds_size;
 		id->gws_base = job->gws_base;
@@ -672,6 +672,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
 
+	atomic64_set(&id->owner, 0);
 	id->gds_base = 0;
 	id->gds_size = 0;
 	id->gws_base = 0;
@@ -681,6 +682,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 }
 
 /**
+ * amdgpu_vm_reset_all_ids - reset all VMIDs to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset all VMIDs to force a flush on next use
+ */
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
+{
+	unsigned i, j;
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		struct amdgpu_vm_id_manager *id_mgr =
+			&adev->vm_manager.id_mgr[i];
+
+		for (j = 1; j < id_mgr->num_ids; ++j)
+			amdgpu_vm_reset_id(adev, i, j);
+	}
+}
+
+/**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
  * @vm: requested vm
@@ -2270,7 +2291,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
-
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d97e28b..e1d951e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -204,6 +204,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
 			unsigned vmid);
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 				 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a4831fe..a2c59a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
 }
 
 const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
-	amdgpu_vram_mgr_init,
-	amdgpu_vram_mgr_fini,
-	amdgpu_vram_mgr_new,
-	amdgpu_vram_mgr_del,
-	amdgpu_vram_mgr_debug
+	.init		= amdgpu_vram_mgr_init,
+	.takedown	= amdgpu_vram_mgr_fini,
+	.get_node	= amdgpu_vram_mgr_new,
+	.put_node	= amdgpu_vram_mgr_del,
+	.debug		= amdgpu_vram_mgr_debug
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8c9bc75..8a0818b 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
 {
 	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+	ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
 	memset(&args, 0, sizeof(args));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 6dc1410..ec93714 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev)
 	u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
 	u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+	 * blanking period would allow it
+	 */
+	if (amdgpu_dpm_get_vrefresh(adev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 0cdeb6a..5dffa27 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1207,8 +1207,11 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
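The div_u64() form keeps the intermediate product out of 32-bit range: mode->clock is in kHz, and crtc_htotal * 1000000 already exceeds U32_MAX (about 4.29e9) for horizontal totals above 4294, which standard 4K timings reach (e.g. 4400 * 1,000,000 = 4.4e9), so the multiply is done in 64 bits before dividing. The same fix is repeated for the DCE 11.0, 6.0 and 8.0 variants below.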
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 773654a..47bbc87f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1176,8 +1176,11 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 1f35529..d8c9a95 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -983,8 +983,11 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	fixed20_12 a, b, c;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 3c558c1..db30c6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1091,8 +1091,11 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index a572979..d860939 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v6_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v6_0_hw_fini(adev);
 
 	return 0;
@@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v6_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v6_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index a9083a1..2750e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v7_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v7_0_hw_fini(adev);
 
 	return 0;
@@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v7_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v7_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 4ac9978..f56b408 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v8_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v8_0_hw_fini(adev);
 
 	return 0;
@@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v8_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v8_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc1e1c1..f936332 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -791,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->vm_manager.enabled) {
-		gmc_v9_0_vm_fini(adev);
-		adev->vm_manager.enabled = false;
-	}
 	gmc_v9_0_hw_fini(adev);
 
 	return 0;
@@ -809,17 +805,9 @@ static int gmc_v9_0_resume(void *handle)
 	if (r)
 		return r;
 
-	if (!adev->vm_manager.enabled) {
-		r = gmc_v9_0_vm_init(adev);
-		if (r) {
-			dev_err(adev->dev,
-				"vm manager initialization failed (%d).\n", r);
-			return r;
-		}
-		adev->vm_manager.enabled = true;
-	}
+	amdgpu_vm_reset_all_ids(adev);
 
-	return r;
+	return 0;
 }
 
 static bool gmc_v9_0_is_idle(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index fb08193..90332f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle,
 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
+	u32 v;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
 
 	if (ring == &adev->vce.ring[0])
-		return RREG32(mmVCE_RB_RPTR);
+		v = RREG32(mmVCE_RB_RPTR);
 	else if (ring == &adev->vce.ring[1])
-		return RREG32(mmVCE_RB_RPTR2);
+		v = RREG32(mmVCE_RB_RPTR2);
 	else
-		return RREG32(mmVCE_RB_RPTR3);
+		v = RREG32(mmVCE_RB_RPTR3);
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return v;
 }
 
 /**
@@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
+	u32 v;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
 
 	if (ring == &adev->vce.ring[0])
-		return RREG32(mmVCE_RB_WPTR);
+		v = RREG32(mmVCE_RB_WPTR);
 	else if (ring == &adev->vce.ring[1])
-		return RREG32(mmVCE_RB_WPTR2);
+		v = RREG32(mmVCE_RB_WPTR2);
 	else
-		return RREG32(mmVCE_RB_WPTR3);
+		v = RREG32(mmVCE_RB_WPTR3);
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return v;
 }
 
 /**
@@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
+	mutex_lock(&adev->grbm_idx_mutex);
+	if (adev->vce.harvest_config == 0 ||
+		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
+	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
+		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
+
 	if (ring == &adev->vce.ring[0])
 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
 	else if (ring == &adev->vce.ring[1])
 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
 	else
 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+
+	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
+	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
@@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	int idx, r;
 
-	ring = &adev->vce.ring[0];
-	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
-
-	ring = &adev->vce.ring[1];
-	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
-
-	ring = &adev->vce.ring[2];
-	WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
-	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
-
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (idx = 0; idx < 2; ++idx) {
 		if (adev->vce.harvest_config & (1 << idx))
 			continue;
 
 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
+
+		/* Program instance 0 reg space for two instances or instance 0 case
+		program instance 1 reg space for only instance 1 available case */
+		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
+			ring = &adev->vce.ring[0];
+			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+			ring = &adev->vce.ring[1];
+			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+
+			ring = &adev->vce.ring[2];
+			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
+			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
+			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
+			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
+		}
+
 		vce_v3_0_mc_resume(adev, idx);
 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index a74a3db..102eb6d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -2655,6 +2655,28 @@ static int smu7_get_power_state_size(struct pp_hwmgr *hwmgr)
 	return sizeof(struct smu7_power_state);
 }
 
+static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
+				 uint32_t vblank_time_us)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t switch_limit_us;
+
+	switch (hwmgr->chip_id) {
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+		break;
+	default:
+		switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
+		break;
+	}
+
+	if (vblank_time_us < switch_limit_us)
+		return true;
+	else
+		return false;
+}
 
 static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				struct pp_power_state *request_ps,
@@ -2669,6 +2691,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	bool disable_mclk_switching;
 	bool disable_mclk_switching_for_frame_lock;
 	struct cgs_display_info info = {0};
+	struct cgs_mode_info mode_info = {0};
 	const struct phm_clock_and_voltage_limits *max_limits;
 	uint32_t i;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2677,6 +2700,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 	int32_t count;
 	int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
 
+	info.mode_info = &mode_info;
 	data->battery_state = (PP_StateUILabel_Battery ==
 			request_ps->classification.ui_label);
 
@@ -2703,8 +2727,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cgs_get_active_displays_info(hwmgr->device, &info);
 
-	/*TO DO result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
-
 	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
 	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
 
@@ -2769,8 +2791,10 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-	disable_mclk_switching = (1 < info.display_count) ||
-				    disable_mclk_switching_for_frame_lock;
+	disable_mclk_switching = ((1 < info.display_count) ||
+				  disable_mclk_switching_for_frame_lock ||
+				  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+				  (mode_info.refresh_rate > 120));
 
 	sclk = smu7_ps->performance_levels[0].engine_clock;
 	mclk = smu7_ps->performance_levels[0].memory_clock;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ad30f5d..2614af2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4186,7 +4186,7 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
 	struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend);
-	uint32_t i;
+	int i;
 
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index d5f53d0..83e40fe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -709,17 +709,17 @@ static int tf_vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr,
 
 static struct phm_master_table_item
 vega10_thermal_start_thermal_controller_master_list[] = {
-	{NULL, tf_vega10_thermal_initialize},
-	{NULL, tf_vega10_thermal_set_temperature_range},
-	{NULL, tf_vega10_thermal_enable_alert},
+	{ .tableFunction = tf_vega10_thermal_initialize },
+	{ .tableFunction = tf_vega10_thermal_set_temperature_range },
+	{ .tableFunction = tf_vega10_thermal_enable_alert },
 /* We should restrict performance levels to low before we halt the SMC.
  * On the other hand we are still in boot state when we do this
  * so it would be pointless.
  * If this assumption changes we have to revisit this table.
  */
-	{NULL, tf_vega10_thermal_setup_fan_table},
-	{NULL, tf_vega10_thermal_start_smc_fan_control},
-	{NULL, NULL}
+	{ .tableFunction = tf_vega10_thermal_setup_fan_table },
+	{ .tableFunction = tf_vega10_thermal_start_smc_fan_control },
+	{ }
 };
 
 static struct phm_master_table_header
@@ -731,10 +731,10 @@ vega10_thermal_start_thermal_controller_master = {
 
 static struct phm_master_table_item
 vega10_thermal_set_temperature_range_master_list[] = {
-	{NULL, tf_vega10_thermal_disable_alert},
-	{NULL, tf_vega10_thermal_set_temperature_range},
-	{NULL, tf_vega10_thermal_enable_alert},
-	{NULL, NULL}
+	{ .tableFunction = tf_vega10_thermal_disable_alert },
+	{ .tableFunction = tf_vega10_thermal_set_temperature_range },
+	{ .tableFunction = tf_vega10_thermal_enable_alert },
+	{ }
 };
 
 struct phm_master_table_header
diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig
index 40d2827..53e78d0 100644
--- a/drivers/gpu/drm/bridge/synopsys/Kconfig
+++ b/drivers/gpu/drm/bridge/synopsys/Kconfig
@@ -1,6 +1,7 @@
 config DRM_DW_HDMI
 	tristate
 	select DRM_KMS_HELPER
+	select REGMAP_MMIO
 
 config DRM_DW_HDMI_AHB_AUDIO
 	tristate "Synopsys Designware AHB Audio interface"
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 8be9719..aa885a6 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -508,6 +508,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 		bool has_connectors =
 			!!new_crtc_state->connector_mask;
 
+		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
+
 		if (!drm_mode_equal(&old_crtc_state->mode, &new_crtc_state->mode)) {
 			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n",
 					 crtc->base.id, crtc->name);
@@ -551,6 +553,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
 		const struct drm_connector_helper_funcs *funcs = connector->helper_private;
 
+		WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+
 		/*
 		 * This only sets crtc->connectors_changed for routing changes,
 		 * drivers must set crtc->connectors_changed themselves when
@@ -650,6 +654,8 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
 	for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs;
 
+		WARN_ON(!drm_modeset_is_locked(&plane->mutex));
+
 		funcs = plane->helper_private;
 
 		drm_atomic_helper_plane_changed(state, old_plane_state, new_plane_state, plane);
@@ -2663,7 +2669,12 @@ int drm_atomic_helper_resume(struct drm_device *dev,
 
 	drm_modeset_acquire_init(&ctx, 0);
 	while (1) {
+		err = drm_modeset_lock_all_ctx(dev, &ctx);
+		if (err)
+			goto out;
+
 		err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
+out:
 		if (err != -EDEADLK)
 			break;
 
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 9f84761..48ca245 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	if (!connector)
 		return -ENOENT;
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-	encoder = drm_connector_get_encoder(connector);
-	if (encoder)
-		out_resp->encoder_id = encoder->base.id;
-	else
-		out_resp->encoder_id = 0;
-
-	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
-			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
-			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
-			&out_resp->count_props);
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
-	if (ret)
-		goto out_unref;
-
 	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
 		if (connector->encoder_ids[i] != 0)
 			encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 				if (put_user(connector->encoder_ids[i],
 					     encoder_ptr + copied)) {
 					ret = -EFAULT;
-					goto out_unref;
+					goto out;
 				}
 				copied++;
 			}
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 			if (copy_to_user(mode_ptr + copied,
 					 &u_mode, sizeof(u_mode))) {
 				ret = -EFAULT;
+				mutex_unlock(&dev->mode_config.mutex);
+
 				goto out;
 			}
 			copied++;
 		}
 	}
 	out_resp->count_modes = mode_count;
-out:
 	mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+	encoder = drm_connector_get_encoder(connector);
+	if (encoder)
+		out_resp->encoder_id = encoder->base.id;
+	else
+		out_resp->encoder_id = 0;
+
+	/* Only grab properties after probing, to make sure EDID and other
+	 * properties reflect the latest status. */
+	ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+			(uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+			(uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+			&out_resp->count_props);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
 	drm_connector_put(connector);
 
 	return ret;
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 3e5f521..213fb83 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1208,3 +1208,86 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux)
 	return 0;
 }
 EXPORT_SYMBOL(drm_dp_stop_crc);
+
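+/* Entry in the DPCD quirk table, matched by OUI and sink/branch type. */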
+struct dpcd_quirk {
+	u8 oui[3];
+	bool is_branch;
+	u32 quirks;
+};
+
+#define OUI(first, second, third) { (first), (second), (third) }
+
+static const struct dpcd_quirk dpcd_quirk_list[] = {
+	/* Analogix 7737 needs reduced M and N at HBR2 link rates */
+	{ OUI(0x00, 0x22, 0xb9), true, BIT(DP_DPCD_QUIRK_LIMITED_M_N) },
+};
+
+#undef OUI
+
+/*
+ * Get a bit mask of DPCD quirks for the sink/branch device identified by
+ * ident. The quirk data is shared but it's up to the drivers to act on the
+ * data.
+ *
+ * For now, only the OUI (first three bytes) is used, but this may be extended
+ * to device identification string and hardware/firmware revisions later.
+ */
+static u32
+drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch)
+{
+	const struct dpcd_quirk *quirk;
+	u32 quirks = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dpcd_quirk_list); i++) {
+		quirk = &dpcd_quirk_list[i];
+
+		if (quirk->is_branch != is_branch)
+			continue;
+
+		if (memcmp(quirk->oui, ident->oui, sizeof(ident->oui)) != 0)
+			continue;
+
+		quirks |= quirk->quirks;
+	}
+
+	return quirks;
+}
+
+/**
+ * drm_dp_read_desc - read sink/branch descriptor from DPCD
+ * @aux: DisplayPort AUX channel
+ * @desc: Device descriptor to fill from DPCD
+ * @is_branch: true for branch devices, false for sink devices
+ *
+ * Read DPCD 0x400 (sink) or 0x500 (branch) into @desc. Also debug log the
+ * identification.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc,
+		     bool is_branch)
+{
+	struct drm_dp_dpcd_ident *ident = &desc->ident;
+	unsigned int offset = is_branch ? DP_BRANCH_OUI : DP_SINK_OUI;
+	int ret, dev_id_len;
+
+	ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident));
+	if (ret < 0)
+		return ret;
+
+	desc->quirks = drm_dp_get_quirks(ident, is_branch);
+
+	dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id));
+
+	DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n",
+		      is_branch ? "branch" : "sink",
+		      (int)sizeof(ident->oui), ident->oui,
+		      dev_id_len, ident->device_id,
+		      ident->hw_rev >> 4, ident->hw_rev & 0xf,
+		      ident->sw_major_rev, ident->sw_minor_rev,
+		      desc->quirks);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_read_desc);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index b5c6bb4..37b8ad3 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -358,7 +358,12 @@ EXPORT_SYMBOL(drm_put_dev);
 void drm_unplug_dev(struct drm_device *dev)
 {
 	/* for a USB device */
-	drm_dev_unregister(dev);
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_modeset_unregister_all(dev);
+
+	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
+	drm_minor_unregister(dev, DRM_MINOR_RENDER);
+	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 
 	mutex_lock(&drm_global_mutex);
 
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index fedd4d6..5dc8c43 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -948,8 +948,6 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	}
 
 out:
-	if (ret && crtc->funcs->page_flip_target)
-		drm_crtc_vblank_put(crtc);
 	if (fb)
 		drm_framebuffer_put(fb);
 	if (crtc->primary->old_fb)
@@ -964,5 +962,8 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
+	if (ret && crtc->funcs->page_flip_target)
+		drm_crtc_vblank_put(crtc);
+
 	return ret;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index c4a091e..e437fba 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
 	struct etnaviv_gpu *gpu;
 	struct ww_acquire_ctx ticket;
 	struct dma_fence *fence;
+	u32 flags;
 	unsigned int nr_bos;
 	struct etnaviv_gem_submit_bo bos[0];
-	u32 flags;
+	/* No new members here, the previous one is variable-length! */
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index de80ee1..1013765 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-		bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+		bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
 		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
 						 explicit);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 09d3c4c..50294a7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -82,14 +82,9 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 	return ret;
 }
 
-static void exynos_drm_preclose(struct drm_device *dev,
-					struct drm_file *file)
-{
-	exynos_drm_subdrv_close(dev, file);
-}
-
 static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 {
+	exynos_drm_subdrv_close(dev, file);
 	kfree(file->driver_priv);
 	file->driver_priv = NULL;
 }
@@ -145,7 +140,6 @@ static struct drm_driver exynos_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME
 				  | DRIVER_ATOMIC | DRIVER_RENDER,
 	.open			= exynos_drm_open,
-	.preclose		= exynos_drm_preclose,
 	.lastclose		= exynos_drm_lastclose,
 	.postclose		= exynos_drm_postclose,
 	.gem_free_object_unlocked = exynos_drm_gem_free_object,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cb31769..39c7405 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -160,12 +160,9 @@ struct exynos_drm_clk {
  *	drm framework doesn't support multiple irq yet.
  *	we can refer to the crtc to current hardware interrupt occurred through
  *	this pipe value.
- * @enabled: if the crtc is enabled or not
- * @event: vblank event that is currently queued for flip
- * @wait_update: wait all pending planes updates to finish
- * @pending_update: number of pending plane updates in this crtc
  * @ops: pointer to callbacks for exynos drm specific functionality
  * @ctx: A pointer to the crtc's implementation specific context
+ * @pipe_clk: A pointer to the crtc's pipeline clock.
  */
 struct exynos_drm_crtc {
 	struct drm_crtc			base;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index fc4fda7..d404de8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1633,7 +1633,6 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi)
 {
 	struct device *dev = dsi->dev;
 	struct device_node *node = dev->of_node;
-	struct device_node *ep;
 	int ret;
 
 	ret = exynos_dsi_of_read_u32(node, "samsung,pll-clock-frequency",
@@ -1641,32 +1640,21 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi)
 	if (ret < 0)
 		return ret;
 
-	ep = of_graph_get_endpoint_by_regs(node, DSI_PORT_OUT, 0);
-	if (!ep) {
-		dev_err(dev, "no output port with endpoint specified\n");
-		return -EINVAL;
-	}
-
-	ret = exynos_dsi_of_read_u32(ep, "samsung,burst-clock-frequency",
+	ret = exynos_dsi_of_read_u32(node, "samsung,burst-clock-frequency",
 				     &dsi->burst_clk_rate);
 	if (ret < 0)
-		goto end;
+		return ret;
 
-	ret = exynos_dsi_of_read_u32(ep, "samsung,esc-clock-frequency",
+	ret = exynos_dsi_of_read_u32(node, "samsung,esc-clock-frequency",
 				     &dsi->esc_clk_rate);
 	if (ret < 0)
-		goto end;
-
-	of_node_put(ep);
+		return ret;
 
 	dsi->bridge_node = of_graph_get_remote_node(node, DSI_PORT_OUT, 0);
 	if (!dsi->bridge_node)
 		return -EINVAL;
 
-end:
-	of_node_put(ep);
-
-	return ret;
+	return 0;
 }
 
 static int exynos_dsi_bind(struct device *dev, struct device *master,
@@ -1817,6 +1805,10 @@ static int exynos_dsi_probe(struct platform_device *pdev)
 
 static int exynos_dsi_remove(struct platform_device *pdev)
 {
+	struct exynos_dsi *dsi = platform_get_drvdata(pdev);
+
+	of_node_put(dsi->bridge_node);
+
 	pm_runtime_disable(&pdev->dev);
 
 	component_del(&pdev->dev, &exynos_dsi_component_ops);
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 0066fe7..be3eefe 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -759,20 +759,23 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
 			mode_dev->panel_fixed_mode =
 			    drm_mode_duplicate(dev, scan);
+			DRM_DEBUG_KMS("Using mode from DDC\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
 
 	/* Failed to get EDID, what about VBT? do we need this? */
-	if (mode_dev->vbt_mode)
+	if (dev_priv->lfp_lvds_vbt_mode) {
 		mode_dev->panel_fixed_mode =
-		    drm_mode_duplicate(dev, mode_dev->vbt_mode);
+			drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
 
-	if (!mode_dev->panel_fixed_mode)
-		if (dev_priv->lfp_lvds_vbt_mode)
-			mode_dev->panel_fixed_mode =
-				drm_mode_duplicate(dev,
-					dev_priv->lfp_lvds_vbt_mode);
+		if (mode_dev->panel_fixed_mode) {
+			mode_dev->panel_fixed_mode->type |=
+				DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using mode from VBT\n");
+			goto out;
+		}
+	}
 
 	/*
 	 * If we didn't get EDID, try checking if the panel is already turned
@@ -789,6 +792,7 @@ void psb_intel_lvds_init(struct drm_device *dev,
 		if (mode_dev->panel_fixed_mode) {
 			mode_dev->panel_fixed_mode->type |=
 			    DRM_MODE_TYPE_PREFERRED;
+			DRM_DEBUG_KMS("Using pre-programmed mode\n");
 			goto out;	/* FIXME: check for quirks */
 		}
 	}
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index 5abc69c..f77dcfa 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -760,7 +760,7 @@ static int dsi_parse_dt(struct platform_device *pdev, struct dw_dsi *dsi)
 	 * Get the endpoint node. In our case, dsi has one output port1
 	 * to which the external HDMI bridge is connected.
 	 */
-	ret = drm_of_find_panel_or_bridge(np, 0, 0, NULL, &dsi->bridge);
+	ret = drm_of_find_panel_or_bridge(np, 1, 0, NULL, &dsi->bridge);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index dca989e..24fe04d 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -779,8 +779,26 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 
+static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine;
+	struct intel_vgpu_workload *pos, *n;
+	unsigned int tmp;
+
+	/* free the unsubmitted workloads in the queues. */
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+		list_for_each_entry_safe(pos, n,
+			&vgpu->workload_q_head[engine->id], list) {
+			list_del_init(&pos->list);
+			free_workload(pos);
+		}
+	}
+}
+
 void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
 {
+	clean_workloads(vgpu, ALL_ENGINES);
 	kmem_cache_destroy(vgpu->workloads);
 }
 
@@ -811,17 +829,9 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine;
-	struct intel_vgpu_workload *pos, *n;
 	unsigned int tmp;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-		/* free the unsubmited workload in the queue */
-		list_for_each_entry_safe(pos, n,
-			&vgpu->workload_q_head[engine->id], list) {
-			list_del_init(&pos->list);
-			free_workload(pos);
-		}
-
+	clean_workloads(vgpu, engine_mask);
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
 		init_vgpu_execlist(vgpu, engine->id);
-	}
 }
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index c995e54..0ffd696 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1366,18 +1366,28 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	i915_reg_t reg = {.reg = offset};
+	u32 v = *(u32 *)p_data;
+
+	if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
+		return intel_vgpu_default_mmio_write(vgpu,
+				offset, p_data, bytes);
 
 	switch (offset) {
 	case 0x4ddc:
-		vgpu_vreg(vgpu, offset) = 0x8000003c;
-		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */
-		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 31);
 		break;
 	case 0x42080:
-		vgpu_vreg(vgpu, offset) = 0x8000;
-		/* WaCompressedResourceDisplayNewHashMode:skl */
-		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		/* bypass WaCompressedResourceDisplayNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 15);
+		break;
+	case 0xe194:
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 8);
+		break;
+	case 0x7014:
+		/* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */
+		vgpu_vreg(vgpu, offset) = v & ~(1 << 13);
 		break;
 	default:
 		return -EINVAL;
@@ -1634,7 +1644,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
 		NULL, NULL);
-	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		 skl_misc_ctl_write);
 	MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -2568,7 +2579,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x6e570, D_BDW_PLUS);
 	MMIO_D(0x65f10, D_BDW_PLUS);
 
-	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL,
+		 skl_misc_ctl_write);
 	MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e51..4bd1467 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
 	struct file_stats *stats = data;
 	struct i915_vma *vma;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	stats->count++;
 	stats->total += obj->base.size;
 	if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		struct drm_i915_gem_request *request;
 		struct task_struct *task;
 
+		mutex_lock(&dev->struct_mutex);
+
 		memset(&stats, 0, sizeof(stats));
 		stats.file_priv = file->driver_priv;
 		spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		 * still alive (e.g. get_pid(current) => fork() => exit()).
 		 * Therefore, we need to protect this ->comm access using RCU.
 		 */
-		mutex_lock(&dev->struct_mutex);
 		request = list_first_entry_or_null(&file_priv->mm.request_list,
 						   struct drm_i915_gem_request,
 						   client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 				PIDTYPE_PID);
 		print_file_stats(m, task ? task->comm : "<unknown>", stats);
 		rcu_read_unlock();
+
 		mutex_unlock(&dev->struct_mutex);
 	}
 	mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3036d48..4842867 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1235,6 +1235,15 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_fini;
 
 	pci_set_drvdata(pdev, &dev_priv->drm);
+	/*
+	 * Disable the system suspend direct complete optimization, which can
+	 * leave the device suspended skipping the driver's suspend handlers
+	 * if the device was already runtime suspended. This is needed due to
+	 * the difference in our runtime and system suspend sequence and
+	 * because the HDA driver may require us to enable the audio power
+	 * domain during system suspend.
+	 */
+	pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;
 
 	ret = i915_driver_init_early(dev_priv, ent);
 	if (ret < 0)
@@ -1272,10 +1281,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev_priv->ipc_enabled = false;
 
-	/* Everything is in place, we can now relax! */
-	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
-		 driver.name, driver.major, driver.minor, driver.patchlevel,
-		 driver.date, pci_name(pdev), dev_priv->drm.primary->index);
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
 		DRM_INFO("DRM_I915_DEBUG enabled\n");
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9b0949..2c453a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -562,7 +562,8 @@ struct intel_link_m_n {
 
 void intel_link_compute_m_n(int bpp, int nlanes,
 			    int pixel_clock, int link_clock,
-			    struct intel_link_m_n *m_n);
+			    struct intel_link_m_n *m_n,
+			    bool reduce_m_n);
 
 /* Interface history:
  *
@@ -2990,6 +2991,16 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
 	return false;
 }
 
+static inline bool
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
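+	/* BXT: GGTT updates must be serialised against aperture access under VT-d. */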
+#ifdef CONFIG_INTEL_IOMMU
+	if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped)
+		return true;
+#endif
+	return false;
+}
+
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 				int enable_ppgtt);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b6ac3df..615f0a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	unsigned int max_segment;
+	gfp_t noreclaim;
 	int ret;
-	gfp_t gfp;
 
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 * Fail silently without starting the shrinker
 	 */
 	mapping = obj->base.filp->f_mapping;
-	gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	noreclaim = mapping_gfp_constraint(mapping,
+					   ~(__GFP_IO | __GFP_RECLAIM));
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
 	sg = st->sgl;
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
-		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		if (unlikely(IS_ERR(page))) {
-			i915_gem_shrink(dev_priv,
-					page_count,
-					I915_SHRINK_BOUND |
-					I915_SHRINK_UNBOUND |
-					I915_SHRINK_PURGEABLE);
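+		/*
+		 * Try the allocation without reclaim first; if that fails,
+		 * shrink our own buffers and retry, allowing full reclaim
+		 * only on the final attempt.
+		 */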
+		const unsigned int shrink[] = {
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		}
-		if (unlikely(IS_ERR(page))) {
-			gfp_t reclaim;
+			if (likely(!IS_ERR(page)))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+			cond_resched();
 
 			/* We've tried hard to allocate the memory by reaping
 			 * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			 * defer the oom here by reporting the ENOMEM back
 			 * to userspace.
 			 */
-			reclaim = mapping_gfp_mask(mapping);
-			reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
 
-			page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto err_sg;
+				/* Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want the future __GFP_MAYFAIL.
+				 */
 			}
-		}
+		} while (1);
+
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
@@ -3298,6 +3318,10 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
 
+	/* If the device is asleep, we have no requests outstanding */
+	if (!READ_ONCE(i915->gt.awake))
+		return 0;
+
 	if (flags & I915_WAIT_LOCKED) {
 		struct i915_gem_timeline *tl;
 
@@ -4218,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
 	mapping = obj->base.filp->f_mapping;
 	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
 	i915_gem_object_init(obj, &i915_gem_object_ops);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3e59c8..9ad13ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -546,11 +546,12 @@ relocate_entry(struct drm_i915_gem_object *obj,
 }
 
 static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
 				   struct eb_vmas *eb,
 				   struct drm_i915_gem_relocation_entry *reloc,
 				   struct reloc_cache *cache)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct drm_gem_object *target_obj;
 	struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
+	/*
+	 * If we write into the object, we need to force the synchronisation
+	 * barrier, either with an asynchronous clflush or if we executed the
+	 * patching using the GPU (though that should be serialised by the
+	 * timeline). To be completely sure, and since we are required to
+	 * do relocations we are already stalling, disable the user's opt
+	 * out of our synchronisation.
+	 */
+	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
 	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+			ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
 			if (ret)
 				goto out;
 
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 
 	reloc_cache_init(&cache, eb->i915);
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+		ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
 		if (ret)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a0563e1..f1989b8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2191,6 +2191,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = vm->i915;
+
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
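+/* Argument pack for gen8_ggtt_insert_page() when run under stop_machine(). */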
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct sg_table *st;
+	u64 start;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct sg_table *st,
+					     u64 start,
+					     enum i915_cache_level level,
+					     u32 unused)
+{
+	struct insert_entries arg = { vm, st, start, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *arg = _arg;
+
+	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+					  u64 start,
+					  u64 length)
+{
+	struct clear_range arg = { vm, start, length };
+
+	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -2313,7 +2408,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 		    appgtt->base.allocate_va_range) {
 			ret = appgtt->base.allocate_va_range(&appgtt->base,
 							     vma->node.start,
-							     vma->node.size);
+							     vma->size);
 			if (ret)
 				goto err_pages;
 		}
@@ -2785,6 +2880,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
 
+	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+	if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
+		ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+		if (ggtt->base.clear_range != nop_clear_range)
+			ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+	}
+
 	ggtt->invalidate = gen6_ggtt_invalidate;
 
 	return ggtt_probe_common(ggtt, size);
@@ -2997,7 +3100,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915)
 
 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
 {
-	i915->ggtt.invalidate = gen6_ggtt_invalidate;
+	if (i915->ggtt.invalidate == guc_ggtt_invalidate)
+		i915->ggtt.invalidate = gen6_ggtt_invalidate;
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ddbc94..a74d0ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;
 
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 129c58b..a4a920c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -123,7 +123,7 @@ struct drm_i915_gem_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_t submitq;
+	wait_queue_entry_t submitq;
 	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 129ed30..57d9f7f 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -59,9 +59,6 @@ static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
 		return;
 
 	mutex_unlock(&dev->struct_mutex);
-
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
 }
 
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
@@ -274,8 +271,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 				I915_SHRINK_ACTIVE);
 	intel_runtime_pm_put(dev_priv);
 
-	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
-
 	return freed;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a0d6d43..fb5231f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -278,7 +278,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			obj->mm.quirked = false;
 		}
 		if (!i915_gem_object_is_tiled(obj)) {
-			GEM_BUG_ON(!obj->mm.quirked);
+			GEM_BUG_ON(obj->mm.quirked);
 			__i915_gem_object_pin_pages(obj);
 			obj->mm.quirked = true;
 		}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff..ab5140b 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fd97fe0..190f6aa 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 pipestat_mask;
 	u32 enable_mask;
 	enum pipe pipe;
-	u32 val;
 
 	pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
 			PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT |
 		I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
-		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
+		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+		I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT;
+
 	if (IS_CHERRYVIEW(dev_priv))
-		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
+			I915_LPE_PIPE_C_INTERRUPT;
 
 	WARN_ON(dev_priv->irq_mask != ~0);
 
-	val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT |
-		I915_LPE_PIPE_C_INTERRUPT);
-
-	enable_mask |= val;
-
 	dev_priv->irq_mask = ~enable_mask;
 
 	GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index f87b0c4..1a78363 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -208,7 +208,7 @@ static const struct intel_device_info intel_ironlake_d_info = {
 static const struct intel_device_info intel_ironlake_m_info = {
 	GEN5_FEATURES,
 	.platform = INTEL_IRONLAKE,
-	.is_mobile = 1,
+	.is_mobile = 1, .has_fbc = 1,
 };
 
 #define GEN6_FEATURES \
@@ -390,7 +390,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
 	.has_hw_contexts = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_guc = 1, \
-	.has_decoupled_mmio = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	.has_full_48bit_ppgtt = 1, \
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index c0cb297..2cfe96d3 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -36,10 +36,6 @@
 #define VGT_VERSION_MAJOR 1
 #define VGT_VERSION_MINOR 0
 
-#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
-#define INTEL_VGT_IF_VERSION \
-	INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
-
 /*
  * notifications from guest to vgpu device model
  */
@@ -55,8 +51,8 @@ enum vgt_g2v_type {
 
 struct vgt_if {
 	u64 magic;		/* VGT_MAGIC */
-	uint16_t version_major;
-	uint16_t version_minor;
+	u16 version_major;
+	u16 version_minor;
 	u32 vgt_id;		/* ID of vGT instance */
 	u32 rsv1[12];		/* pad to offset 0x40 */
 	/*
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5a7c63e..65b837e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8280,7 +8280,7 @@ enum {
 
 /* MIPI DSI registers */
 
-#define _MIPI_PORT(port, a, c)	((port) ? c : a)	/* ports A and C only */
+#define _MIPI_PORT(port, a, c)	(((port) == PORT_A) ? a : c)	/* ports A and C only */
 #define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
 
 #define MIPIO_TXESC_CLK_DIV1			_MMIO(0x160004)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8e..380de43 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -152,7 +152,7 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
 	wait_queue_head_t *x = &fence->wait;
-	wait_queue_t *pos, *next;
+	wait_queue_entry_t *pos, *next;
 	unsigned long flags;
 
 	debug_fence_deactivate(fence);
@@ -160,31 +160,30 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 
 	/*
 	 * To prevent unbounded recursion as we traverse the graph of
-	 * i915_sw_fences, we move the task_list from this, the next ready
-	 * fence, to the tail of the original fence's task_list
+	 * i915_sw_fences, we move the entry list from this, the next ready
+	 * fence, to the tail of the original fence's entry list
 	 * (and so added to the list to be woken).
 	 */
 
 	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
 	if (continuation) {
-		list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
+		list_for_each_entry_safe(pos, next, &x->head, entry) {
 			if (pos->func == autoremove_wake_function)
 				pos->func(pos, TASK_NORMAL, 0, continuation);
 			else
-				list_move_tail(&pos->task_list, continuation);
+				list_move_tail(&pos->entry, continuation);
 		}
 	} else {
 		LIST_HEAD(extra);
 
 		do {
-			list_for_each_entry_safe(pos, next,
-						 &x->task_list, task_list)
+			list_for_each_entry_safe(pos, next, &x->head, entry)
 				pos->func(pos, TASK_NORMAL, 0, &extra);
 
 			if (list_empty(&extra))
 				break;
 
-			list_splice_tail_init(&extra, &x->task_list);
+			list_splice_tail_init(&extra, &x->head);
 		} while (1);
 	}
 	spin_unlock_irqrestore(&x->lock, flags);
@@ -254,9 +253,9 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence)
 	__i915_sw_fence_commit(fence);
 }
 
-static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags, void *key)
 {
-	list_del(&wq->task_list);
+	list_del(&wq->entry);
 	__i915_sw_fence_complete(wq->private, key);
 	i915_sw_fence_put(wq->private);
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
@@ -267,7 +266,7 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
 static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 				    const struct i915_sw_fence * const signaler)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return false;
@@ -275,7 +274,7 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	if (fence == signaler)
 		return true;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -288,12 +287,12 @@ static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 
 	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
 		return;
 
-	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+	list_for_each_entry(wq, &fence->wait.head, entry) {
 		if (wq->func != i915_sw_fence_wake)
 			continue;
 
@@ -320,7 +319,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 
 static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 					  struct i915_sw_fence *signaler,
-					  wait_queue_t *wq, gfp_t gfp)
+					  wait_queue_entry_t *wq, gfp_t gfp)
 {
 	unsigned long flags;
 	int pending;
@@ -350,7 +349,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 		pending |= I915_SW_FENCE_FLAG_ALLOC;
 	}
 
-	INIT_LIST_HEAD(&wq->task_list);
+	INIT_LIST_HEAD(&wq->entry);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
 	wq->private = i915_sw_fence_get(fence);
@@ -359,7 +358,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 	spin_lock_irqsave(&signaler->wait.lock, flags);
 	if (likely(!i915_sw_fence_done(signaler))) {
-		__add_wait_queue_tail(&signaler->wait, wq);
+		__add_wait_queue_entry_tail(&signaler->wait, wq);
 		pending = 1;
 	} else {
 		i915_sw_fence_wake(wq, 0, 0, NULL);
@@ -372,7 +371,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *signaler,
-				 wait_queue_t *wq)
+				 wait_queue_entry_t *wq)
 {
 	return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0);
 }
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbb..fd3c3bf 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -66,7 +66,7 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence);
 
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *after,
-				 wait_queue_t *wq);
+				 wait_queue_entry_t *wq);
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 4ab8a97..2e73901 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -60,8 +60,8 @@
  */
 void i915_check_vgpu(struct drm_i915_private *dev_priv)
 {
-	uint64_t magic;
-	uint32_t version;
+	u64 magic;
+	u16 version_major;
 
 	BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
@@ -69,10 +69,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 	if (magic != VGT_MAGIC)
 		return;
 
-	version = INTEL_VGT_IF_VERSION_ENCODE(
-		__raw_i915_read16(dev_priv, vgtif_reg(version_major)),
-		__raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
-	if (version != INTEL_VGT_IF_VERSION) {
+	version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major));
+	if (version_major < VGT_VERSION_MAJOR) {
 		DRM_INFO("VGT interface version mismatch!\n");
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1aba470..f066e2d 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 				break;
 		}
 
+		if (!ret) {
+			ret = i915_gem_active_retire(&vma->last_fence,
+						     &vma->vm->i915->drm.struct_mutex);
+		}
+
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index eb638a1..42fb436 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -15,13 +15,9 @@ static struct intel_dsm_priv {
 	acpi_handle dhandle;
 } intel_dsm_priv;
 
-static const u8 intel_dsm_guid[] = {
-	0xd3, 0x73, 0xd8, 0x7e,
-	0xd0, 0xc2,
-	0x4f, 0x4e,
-	0xa8, 0x54,
-	0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
-};
+static const guid_t intel_dsm_guid =
+	GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
+		  0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);
 
 static char *intel_dsm_port_name(u8 id)
 {
@@ -80,7 +76,7 @@ static void intel_dsm_platform_mux_info(void)
 	int i;
 	union acpi_object *pkg, *connector_count;
 
-	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid,
+	pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid,
 			INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO,
 			NULL, ACPI_TYPE_PACKAGE);
 	if (!pkg) {
@@ -118,7 +114,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
 	if (!dhandle)
 		return false;
 
-	if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID,
+	if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID,
 			    1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) {
 		DRM_DEBUG_KMS("no _DSM method for intel device\n");
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3617927..9106ea3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
 static void skylake_pfit_enable(struct intel_crtc *crtc);
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
 static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+					 struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 
 struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	int i, ret;
 
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, ctx);
 	i915_redisable_vga(to_i915(dev));
 
 	if (!state)
@@ -4598,7 +4599,7 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe)
 
 static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
-		  unsigned scaler_user, int *scaler_id, unsigned int rotation,
+		  unsigned int scaler_user, int *scaler_id,
 		  int src_w, int src_h, int dst_w, int dst_h)
 {
 	struct intel_crtc_scaler_state *scaler_state =
@@ -4607,9 +4608,12 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		to_intel_crtc(crtc_state->base.crtc);
 	int need_scaling;
 
-	need_scaling = drm_rotation_90_or_270(rotation) ?
-		(src_h != dst_w || src_w != dst_h):
-		(src_w != dst_w || src_h != dst_h);
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
+	need_scaling = src_w != dst_w || src_h != dst_h;
 
 	/*
 	 * if plane is being disabled or scaler is no more required or force detach
@@ -4671,7 +4675,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
 	const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id, DRM_ROTATE_0,
+		&state->scaler_state.scaler_id,
 		state->pipe_src_w, state->pipe_src_h,
 		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }
@@ -4700,7 +4704,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 	ret = skl_update_scaler(crtc_state, force_detach,
 				drm_plane_index(&intel_plane->base),
 				&plane_state->scaler_id,
-				plane_state->base.rotation,
 				drm_rect_width(&plane_state->base.src) >> 16,
 				drm_rect_height(&plane_state->base.src) >> 16,
 				drm_rect_width(&plane_state->base.dst),
@@ -5823,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
 		intel_update_watermarks(intel_crtc);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+					struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5853,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 		return;
 	}
 
-	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+	state->acquire_ctx = ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
 	crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -6101,7 +6105,7 @@ static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc,
 	pipe_config->fdi_lanes = lane;
 
 	intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock,
-			       link_bw, &pipe_config->fdi_m_n);
+			       link_bw, &pipe_config->fdi_m_n, false);
 
 	ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config);
 	if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) {
@@ -6277,7 +6281,8 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den)
 }
 
 static void compute_m_n(unsigned int m, unsigned int n,
-			uint32_t *ret_m, uint32_t *ret_n)
+			uint32_t *ret_m, uint32_t *ret_n,
+			bool reduce_m_n)
 {
 	/*
 	 * Reduce M/N as much as possible without loss in precision. Several DP
@@ -6285,9 +6290,11 @@ static void compute_m_n(unsigned int m, unsigned int n,
 	 * values. The passed in values are more likely to have the least
 	 * significant bits zero than M after rounding below, so do this first.
 	 */
-	while ((m & 1) == 0 && (n & 1) == 0) {
-		m >>= 1;
-		n >>= 1;
+	if (reduce_m_n) {
+		while ((m & 1) == 0 && (n & 1) == 0) {
+			m >>= 1;
+			n >>= 1;
+		}
 	}
 
 	*ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX);
@@ -6298,16 +6305,19 @@ static void compute_m_n(unsigned int m, unsigned int n,
 void
 intel_link_compute_m_n(int bits_per_pixel, int nlanes,
 		       int pixel_clock, int link_clock,
-		       struct intel_link_m_n *m_n)
+		       struct intel_link_m_n *m_n,
+		       bool reduce_m_n)
 {
 	m_n->tu = 64;
 
 	compute_m_n(bits_per_pixel * pixel_clock,
 		    link_clock * nlanes * 8,
-		    &m_n->gmch_m, &m_n->gmch_n);
+		    &m_n->gmch_m, &m_n->gmch_n,
+		    reduce_m_n);
 
 	compute_m_n(pixel_clock, link_clock,
-		    &m_n->link_m, &m_n->link_n);
+		    &m_n->link_m, &m_n->link_n,
+		    reduce_m_n);
 }
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
@@ -12197,6 +12207,15 @@ static void update_scanline_offset(struct intel_crtc *crtc)
 	 * type. For DP ports it behaves like most other platforms, but on HDMI
 	 * there's an extra 1 line difference. So we need to add two instead of
 	 * one to the value.
+	 *
+	 * On VLV/CHV DSI the scanline counter would appear to increment
+	 * approx. 1/3 of a scanline before start of vblank. Unfortunately
+	 * that means we can't tell whether we're in vblank or not while
+	 * we're on that particular line. We must still set scanline_offset
+	 * to 1 so that the vblank timestamps come out correct when we query
+	 * the scanline counter from within the vblank interrupt handler.
+	 * However if queried just before the start of vblank we'll get an
+	 * answer that's slightly in the future.
 	 */
 	if (IS_GEN2(dev_priv)) {
 		const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode;
@@ -15013,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
 	intel_setup_outputs(dev_priv);
 
 	drm_modeset_lock_all(dev);
-	intel_modeset_setup_hw_state(dev);
+	intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
 	drm_modeset_unlock_all(dev);
 
 	for_each_intel_crtc(dev, crtc) {
@@ -15050,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
 	return 0;
 }
 
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
-	struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
 	int ret;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
@@ -15128,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
 		(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15174,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		plane = crtc->plane;
 		crtc->base.primary->state->visible = true;
 		crtc->plane = !plane;
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 		crtc->plane = plane;
 	}
 
@@ -15184,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		 * resume. Force-enable the pipe to fix this, the update_dpms
 		 * call below we restore the pipe to the right state, but leave
 		 * the required bits on. */
-		intel_enable_pipe_a(dev);
+		intel_enable_pipe_a(dev, ctx);
 	}
 
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc->active && !intel_crtc_has_encoders(crtc))
-		intel_crtc_disable_noatomic(&crtc->base);
+		intel_crtc_disable_noatomic(&crtc->base, ctx);
 
 	if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
 		/*
@@ -15488,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
  * and sanitizes it to the current state
  */
 static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum pipe pipe;
@@ -15508,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
 	for_each_pipe(dev_priv, pipe) {
 		crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
-		intel_sanitize_crtc(crtc);
+		intel_sanitize_crtc(crtc, ctx);
 		intel_dump_pipe_config(crtc, crtc->config,
 				       "[setup_hw_state]");
 	}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ee77b51..fc691b8 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1507,37 +1507,6 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp)
 	DRM_DEBUG_KMS("common rates: %s\n", str);
 }
 
-bool
-__intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
-{
-	u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI :
-						      DP_SINK_OUI;
-
-	return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) ==
-	       sizeof(*desc);
-}
-
-bool intel_dp_read_desc(struct intel_dp *intel_dp)
-{
-	struct intel_dp_desc *desc = &intel_dp->desc;
-	bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
-		       DP_OUI_SUPPORT;
-	int dev_id_len;
-
-	if (!__intel_dp_read_desc(intel_dp, desc))
-		return false;
-
-	dev_id_len = strnlen(desc->device_id, sizeof(desc->device_id));
-	DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n",
-		      drm_dp_is_branch(intel_dp->dpcd) ? "branch" : "sink",
-		      (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)",
-		      dev_id_len, desc->device_id,
-		      desc->hw_rev >> 4, desc->hw_rev & 0xf,
-		      desc->sw_major_rev, desc->sw_minor_rev);
-
-	return true;
-}
-
 static int rate_to_index(int find, const int *rates)
 {
 	int i = 0;
@@ -1624,6 +1593,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	int common_rates[DP_MAX_SUPPORTED_RATES] = {};
 	int common_len;
 	uint8_t link_bw, rate_select;
+	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
+					   DP_DPCD_QUIRK_LIMITED_M_N);
 
 	common_len = intel_dp_common_rates(intel_dp, common_rates);
 
@@ -1753,7 +1724,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	intel_link_compute_m_n(bpp, lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
-			       &pipe_config->dp_m_n);
+			       &pipe_config->dp_m_n,
+			       reduce_m_n);
 
 	if (intel_connector->panel.downclock_mode != NULL &&
 		dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) {
@@ -1761,7 +1733,8 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 			intel_link_compute_m_n(bpp, lane_count,
 				intel_connector->panel.downclock_mode->clock,
 				pipe_config->port_clock,
-				&pipe_config->dp_m2_n2);
+				&pipe_config->dp_m2_n2,
+				reduce_m_n);
 	}
 
 	/*
@@ -3622,7 +3595,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 	if (!intel_dp_read_dpcd(intel_dp))
 		return false;
 
-	intel_dp_read_desc(intel_dp);
+	drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc,
+			 drm_dp_is_branch(intel_dp->dpcd));
 
 	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
 		dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
@@ -4624,7 +4598,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
 
 	intel_dp_print_rates(intel_dp);
 
-	intel_dp_read_desc(intel_dp);
+	drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc,
+			 drm_dp_is_branch(intel_dp->dpcd));
 
 	intel_dp_configure_mst(intel_dp);
 
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
index 6532e22..40ba313 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
 	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
 	struct intel_panel *panel = &connector->panel;
 
-	intel_dp_aux_enable_backlight(connector);
-
 	if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
 		panel->backlight.max = 0xFFFF;
 	else
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index c1f62eb..989e255 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -44,6 +44,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	int lane_count, slots;
 	const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
 	int mst_pbn;
+	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
+					   DP_DPCD_QUIRK_LIMITED_M_N);
 
 	pipe_config->has_pch_encoder = false;
 	bpp = 24;
@@ -75,7 +77,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	intel_link_compute_m_n(bpp, lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
-			       &pipe_config->dp_m_n);
+			       &pipe_config->dp_m_n,
+			       reduce_m_n);
 
 	pipe_config->dp_m_n.tu = slots;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index aaee394..f630c7a 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -906,14 +906,6 @@ enum link_m_n_set {
 	M2_N2
 };
 
-struct intel_dp_desc {
-	u8 oui[3];
-	u8 device_id[6];
-	u8 hw_rev;
-	u8 sw_major_rev;
-	u8 sw_minor_rev;
-} __packed;
-
 struct intel_dp_compliance_data {
 	unsigned long edid;
 	uint8_t video_pattern;
@@ -957,7 +949,7 @@ struct intel_dp {
 	/* Max link BW for the sink as per DPCD registers */
 	int max_sink_link_bw;
 	/* sink or branch descriptor */
-	struct intel_dp_desc desc;
+	struct drm_dp_desc desc;
 	struct drm_dp_aux aux;
 	enum intel_display_power_domain aux_power_domain;
 	uint8_t train_set[4];
@@ -1532,9 +1524,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
-bool __intel_dp_read_desc(struct intel_dp *intel_dp,
-			  struct intel_dp_desc *desc);
-bool intel_dp_read_desc(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
 bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0..f94eacf 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1075,6 +1075,22 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	return 0;
 }
 
+static bool ring_is_idle(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	bool idle = true;
+
+	intel_runtime_pm_get(dev_priv);
+
+	/* No bit for gen2, so assume the CS parser is idle */
+	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
+		idle = false;
+
+	intel_runtime_pm_put(dev_priv);
+
+	return idle;
+}
+
 /**
  * intel_engine_is_idle() - Report if the engine has finished processing all work
  * @engine: the intel_engine_cs
@@ -1084,8 +1100,6 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
  */
 bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
 	/* Any inflight/incomplete requests? */
 	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
 			       intel_engine_last_submit(engine)))
@@ -1100,7 +1114,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Ring stopped? */
-	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
+	if (!ring_is_idle(engine))
 		return false;
 
 	return true;
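
The new ring_is_idle() helper brackets the MODE_IDLE read with a runtime-PM reference so the register access cannot race with the device being powered down. The same shape in generic driver terms, as a sketch only: the device pointer, register and bit are placeholders, and i915 uses its intel_runtime_pm_get()/put() wrappers rather than the raw pm_runtime calls shown here.

#include <linux/pm_runtime.h>
#include <linux/io.h>

/* Keep the device awake for the duration of a raw status-register read. */
static bool hw_block_is_idle(struct device *dev, void __iomem *status_reg,
			     u32 idle_bit)
{
	bool idle;

	pm_runtime_get_sync(dev);		/* wake the device if suspended */
	idle = readl(status_reg) & idle_bit;	/* safe: device is powered */
	pm_runtime_put(dev);			/* drop the reference again */

	return idle;
}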
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index ded2add..d93c584 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -82,20 +82,10 @@ static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc)
 static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache,
 					    int *width, int *height)
 {
-	int w, h;
-
-	if (drm_rotation_90_or_270(cache->plane.rotation)) {
-		w = cache->plane.src_h;
-		h = cache->plane.src_w;
-	} else {
-		w = cache->plane.src_w;
-		h = cache->plane.src_h;
-	}
-
 	if (width)
-		*width = w;
+		*width = cache->plane.src_w;
 	if (height)
-		*height = h;
+		*height = cache->plane.src_h;
 }
 
 static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
@@ -746,6 +736,11 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 		cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate;
 
 	cache->plane.rotation = plane_state->base.rotation;
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16;
 	cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16;
 	cache->plane.visible = plane_state->base.visible;
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 668f004..292fedf 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -149,44 +149,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
 
 static void lpe_audio_irq_unmask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask &= ~val;
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	POSTING_READ(VLV_IMR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static void lpe_audio_irq_mask(struct irq_data *d)
 {
-	struct drm_i915_private *dev_priv = d->chip_data;
-	unsigned long irqflags;
-	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT);
-
-	if (IS_CHERRYVIEW(dev_priv))
-		val |= I915_LPE_PIPE_C_INTERRUPT;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-
-	dev_priv->irq_mask |= val;
-	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
-	I915_WRITE(VLV_IIR, val);
-	I915_WRITE(VLV_IIR, val);
-	POSTING_READ(VLV_IIR);
-
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
 static struct irq_chip lpe_audio_irqchip = {
@@ -330,8 +296,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 
 	desc = irq_to_desc(dev_priv->lpe_audio.irq);
 
-	lpe_audio_irq_mask(&desc->irq_data);
-
 	lpe_audio_platdev_destroy(dev_priv);
 
 	irq_free_desc(dev_priv->lpe_audio.irq);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c63..62f44d3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	reg_state[CTX_RING_TAIL+1] = rq->tail;
+	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -1989,7 +1988,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
 	ce->ring = ring;
 	ce->state = vma;
-	ce->initialised = engine->init_context == NULL;
+	ce->initialised |= engine->init_context == NULL;
 
 	return 0;
 
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
-			ce->ring->head = ce->ring->tail = 0;
-			intel_ring_update_space(ce->ring);
+			intel_ring_reset(ce->ring, 0);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index 71cbe9c..5abef482 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -240,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
 		return false;
 	}
 
-	intel_dp_read_desc(dp);
+	drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd));
 
 	DRM_DEBUG_KMS("Success: LSPCON init\n");
 	return true;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 570bd60..078fd1b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3373,20 +3373,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 
 	/* n.b., src is 16.16 fixed point, dst is whole integer */
 	if (plane->id == PLANE_CURSOR) {
+		/*
+		 * Cursors only support 0/180 degree rotation,
+		 * hence no need to account for rotation here.
+		 */
 		src_w = pstate->base.src_w;
 		src_h = pstate->base.src_h;
 		dst_w = pstate->base.crtc_w;
 		dst_h = pstate->base.crtc_h;
 	} else {
+		/*
+		 * Src coordinates are already rotated by 270 degrees for
+		 * the 90/270 degree plane rotation cases (to match the
+		 * GTT mapping), hence no need to account for rotation here.
+		 */
 		src_w = drm_rect_width(&pstate->base.src);
 		src_h = drm_rect_height(&pstate->base.src);
 		dst_w = drm_rect_width(&pstate->base.dst);
 		dst_h = drm_rect_height(&pstate->base.dst);
 	}
 
-	if (drm_rotation_90_or_270(pstate->base.rotation))
-		swap(dst_w, dst_h);
-
 	downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
 	downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
 
@@ -3417,12 +3423,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 	if (y && format != DRM_FORMAT_NV12)
 		return 0;
 
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(width, height);
-
 	/* for planar format */
 	if (format == DRM_FORMAT_NV12) {
 		if (y)  /* y-plane data rate */
@@ -3505,12 +3513,14 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate,
 	    fb->modifier != I915_FORMAT_MOD_Yf_TILED)
 		return 8;
 
+	/*
+	 * Src coordinates are already rotated by 270 degrees for
+	 * the 90/270 degree plane rotation cases (to match the
+	 * GTT mapping), hence no need to account for rotation here.
+	 */
 	src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
 	src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(src_w, src_h);
-
 	/* Halve UV plane width and height for NV12 */
 	if (fb->format->format == DRM_FORMAT_NV12 && !y) {
 		src_w /= 2;
@@ -3794,13 +3804,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		width = intel_pstate->base.crtc_w;
 		height = intel_pstate->base.crtc_h;
 	} else {
+		/*
+		 * Src coordinates are already rotated by 270 degrees for
+		 * the 90/270 degree plane rotation cases (to match the
+		 * GTT mapping), hence no need to account for rotation here.
+		 */
 		width = drm_rect_width(&intel_pstate->base.src) >> 16;
 		height = drm_rect_height(&intel_pstate->base.src) >> 16;
 	}
 
-	if (drm_rotation_90_or_270(pstate->rotation))
-		swap(width, height);
-
 	cpp = fb->format->cpp[0];
 	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
 
@@ -4335,11 +4347,19 @@ skl_compute_wm(struct drm_atomic_state *state)
 	struct drm_crtc_state *cstate;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct skl_wm_values *results = &intel_state->wm_results;
+	struct drm_device *dev = state->dev;
 	struct skl_pipe_wm *pipe_wm;
 	bool changed = false;
 	int ret, i;
 
 	/*
+	 * When we distrust bios wm we always need to recompute to set the
+	 * expected DDB allocations for each CRTC.
+	 */
+	if (to_i915(dev)->wm.distrust_bios_wm)
+		changed = true;
+
+	/*
 	 * If this transaction isn't actually touching any CRTC's, don't
 	 * bother with watermark calculation.  Note that if we pass this
 	 * test, we're guaranteed to hold at least one CRTC state mutex,
@@ -4349,6 +4369,7 @@ skl_compute_wm(struct drm_atomic_state *state)
 	 */
 	for_each_new_crtc_in_state(state, crtc, cstate, i)
 		changed = true;
+
 	if (!changed)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index c3780d0..559f1ab 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -435,8 +435,9 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
 	}
 
 	/* PSR2 is restricted to work with panel resolutions up to 3200x2000 */
-	if (intel_crtc->config->pipe_src_w > 3200 ||
-				intel_crtc->config->pipe_src_h > 2000) {
+	if (dev_priv->psr.psr2_support &&
+	    (intel_crtc->config->pipe_src_w > 3200 ||
+	     intel_crtc->config->pipe_src_h > 2000)) {
 		dev_priv->psr.psr2_support = false;
 		return false;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 66a2b8b..513a0f4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	assert_ring_tail_valid(request->ring, request->tail);
-	I915_WRITE_TAIL(request->engine, request->tail);
+	I915_WRITE_TAIL(request->engine,
+			intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
 	return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	GEM_BUG_ON(!list_empty(&ring->request_list));
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
+	/* Discard any unused bytes beyond those submitted to the hw. */
+	intel_ring_reset(ring, ring->tail);
+
 	if (i915_vma_is_map_and_fenceable(ring->vma))
 		i915_vma_unpin_iomap(ring->vma);
 	else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Restart from the beginning of the rings for convenience */
 	for_each_engine(engine, dev_priv, id)
-		engine->buffer->head = engine->buffer->tail;
+		intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 		unsigned space;
 
 		/* Would completion of this request free enough space? */
-		space = __intel_ring_space(target->postfix, ring->tail,
+		space = __intel_ring_space(target->postfix, ring->emit,
 					   ring->size);
 		if (space >= bytes)
 			break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
 	struct intel_ring *ring = req->ring;
-	int remain_actual = ring->size - ring->tail;
-	int remain_usable = ring->effective_size - ring->tail;
+	int remain_actual = ring->size - ring->emit;
+	int remain_usable = ring->effective_size - ring->emit;
 	int bytes = num_dwords * sizeof(u32);
 	int total_bytes, wait_bytes;
 	bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
 	if (unlikely(need_wrap)) {
 		GEM_BUG_ON(remain_actual > ring->space);
-		GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+		GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->tail, 0, remain_actual);
-		ring->tail = 0;
+		memset(ring->vaddr + ring->emit, 0, remain_actual);
+		ring->emit = 0;
 		ring->space -= remain_actual;
 	}
 
-	GEM_BUG_ON(ring->tail > ring->size - bytes);
-	cs = ring->vaddr + ring->tail;
-	ring->tail += bytes;
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	cs = ring->vaddr + ring->emit;
+	ring->emit += bytes;
 	ring->space -= bytes;
 	GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
 	int num_dwords =
-		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		(req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
 	u32 *cs;
 
 	if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a080..f7144fe 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -145,6 +145,7 @@ struct intel_ring {
 
 	u32 head;
 	u32 tail;
+	u32 emit;
 
 	int space;
 	int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 	 * reserved for the command packet (i.e. the value passed to
 	 * intel_ring_begin()).
 	 */
-	GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
 	GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly ordered, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_gem_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
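
struct intel_ring gains a separate 'emit' cursor: 'emit' advances as the CPU writes new commands (intel_ring_begin() and friends now update it), while 'tail' only moves via intel_ring_set_tail() when a request is actually handed to the hardware. A toy model of the two cursors; names and the power-of-two masking are assumptions for illustration, not the i915 structures.

/* Illustrative-only model of the two ring cursors. */
struct ring_model {
	unsigned int size;	/* ring size in bytes, assumed power of two */
	unsigned int emit;	/* CPU-side write cursor */
	unsigned int tail;	/* last value published to the HW tail register */
};

static void ring_emit_bytes(struct ring_model *r, unsigned int bytes)
{
	r->emit = (r->emit + bytes) & (r->size - 1);	/* CPU keeps writing */
}

static unsigned int ring_submit(struct ring_model *r)
{
	r->tail = r->emit;	/* publish everything emitted so far */
	return r->tail;		/* value to program into the tail register */
}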
 
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 8c87c71..e6517ed 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -83,10 +83,13 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode,
  */
 void intel_pipe_update_start(struct intel_crtc *crtc)
 {
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode;
 	long timeout = msecs_to_jiffies_timeout(1);
 	int scanline, min, max, vblank_start;
 	wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base);
+	bool need_vlv_dsi_wa = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+		intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI);
 	DEFINE_WAIT(wait);
 
 	vblank_start = adjusted_mode->crtc_vblank_start;
@@ -139,6 +142,24 @@ void intel_pipe_update_start(struct intel_crtc *crtc)
 
 	drm_crtc_vblank_put(&crtc->base);
 
+	/*
+	 * On VLV/CHV DSI the scanline counter would appear to
+	 * increment approx. 1/3 of a scanline before start of vblank.
+	 * The registers still get latched at start of vblank however.
+	 * This means we must not write any registers on the first
+	 * line of vblank (since not the whole line is actually in
+	 * vblank). And unfortunately we can't use the interrupt to
+	 * wait here since it will fire too soon. We could use the
+	 * frame start interrupt instead since it will fire after the
+	 * critical scanline, but that would require more changes
+	 * in the interrupt code. So for now we'll just do the nasty
+	 * thing and poll for the bad scanline to pass us by.
+	 *
+	 * FIXME figure out if BXT+ DSI suffers from this as well
+	 */
+	while (need_vlv_dsi_wa && scanline == vblank_start)
+		scanline = intel_get_crtc_scanline(crtc);
+
 	crtc->debug.scanline_start = scanline;
 	crtc->debug.start_vbl_time = ktime_get();
 	crtc->debug.start_vbl_count = intel_crtc_get_vblank_counter(crtc);
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 4b7f73a..f841152 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -59,8 +59,6 @@ struct drm_i915_gem_request;
  *                available in the work queue (note, the queue is shared,
  *                not per-engine). It is OK for this to be nonzero, but
  *                it should not be huge!
- *   q_fail: failed to enqueue a work item. This should never happen,
- *           because we check for space beforehand.
  *   b_fail: failed to ring the doorbell. This should never happen, unless
  *           somehow the hardware misbehaves, or maybe if the GuC firmware
  *           crashes? We probably need to reset the GPU to recover.
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1afb8b06..12b85b3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = NULL;
 	struct drm_file *file;
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
@@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg)
 		}
 
 		for_each_engine(engine, i915, id) {
-			if (dw == 0) {
+			if (!obj) {
 				obj = create_test_object(ctx, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
@@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg)
 				goto out_unlock;
 			}
 
-			if (++dw == max_dwords(obj))
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
 				dw = 0;
+			}
 			ndwords++;
 		}
 		ncontexts++;
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 8fb801f..8b05ecb 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -673,7 +673,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		ret = drm_of_find_panel_or_bridge(child,
 						  imx_ldb->lvds_mux ? 4 : 2, 0,
 						  &channel->panel, &channel->bridge);
-		if (ret)
+		if (ret && ret != -ENODEV)
 			return ret;
 
 		/* panel ddc only if there is no bridge */
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 808b995..b5cc6e1 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -19,6 +19,7 @@
 #include <drm/drm_of.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/iopoll.h>
 #include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -900,16 +901,12 @@ static int mtk_dsi_host_detach(struct mipi_dsi_host *host,
 
 static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi)
 {
-	u32 timeout_ms = 500000; /* total 1s ~ 2s timeout */
+	int ret;
+	u32 val;
 
-	while (timeout_ms--) {
-		if (!(readl(dsi->regs + DSI_INTSTA) & DSI_BUSY))
-			break;
-
-		usleep_range(2, 4);
-	}
-
-	if (timeout_ms == 0) {
+	ret = readl_poll_timeout(dsi->regs + DSI_INTSTA, val, !(val & DSI_BUSY),
+				 4, 2000000);
+	if (ret) {
 		DRM_WARN("polling dsi wait not busy timeout!\n");
 
 		mtk_dsi_enable(dsi);
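
The open-coded busy-wait in mtk_dsi_wait_for_idle() is replaced with readl_poll_timeout() from <linux/iopoll.h>, which re-reads a register until the given condition is true or the timeout expires, returning 0 or -ETIMEDOUT. A minimal usage sketch with placeholder register and bit names:

#include <linux/bitops.h>
#include <linux/iopoll.h>

#define STATUS_REG	0x0		/* placeholder register offset */
#define BUSY_BIT	BIT(0)		/* placeholder busy flag */

static int wait_until_not_busy(void __iomem *regs)
{
	u32 val;

	/* Poll every 4 us, give up after 2 s; returns 0 or -ETIMEDOUT. */
	return readl_poll_timeout(regs + STATUS_REG, val,
				  !(val & BUSY_BIT), 4, 2000000);
}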
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 41a1c03..0a4ffd7 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1062,7 +1062,7 @@ static int mtk_hdmi_setup_vendor_specific_infoframe(struct mtk_hdmi *hdmi,
 	}
 
 	err = hdmi_vendor_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err) {
+	if (err < 0) {
 		dev_err(hdmi->dev, "Failed to pack vendor infoframe: %zd\n",
 			err);
 		return err;
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 75382f5..10b227d 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -152,7 +152,7 @@ static struct regmap_config meson_regmap_config = {
 	.max_register   = 0x1000,
 };
 
-static int meson_drv_bind(struct device *dev)
+static int meson_drv_bind_master(struct device *dev, bool has_components)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct meson_drm *priv;
@@ -233,10 +233,12 @@ static int meson_drv_bind(struct device *dev)
 	if (ret)
 		goto free_drm;
 
-	ret = component_bind_all(drm->dev, drm);
-	if (ret) {
-		dev_err(drm->dev, "Couldn't bind all components\n");
-		goto free_drm;
+	if (has_components) {
+		ret = component_bind_all(drm->dev, drm);
+		if (ret) {
+			dev_err(drm->dev, "Couldn't bind all components\n");
+			goto free_drm;
+		}
 	}
 
 	ret = meson_plane_create(priv);
@@ -276,6 +278,11 @@ static int meson_drv_bind(struct device *dev)
 	return ret;
 }
 
+static int meson_drv_bind(struct device *dev)
+{
+	return meson_drv_bind_master(dev, true);
+}
+
 static void meson_drv_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
@@ -357,6 +364,9 @@ static int meson_drv_probe(struct platform_device *pdev)
 		count += meson_probe_remote(pdev, &match, np, remote);
 	}
 
+	if (count && !match)
+		return meson_drv_bind_master(&pdev->dev, false);
+
 	/* If some endpoints were found, initialize the nodes */
 	if (count) {
 		dev_info(&pdev->dev, "Queued %d outputs on vpu\n", count);
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index adb411a..f4b5358 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1173,7 +1173,10 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc,
 
 
 	if (IS_G200_SE(mdev)) {
-		if (mdev->unique_rev_id >= 0x02) {
+		if  (mdev->unique_rev_id >= 0x04) {
+			WREG8(MGAREG_CRTCEXT_INDEX, 0x06);
+			WREG8(MGAREG_CRTCEXT_DATA, 0);
+		} else if (mdev->unique_rev_id >= 0x02) {
 			u8 hi_pri_lvl;
 			u32 bpp;
 			u32 mb;
@@ -1639,6 +1642,10 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
 			if (mga_vga_calculate_mode_bandwidth(mode, bpp)
 				> (30100 * 1024))
 				return MODE_BANDWIDTH;
+		} else {
+			if (mga_vga_calculate_mode_bandwidth(mode, bpp)
+				> (55000 * 1024))
+				return MODE_BANDWIDTH;
 		}
 	} else if (mdev->type == G200_WB) {
 		if (mode->hdisplay > 1280)
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 5b8e23d..0a31cd6 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -13,6 +13,7 @@
 	select QCOM_SCM
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	select SYNC_FILE
+	select PM_OPP
 	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
index f8f48d0..9c34d78 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
@@ -116,7 +116,7 @@ static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-static struct irq_domain_ops mdss_hw_irqdomain_ops = {
+static const struct irq_domain_ops mdss_hw_irqdomain_ops = {
 	.map = mdss_hw_irqdomain_map,
 	.xlate = irq_domain_xlate_onecell,
 };
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index a38c5fe..7d37412 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -225,9 +225,10 @@ mdp5_plane_duplicate_state(struct drm_plane *plane)
 
 	mdp5_state = kmemdup(to_mdp5_plane_state(plane->state),
 			sizeof(*mdp5_state), GFP_KERNEL);
+	if (!mdp5_state)
+		return NULL;
 
-	if (mdp5_state && mdp5_state->base.fb)
-		drm_framebuffer_reference(mdp5_state->base.fb);
+	__drm_atomic_helper_plane_duplicate_state(plane, &mdp5_state->base);
 
 	return &mdp5_state->base;
 }
@@ -444,6 +445,10 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 			mdp5_pipe_release(state->state, old_hwpipe);
 			mdp5_pipe_release(state->state, old_right_hwpipe);
 		}
+	} else {
+		mdp5_pipe_release(state->state, mdp5_state->hwpipe);
+		mdp5_pipe_release(state->state, mdp5_state->r_hwpipe);
+		mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 87b5695..9d498eb 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -830,6 +830,7 @@ static struct drm_driver msm_driver = {
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export   = drm_gem_prime_export,
 	.gem_prime_import   = drm_gem_prime_import,
+	.gem_prime_res_obj  = msm_gem_prime_res_obj,
 	.gem_prime_pin      = msm_gem_prime_pin,
 	.gem_prime_unpin    = msm_gem_prime_unpin,
 	.gem_prime_get_sg_table = msm_gem_prime_get_sg_table,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 28b6f9b..1b26ca6 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -224,6 +224,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *msm_gem_prime_vmap(struct drm_gem_object *obj);
 void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj);
 struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 		struct dma_buf_attachment *attach, struct sg_table *sg);
 int msm_gem_prime_pin(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 3f299c5..a2f89ba 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -99,8 +99,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
 }
 
 struct msm_fence {
-	struct msm_fence_context *fctx;
 	struct dma_fence base;
+	struct msm_fence_context *fctx;
 };
 
 static inline struct msm_fence *to_msm_fence(struct dma_fence *fence)
@@ -130,19 +130,13 @@ static bool msm_fence_signaled(struct dma_fence *fence)
 	return fence_completed(f->fctx, f->base.seqno);
 }
 
-static void msm_fence_release(struct dma_fence *fence)
-{
-	struct msm_fence *f = to_msm_fence(fence);
-	kfree_rcu(f, base.rcu);
-}
-
 static const struct dma_fence_ops msm_fence_ops = {
 	.get_driver_name = msm_fence_get_driver_name,
 	.get_timeline_name = msm_fence_get_timeline_name,
 	.enable_signaling = msm_fence_enable_signaling,
 	.signaled = msm_fence_signaled,
 	.wait = dma_fence_default_wait,
-	.release = msm_fence_release,
+	.release = dma_fence_free,
 };
 
 struct dma_fence *
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 68e509b..50289a2 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -758,6 +758,8 @@ static int msm_gem_new_impl(struct drm_device *dev,
 	struct msm_gem_object *msm_obj;
 	bool use_vram = false;
 
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
 	switch (flags & MSM_BO_CACHE_MASK) {
 	case MSM_BO_UNCACHED:
 	case MSM_BO_CACHED:
@@ -853,7 +855,11 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 
 	size = PAGE_ALIGN(dmabuf->size);
 
+	/* Take mutex so we can modify the inactive list in msm_gem_new_impl */
+	mutex_lock(&dev->struct_mutex);
 	ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj);
+	mutex_unlock(&dev->struct_mutex);
+
 	if (ret)
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 60bb290..13403c6 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -70,3 +70,10 @@ void msm_gem_prime_unpin(struct drm_gem_object *obj)
 	if (!obj->import_attach)
 		msm_gem_put_pages(obj);
 }
+
+struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	return msm_obj->resv;
+}
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 1c545eb..7832e64 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -410,12 +410,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		if (!in_fence)
 			return -EINVAL;
 
-		/* TODO if we get an array-fence due to userspace merging multiple
-		 * fences, we need a way to determine if all the backing fences
-		 * are from our own context..
+		/*
+		 * Wait if the fence is from a foreign context, or if the fence
+		 * array contains any fence from a foreign context.
 		 */
-
-		if (in_fence->context != gpu->fctx->context) {
+		if (!dma_fence_match_context(in_fence, gpu->fctx->context)) {
 			ret = dma_fence_wait(in_fence, true);
 			if (ret)
 				return ret;
@@ -496,8 +495,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 			goto out;
 		}
 
-		if ((submit_cmd.size + submit_cmd.submit_offset) >=
-				msm_obj->base.size) {
+		if (!submit_cmd.size ||
+			((submit_cmd.size + submit_cmd.submit_offset) >
+				msm_obj->base.size)) {
 			DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size);
 			ret = -EINVAL;
 			goto out;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 97b9c38..0fdc88d 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -549,9 +549,9 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
 		gpu->grp_clks[i] = get_clock(dev, name);
 
 		/* Remember the key clocks that we need to control later */
-		if (!strcmp(name, "core"))
+		if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
 			gpu->core_clk = gpu->grp_clks[i];
-		else if (!strcmp(name, "rbbmtimer"))
+		else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
 			gpu->rbbmtimer_clk = gpu->grp_clks[i];
 
 		++i;
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index 1144e0c..0abe776 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -35,6 +35,13 @@
 #include "mxsfb_drv.h"
 #include "mxsfb_regs.h"
 
+#define MXS_SET_ADDR		0x4
+#define MXS_CLR_ADDR		0x8
+#define MODULE_CLKGATE		BIT(30)
+#define MODULE_SFTRST		BIT(31)
+/* A 1 second timeout should be plenty of time for block reset */
+#define RESET_TIMEOUT		1000000
+
 static u32 set_hsync_pulse_width(struct mxsfb_drm_private *mxsfb, u32 val)
 {
 	return (val & mxsfb->devdata->hs_wdth_mask) <<
@@ -159,6 +166,36 @@ static void mxsfb_disable_controller(struct mxsfb_drm_private *mxsfb)
 		clk_disable_unprepare(mxsfb->clk_disp_axi);
 }
 
+/*
+ * Clear the bit and poll until it reads back as cleared.  This is
+ * usually called with a reset address and a mask of either SFTRST
+ * (bit 31) or CLKGATE (bit 30).
+ */
+static int clear_poll_bit(void __iomem *addr, u32 mask)
+{
+	u32 reg;
+
+	writel(mask, addr + MXS_CLR_ADDR);
+	return readl_poll_timeout(addr, reg, !(reg & mask), 0, RESET_TIMEOUT);
+}
+
+static int mxsfb_reset_block(void __iomem *reset_addr)
+{
+	int ret;
+
+	ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+	if (ret)
+		return ret;
+
+	writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR);
+
+	ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+	if (ret)
+		return ret;
+
+	return clear_poll_bit(reset_addr, MODULE_CLKGATE);
+}
+
 static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 {
 	struct drm_display_mode *m = &mxsfb->pipe.crtc.state->adjusted_mode;
@@ -173,6 +210,11 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 	 */
 	mxsfb_enable_axi_clk(mxsfb);
 
+	/* Mandatory eLCDIF reset as per the Reference Manual */
+	err = mxsfb_reset_block(mxsfb->base);
+	if (err)
+		return;
+
 	/* Clear the FIFOs */
 	writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_SET);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 6a567fe..820a480 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -4,6 +4,7 @@
 
 struct nvkm_alarm {
 	struct list_head head;
+	struct list_head exec;
 	u64 timestamp;
 	void (*func)(struct nvkm_alarm *);
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 39468c21..7459ef9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -60,15 +60,13 @@ bool nouveau_is_v1_dsm(void) {
 }
 
 #ifdef CONFIG_VGA_SWITCHEROO
-static const char nouveau_dsm_muid[] = {
-	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-};
+static const guid_t nouveau_dsm_muid =
+	GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
+		  0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4);
 
-static const char nouveau_op_dsm_muid[] = {
-	0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
-	0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
-};
+static const guid_t nouveau_op_dsm_muid =
+	GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B,
+		  0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0);
 
 static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 {
@@ -86,7 +84,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *
 		args_buff[i] = (arg >> i * 8) & 0xFF;
 
 	*result = 0;
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100,
 				      func, &argv4, ACPI_TYPE_BUFFER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -138,7 +136,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg)
 		.integer.value = arg,
 	};
 
-	obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102,
+	obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102,
 				      func, &argv4, ACPI_TYPE_INTEGER);
 	if (!obj) {
 		acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -259,7 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
 	if (!acpi_has_method(dhandle, "_DSM"))
 		return;
 
-	supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+	supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102,
 				      1 << NOUVEAU_DSM_POWER);
 	optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 36268e1..15a13d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -80,7 +80,7 @@ int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
 MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)");
-int nouveau_runtime_pm = -1;
+static int nouveau_runtime_pm = -1;
 module_param_named(runpm, nouveau_runtime_pm, int, 0400);
 
 static struct drm_driver driver_stub;
@@ -495,7 +495,7 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_fbcon_init(dev);
 	nouveau_led_init(dev);
 
-	if (nouveau_runtime_pm != 0) {
+	if (nouveau_pmops_runtime()) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
 		pm_runtime_set_active(dev->dev);
@@ -527,7 +527,7 @@ nouveau_drm_unload(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (nouveau_runtime_pm != 0) {
+	if (nouveau_pmops_runtime()) {
 		pm_runtime_get_sync(dev->dev);
 		pm_runtime_forbid(dev->dev);
 	}
@@ -726,6 +726,14 @@ nouveau_pmops_thaw(struct device *dev)
 	return nouveau_do_resume(drm_dev, false);
 }
 
+bool
+nouveau_pmops_runtime(void)
+{
+	if (nouveau_runtime_pm == -1)
+		return nouveau_is_optimus() || nouveau_is_v1_dsm();
+	return nouveau_runtime_pm == 1;
+}
+
 static int
 nouveau_pmops_runtime_suspend(struct device *dev)
 {
@@ -733,14 +741,7 @@ nouveau_pmops_runtime_suspend(struct device *dev)
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (nouveau_runtime_pm == 0) {
-		pm_runtime_forbid(dev);
-		return -EBUSY;
-	}
-
-	/* are we optimus enabled? */
-	if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) {
-		DRM_DEBUG_DRIVER("failing to power off - not optimus\n");
+	if (!nouveau_pmops_runtime()) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
@@ -765,8 +766,10 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	struct nvif_device *device = &nouveau_drm(drm_dev)->client.device;
 	int ret;
 
-	if (nouveau_runtime_pm == 0)
-		return -EINVAL;
+	if (!nouveau_pmops_runtime()) {
+		pm_runtime_forbid(dev);
+		return -EBUSY;
+	}
 
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
@@ -796,14 +799,7 @@ nouveau_pmops_runtime_idle(struct device *dev)
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
 	struct drm_crtc *crtc;
 
-	if (nouveau_runtime_pm == 0) {
-		pm_runtime_forbid(dev);
-		return -EBUSY;
-	}
-
-	/* are we optimus enabled? */
-	if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) {
-		DRM_DEBUG_DRIVER("failing to power off - not optimus\n");
+	if (!nouveau_pmops_runtime()) {
 		pm_runtime_forbid(dev);
 		return -EBUSY;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index eadec2f..a11b6aa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -108,8 +108,6 @@ nouveau_cli(struct drm_file *fpriv)
 #include <nvif/object.h>
 #include <nvif/device.h>
 
-extern int nouveau_runtime_pm;
-
 struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
@@ -195,6 +193,7 @@ nouveau_drm(struct drm_device *dev)
 
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
+bool nouveau_pmops_runtime(void);
 
 #include <nvkm/core/tegra.h>
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index a4aacbc..02fe0ef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -87,7 +87,7 @@ void
 nouveau_vga_init(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
-	bool runtime = false;
+	bool runtime = nouveau_pmops_runtime();
 
 	/* only relevant for PCI devices */
 	if (!dev->pdev)
@@ -99,10 +99,6 @@ nouveau_vga_init(struct nouveau_drm *drm)
 	if (pci_is_thunderbolt_attached(dev->pdev))
 		return;
 
-	if (nouveau_runtime_pm == 1)
-		runtime = true;
-	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
-		runtime = true;
 	vga_switcheroo_register_client(dev->pdev, &nouveau_switcheroo_ops, runtime);
 
 	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
@@ -113,18 +109,13 @@ void
 nouveau_vga_fini(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
-	bool runtime = false;
+	bool runtime = nouveau_pmops_runtime();
 
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 
 	if (pci_is_thunderbolt_attached(dev->pdev))
 		return;
 
-	if (nouveau_runtime_pm == 1)
-		runtime = true;
-	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
-		runtime = true;
-
 	vga_switcheroo_unregister_client(dev->pdev);
 	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
 		vga_switcheroo_fini_domain_pm_ops(drm->dev->dev);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index a766324..06e564a 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -2107,7 +2107,8 @@ nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state)
 					asyc->set.dither = true;
 			}
 		} else {
-			asyc->set.mask = ~0;
+			if (asyc)
+				asyc->set.mask = ~0;
 			asyh->set.mask = ~0;
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
index e3e2f5e..f44682d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
@@ -81,10 +81,9 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 {
 	struct nvkm_subdev *subdev = &mxm->subdev;
 	struct nvkm_device *device = subdev->device;
-	static char muid[] = {
-		0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C,
-		0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65
-	};
+	static guid_t muid =
+		GUID_INIT(0x4004A400, 0x917D, 0x4CF2,
+			  0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65);
 	u32 mxms_args[] = { 0x00000000 };
 	union acpi_object argv4 = {
 		.buffer.type = ACPI_TYPE_BUFFER,
@@ -105,7 +104,7 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 	 * unless you pass in exactly the version it supports..
 	 */
 	rev = (version & 0xf0) << 4 | (version & 0x0f);
-	obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4);
+	obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4);
 	if (!obj) {
 		nvkm_debug(subdev, "DSM MXMS failed\n");
 		return false;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index f2a86ea..2437f7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -50,7 +50,8 @@ nvkm_timer_alarm_trigger(struct nvkm_timer *tmr)
 		/* Move to completed list.  We'll drop the lock before
 		 * executing the callback so it can reschedule itself.
 		 */
-		list_move_tail(&alarm->head, &exec);
+		list_del_init(&alarm->head);
+		list_add(&alarm->exec, &exec);
 	}
 
 	/* Shut down interrupt if no more pending alarms. */
@@ -59,8 +60,8 @@ nvkm_timer_alarm_trigger(struct nvkm_timer *tmr)
 	spin_unlock_irqrestore(&tmr->lock, flags);
 
 	/* Execute completed callbacks. */
-	list_for_each_entry_safe(alarm, atemp, &exec, head) {
-		list_del_init(&alarm->head);
+	list_for_each_entry_safe(alarm, atemp, &exec, exec) {
+		list_del(&alarm->exec);
 		alarm->func(alarm);
 	}
 }
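
struct nvkm_alarm now carries a second list_head so the trigger path can move due alarms onto a private list and run their callbacks after dropping the lock; a callback that immediately re-arms its alarm only touches 'head' and can no longer disturb the list the loop is still walking. The general shape of that pattern, with illustrative structure and helper names rather than the nvkm code:

#include <linux/list.h>
#include <linux/spinlock.h>

struct work_item {
	struct list_head head;	/* linkage on the shared pending list */
	struct list_head exec;	/* private linkage while being executed */
	void (*func)(struct work_item *);
};

/* Runs every pending item; the real code only moves the ones that are due. */
static void run_pending_items(spinlock_t *lock, struct list_head *pending)
{
	struct work_item *item, *tmp;
	LIST_HEAD(exec);

	spin_lock(lock);
	list_for_each_entry_safe(item, tmp, pending, head) {
		list_del_init(&item->head);	/* off the shared list */
		list_add(&item->exec, &exec);	/* onto our private list */
	}
	spin_unlock(lock);

	/*
	 * Callbacks may re-queue themselves on 'pending'; that only uses
	 * 'head', never the 'exec' linkage this loop is iterating.
	 */
	list_for_each_entry_safe(item, tmp, &exec, exec) {
		list_del(&item->exec);
		item->func(item);
	}
}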
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 058340a..4a340ef 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -575,8 +575,6 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	if (ret)
 		return;
 
-	cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
-
 	if (fb != old_state->fb) {
 		obj = to_qxl_framebuffer(fb)->obj;
 		user_bo = gem_to_qxl_bo(obj);
@@ -614,6 +612,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		qxl_bo_kunmap(cursor_bo);
 		qxl_bo_kunmap(user_bo);
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->u.set.visible = 1;
 		cmd->u.set.shape = qxl_bo_physical_address(qdev,
 							   cursor_bo, 0);
@@ -624,6 +623,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 		if (ret)
 			goto out_free_release;
 
+		cmd = (struct qxl_cursor_cmd *) qxl_release_map(qdev, release);
 		cmd->type = QXL_CURSOR_MOVE;
 	}
 
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 7ba4508..ea36dc4 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -776,6 +776,12 @@ bool ci_dpm_vblank_too_short(struct radeon_device *rdev)
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
 	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
 
+	/* disable mclk switching if the refresh is >120Hz, even if the
+	 * blanking period would allow it
+	 */
+	if (r600_dpm_get_vrefresh(rdev) > 120)
+		return true;
+
 	if (vblank_time < switch_limit)
 		return true;
 	else
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ccebe0f..ca44233 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -7401,7 +7401,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -7431,7 +7431,7 @@ static inline void cik_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -9267,8 +9267,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
 	u32 tmp, wm_mask;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
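
The watermark active_time/line_time math switches to div_u64() so the crtc_htotal * 1000000 (and crtc_hdisplay * 1000000) product is formed in 64 bits; with a 32-bit unsigned long the old expression wraps once the horizontal timing exceeds roughly 4294 pixels. A sketch of the promoted calculation; the function name is an assumption, and the clock is the pixel clock in kHz as in struct drm_display_mode:

#include <linux/kernel.h>
#include <linux/math64.h>

static u32 scaled_line_time(u32 crtc_htotal, u32 clock)
{
	/* 64-bit intermediate: crtc_htotal * 1000000 may not fit in 32 bits */
	u32 t = (u32)div_u64((u64)crtc_htotal * 1000000, clock);

	return min(t, (u32)65535);	/* clamp to the 16-bit register field */
}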
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f130ec4..5346372 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2266,8 +2266,11 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 		dram_channels = evergreen_get_number_of_dram_channels(rdev);
@@ -4927,7 +4930,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -4958,7 +4961,7 @@ static void evergreen_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0a08517..e06e2d8 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3988,7 +3988,7 @@ static void r600_irq_ack(struct radeon_device *rdev)
 			WREG32(DC_HPD5_INT_CONTROL, tmp);
 		}
 		if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) {
-			tmp = RREG32(DC_HPD5_INT_CONTROL);
+			tmp = RREG32(DC_HPD6_INT_CONTROL);
 			tmp |= DC_HPDx_INT_ACK;
 			WREG32(DC_HPD6_INT_CONTROL, tmp);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1c8e22..e562a78 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -375,7 +375,7 @@ struct radeon_fence {
 	unsigned		ring;
 	bool			is_vm_update;
 
-	wait_queue_t		fence_wake;
+	wait_queue_entry_t		fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 432480f..3178ba0 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
 	    rdev->pdev->subsystem_vendor == 0x103c &&
 	    rdev->pdev->subsystem_device == 0x280a)
 		return;
+	/* quirk for rs4xx Toshiba Satellite L20-183 laptop to make it resume
+	 * - it hangs on resume inside the dynclk 1 table.
+	 */
+	if (rdev->family == CHIP_RS400 &&
+	    rdev->pdev->subsystem_vendor == 0x1179 &&
+	    rdev->pdev->subsystem_device == 0xff31)
+		return;
 
 	/* DYN CLK 1 */
 	table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6ecf427..0a6444d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=51381
 	 */
 	{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+	/* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+	 */
+	{ PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
 	/* macbook pro 8.2 */
 	{ PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
 	{ 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index ef09f0a6..e86f2bd 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -158,7 +158,7 @@ int radeon_fence_emit(struct radeon_device *rdev,
  * for the fence locking itself, so unlocked variants are used for
  * fence_signal, and remove_wait_queue.
  */
-static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
 {
 	struct radeon_fence *fence;
 	u64 seq;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index e3e7cb1..4761f27 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -116,7 +116,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	if ((radeon_runtime_pm != 0) &&
 	    radeon_has_atpx() &&
 	    ((flags & RADEON_IS_IGP) == 0) &&
-	    !pci_is_thunderbolt_attached(rdev->pdev))
+	    !pci_is_thunderbolt_attached(dev->pdev))
 		flags |= RADEON_IS_PX;
 
 	/* radeon_device_init should report only fatal error
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 7431eb4..d34d1cf 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -621,7 +621,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
 	}
 
 	/* TODO: is this still necessary on NI+ ? */
-	if ((cmd == 0 || cmd == 1 || cmd == 0x3) &&
+	if ((cmd == 0 || cmd == 0x3) &&
 	    (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
 		DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 			  start, end);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ceee87f..5303f25 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2284,8 +2284,11 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
 	fixed20_12 a, b, c;
 
 	if (radeon_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -6317,7 +6320,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
@@ -6348,7 +6351,7 @@ static inline void si_irq_ack(struct radeon_device *rdev)
 		WREG32(DC_HPD5_INT_CONTROL, tmp);
 	}
 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp = RREG32(DC_HPD6_INT_CONTROL);
 		tmp |= DC_HPDx_RX_INT_ACK;
 		WREG32(DC_HPD6_INT_CONTROL, tmp);
 	}
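(Note on the si.c hunks above: the HPD6 acknowledge path now reads DC_HPD6_INT_CONTROL instead of the HPD5 register, and dce6_program_watermarks() widens its active/line time computation to 64 bits before dividing so the intermediate product cannot wrap on large modes. A minimal sketch of the latter arithmetic, assuming only div_u64() from <linux/math64.h>; the helper name is made up for illustration.)

#include <linux/kernel.h>
#include <linux/math64.h>

/* illustrative helper: scaled line time, clamped to a 16-bit register field */
static u32 example_line_time(u32 crtc_htotal, u32 clock_khz)
{
	/* widen before multiplying so htotal * 1000000 cannot overflow 32 bits */
	u32 line_time = (u32)div_u64((u64)crtc_htotal * 1000000, clock_khz);

	return min(line_time, (u32)65535);
}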
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index d8fa7a9..ce5f2d1 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -245,8 +245,6 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 				      struct drm_connector_state *conn_state)
 {
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
-	struct rockchip_dp_device *dp = to_dp(encoder);
-	int ret;
 
 	/*
 	 * The hardware IC designed that VOP must output the RGB10 video
@@ -258,16 +256,6 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 
 	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
 	s->output_type = DRM_MODE_CONNECTOR_eDP;
-	if (dp->data->chip_type == RK3399_EDP) {
-		/*
-		 * For RK3399, VOP Lit must code the out mode to RGB888,
-		 * VOP Big must code the out mode to RGB10.
-		 */
-		ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node,
-							encoder);
-		if (ret > 0)
-			s->output_mode = ROCKCHIP_OUT_MODE_P888;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index a2169dd..14fa1f8 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -615,7 +615,6 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder)
 {
 	struct cdn_dp_device *dp = encoder_to_dp(encoder);
 	int ret, val;
-	struct rockchip_crtc_state *state;
 
 	ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node, encoder);
 	if (ret < 0) {
@@ -625,14 +624,10 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder)
 
 	DRM_DEV_DEBUG_KMS(dp->dev, "vop %s output to cdn-dp\n",
 			  (ret) ? "LIT" : "BIG");
-	state = to_rockchip_crtc_state(encoder->crtc->state);
-	if (ret) {
+	if (ret)
 		val = DP_SEL_VOP_LIT | (DP_SEL_VOP_LIT << 16);
-		state->output_mode = ROCKCHIP_OUT_MODE_P888;
-	} else {
+	else
 		val = DP_SEL_VOP_LIT << 16;
-		state->output_mode = ROCKCHIP_OUT_MODE_AAAA;
-	}
 
 	ret = cdn_dp_grf_write(dp, GRF_SOC_CON9, val);
 	if (ret)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 3f7a82d..45589d6 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -875,6 +875,7 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
 static void vop_crtc_enable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
+	const struct vop_data *vop_data = vop->data;
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc->state);
 	struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode;
 	u16 hsync_len = adjusted_mode->hsync_end - adjusted_mode->hsync_start;
@@ -967,6 +968,13 @@ static void vop_crtc_enable(struct drm_crtc *crtc)
 		DRM_DEV_ERROR(vop->dev, "unsupported connector_type [%d]\n",
 			      s->output_type);
 	}
+
+	/*
+	 * If the VOP does not support RGB10 output, force the mode down to RGB888.
+	 */
+	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
+	    !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10))
+		s->output_mode = ROCKCHIP_OUT_MODE_P888;
 	VOP_CTRL_SET(vop, out_mode, s->output_mode);
 
 	VOP_CTRL_SET(vop, htotal_pw, (htotal << 16) | hsync_len);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 5a4faa85..9979fd0 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -142,6 +142,9 @@ struct vop_data {
 	const struct vop_intr *intr;
 	const struct vop_win_data *win;
 	unsigned int win_size;
+
+#define VOP_FEATURE_OUTPUT_RGB10	BIT(0)
+	u64 feature;
 };
 
 /* interrupt define */
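(Note on the rockchip hunks above: the feature bitmask added to struct vop_data lets each SoC variant declare whether its VOP instance can drive RGB10 output; vop_crtc_enable() degrades a requested AAAA (RGB10) mode to P888 when the bit is absent, and only the RK3288 and RK3399 "big" VOPs set it below. A condensed sketch of the check, using a trimmed capabilities struct rather than the driver's real one:)

#include <linux/types.h>

/* trimmed stand-in for the capability fields used here */
struct vop_caps {
	u64 feature;		/* VOP_FEATURE_* bits */
};

static u32 vop_pick_output_mode(const struct vop_caps *caps, u32 requested)
{
	/* RGB10 requested on a VOP that cannot output it: fall back to RGB888 */
	if (requested == ROCKCHIP_OUT_MODE_AAAA &&
	    !(caps->feature & VOP_FEATURE_OUTPUT_RGB10))
		return ROCKCHIP_OUT_MODE_P888;

	return requested;
}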
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 0da4444..bafd698 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -275,6 +275,7 @@ static const struct vop_intr rk3288_vop_intr = {
 static const struct vop_data rk3288_vop = {
 	.init_table = rk3288_init_reg_table,
 	.table_size = ARRAY_SIZE(rk3288_init_reg_table),
+	.feature = VOP_FEATURE_OUTPUT_RGB10,
 	.intr = &rk3288_vop_intr,
 	.ctrl = &rk3288_ctrl_data,
 	.win = rk3288_vop_win_data,
@@ -343,6 +344,7 @@ static const struct vop_reg_data rk3399_init_reg_table[] = {
 static const struct vop_data rk3399_vop_big = {
 	.init_table = rk3399_init_reg_table,
 	.table_size = ARRAY_SIZE(rk3399_init_reg_table),
+	.feature = VOP_FEATURE_OUTPUT_RGB10,
 	.intr = &rk3399_vop_intr,
 	.ctrl = &rk3399_ctrl_data,
 	/*
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 9a1e34e..81f86a6 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -451,18 +451,6 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
 
 #ifdef CONFIG_DRM_TEGRA_STAGING
-static struct tegra_drm_context *
-tegra_drm_file_get_context(struct tegra_drm_file *file, u32 id)
-{
-	struct tegra_drm_context *context;
-
-	mutex_lock(&file->lock);
-	context = idr_find(&file->contexts, id);
-	mutex_unlock(&file->lock);
-
-	return context;
-}
-
 static int tegra_gem_create(struct drm_device *drm, void *data,
 			    struct drm_file *file)
 {
@@ -551,7 +539,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
 	if (err < 0)
 		return err;
 
-	err = idr_alloc(&fpriv->contexts, context, 0, 0, GFP_KERNEL);
+	err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
 	if (err < 0) {
 		client->ops->close_channel(context);
 		return err;
@@ -606,7 +594,7 @@ static int tegra_close_channel(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -EINVAL;
 		goto unlock;
@@ -631,7 +619,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -660,7 +648,7 @@ static int tegra_submit(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -685,7 +673,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = tegra_drm_file_get_context(fpriv, args->context);
+	context = idr_find(&fpriv->contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
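(Note on the Tegra staging ioctl hunks above: context IDs are now allocated with idr_alloc(..., 1, 0, ...), i.e. starting at 1 with no upper bound, and the lookups call idr_find() directly while fpriv->lock is already held, replacing the removed helper that tried to take the same mutex again from inside the locked section. A small sketch of that allocate/lookup pairing with placeholder types, not the driver's own:)

#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/slab.h>

/* placeholder container, standing in for the per-file context table */
struct ctx_table {
	struct mutex lock;
	struct idr contexts;
};

static int ctx_add(struct ctx_table *t, void *ctx)
{
	int id;

	mutex_lock(&t->lock);
	/* start at 1 (end == 0 means "no limit"), so id 0 is never handed out */
	id = idr_alloc(&t->contexts, ctx, 1, 0, GFP_KERNEL);
	mutex_unlock(&t->lock);

	return id;
}

/* caller already holds t->lock, as the ioctls above do around idr_find() */
static void *ctx_lookup_locked(struct ctx_table *t, u32 id)
{
	lockdep_assert_held(&t->lock);

	return idr_find(&t->contexts, id);
}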
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
index 13db8a2..1f013d4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
 	list_for_each_entry_safe(entry, next, &man->list, head)
 		vmw_cmdbuf_res_free(man, entry);
 
+	drm_ht_remove(&man->resources);
 	kfree(man);
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 130d51c..4b948fba 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -41,9 +41,9 @@
 #include <drm/ttm/ttm_module.h>
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20170221"
+#define VMWGFX_DRIVER_DATE "20170607"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 12
+#define VMWGFX_DRIVER_MINOR 13
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index b6a0806..a1c68e6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -368,6 +368,8 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
 				return fifo_state->static_buffer;
 			else {
 				fifo_state->dynamic_buffer = vmalloc(bytes);
+				if (!fifo_state->dynamic_buffer)
+					goto out_err;
 				return fifo_state->dynamic_buffer;
 			}
 		}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index ef9f3a2..1d2db5d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -274,108 +274,6 @@ void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
 }
 
 
-
-/**
- * vmw_du_cursor_plane_update() - Update cursor image and location
- *
- * @plane: plane object to update
- * @crtc: owning CRTC of @plane
- * @fb: framebuffer to flip onto plane
- * @crtc_x: x offset of plane on crtc
- * @crtc_y: y offset of plane on crtc
- * @crtc_w: width of plane rectangle on crtc
- * @crtc_h: height of plane rectangle on crtc
- * @src_x: Not used
- * @src_y: Not used
- * @src_w: Not used
- * @src_h: Not used
- *
- *
- * RETURNS:
- * Zero on success, error code on failure
- */
-int vmw_du_cursor_plane_update(struct drm_plane *plane,
-			       struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb,
-			       int crtc_x, int crtc_y,
-			       unsigned int crtc_w,
-			       unsigned int crtc_h,
-			       uint32_t src_x, uint32_t src_y,
-			       uint32_t src_w, uint32_t src_h)
-{
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
-	struct vmw_surface *surface = NULL;
-	struct vmw_dma_buffer *dmabuf = NULL;
-	s32 hotspot_x, hotspot_y;
-	int ret;
-
-	hotspot_x = du->hotspot_x + fb->hot_x;
-	hotspot_y = du->hotspot_y + fb->hot_y;
-
-	/* A lot of the code assumes this */
-	if (crtc_w != 64 || crtc_h != 64) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (vmw_framebuffer_to_vfb(fb)->dmabuf)
-		dmabuf = vmw_framebuffer_to_vfbd(fb)->buffer;
-	else
-		surface = vmw_framebuffer_to_vfbs(fb)->surface;
-
-	if (surface && !surface->snooper.image) {
-		DRM_ERROR("surface not suitable for cursor\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* setup new image */
-	ret = 0;
-	if (surface) {
-		/* vmw_user_surface_lookup takes one reference */
-		du->cursor_surface = surface;
-
-		du->cursor_age = du->cursor_surface->snooper.age;
-
-		ret = vmw_cursor_update_image(dev_priv, surface->snooper.image,
-					      64, 64, hotspot_x, hotspot_y);
-	} else if (dmabuf) {
-		/* vmw_user_surface_lookup takes one reference */
-		du->cursor_dmabuf = dmabuf;
-
-		ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, crtc_w, crtc_h,
-					       hotspot_x, hotspot_y);
-	} else {
-		vmw_cursor_update_position(dev_priv, false, 0, 0);
-		goto out;
-	}
-
-	if (!ret) {
-		du->cursor_x = crtc_x + du->set_gui_x;
-		du->cursor_y = crtc_y + du->set_gui_y;
-
-		vmw_cursor_update_position(dev_priv, true,
-					   du->cursor_x + hotspot_x,
-					   du->cursor_y + hotspot_y);
-	}
-
-out:
-	return ret;
-}
-
-
-int vmw_du_cursor_plane_disable(struct drm_plane *plane)
-{
-	if (plane->fb) {
-		drm_framebuffer_unreference(plane->fb);
-		plane->fb = NULL;
-	}
-
-	return -EINVAL;
-}
-
-
 void vmw_du_cursor_plane_destroy(struct drm_plane *plane)
 {
 	vmw_cursor_update_position(plane->dev->dev_private, false, 0, 0);
@@ -473,18 +371,6 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
 
 
 void
-vmw_du_cursor_plane_atomic_disable(struct drm_plane *plane,
-				   struct drm_plane_state *old_state)
-{
-	struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc;
-	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
-
-	drm_atomic_set_fb_for_plane(plane->state, NULL);
-	vmw_cursor_update_position(dev_priv, false, 0, 0);
-}
-
-
-void
 vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 				  struct drm_plane_state *old_state)
 {
@@ -1498,6 +1384,7 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
 	 */
 	if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)  &&
 	    dmabuf && only_2d &&
+	    mode_cmd->width > 64 &&  /* Don't create a proxy for cursor */
 	    dev_priv->active_display_unit == vmw_du_screen_target) {
 		ret = vmw_create_dmabuf_proxy(dev_priv->dev, mode_cmd,
 					      dmabuf, &surface);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 13f2f1d2..5f8d678 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -256,10 +256,6 @@ int vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			   u16 *r, u16 *g, u16 *b,
 			   uint32_t size,
 			   struct drm_modeset_acquire_ctx *ctx);
-int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
-			    uint32_t handle, uint32_t width, uint32_t height,
-			    int32_t hot_x, int32_t hot_y);
-int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
 int vmw_du_connector_set_property(struct drm_connector *connector,
 				  struct drm_property *property,
 				  uint64_t val);
@@ -339,15 +335,6 @@ void vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
 /* Universal Plane Helpers */
 void vmw_du_primary_plane_destroy(struct drm_plane *plane);
 void vmw_du_cursor_plane_destroy(struct drm_plane *plane);
-int vmw_du_cursor_plane_disable(struct drm_plane *plane);
-int vmw_du_cursor_plane_update(struct drm_plane *plane,
-			       struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb,
-			       int crtc_x, int crtc_y,
-			       unsigned int crtc_w,
-			       unsigned int crtc_h,
-			       uint32_t src_x, uint32_t src_y,
-			       uint32_t src_w, uint32_t src_h);
 
 /* Atomic Helpers */
 int vmw_du_primary_plane_atomic_check(struct drm_plane *plane,
@@ -356,8 +343,6 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
 				     struct drm_plane_state *state);
 void vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 				       struct drm_plane_state *old_state);
-void vmw_du_cursor_plane_atomic_disable(struct drm_plane *plane,
-					struct drm_plane_state *old_state);
 int vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
 				   struct drm_plane_state *new_state);
 void vmw_du_plane_cleanup_fb(struct drm_plane *plane,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index bad31bd..50be1f0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -56,6 +56,8 @@ enum stdu_content_type {
  * @right: Right side of bounding box.
  * @top: Top side of bounding box.
  * @bottom: Bottom side of bounding box.
+ * @fb_left: Left side of the framebuffer/content bounding box.
+ * @fb_top: Top of the framebuffer/content bounding box.
  * @buf: DMA buffer when DMA-ing between buffer and screen targets.
  * @sid: Surface ID when copying between surface and screen targets.
  */
@@ -63,6 +65,7 @@ struct vmw_stdu_dirty {
 	struct vmw_kms_dirty base;
 	SVGA3dTransferType  transfer;
 	s32 left, right, top, bottom;
+	s32 fb_left, fb_top;
 	u32 pitch;
 	union {
 		struct vmw_dma_buffer *buf;
@@ -647,7 +650,7 @@ static void vmw_stdu_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
  *
  * @dirty: The closure structure.
  *
- * This function calculates the bounding box for all the incoming clips
+ * This function calculates the bounding box for all the incoming clips.
  */
 static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
 {
@@ -656,11 +659,19 @@ static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
 
 	dirty->num_hits = 1;
 
-	/* Calculate bounding box */
+	/* Calculate destination bounding box */
 	ddirty->left = min_t(s32, ddirty->left, dirty->unit_x1);
 	ddirty->top = min_t(s32, ddirty->top, dirty->unit_y1);
 	ddirty->right = max_t(s32, ddirty->right, dirty->unit_x2);
 	ddirty->bottom = max_t(s32, ddirty->bottom, dirty->unit_y2);
+
+	/*
+	 * Calculate content bounding box.  We only need the top-left
+	 * coordinate because width and height will be the same as the
+	 * destination bounding box above.
+	 */
+	ddirty->fb_left = min_t(s32, ddirty->fb_left, dirty->fb_x);
+	ddirty->fb_top  = min_t(s32, ddirty->fb_top, dirty->fb_y);
 }
 
 
@@ -697,11 +708,11 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
 	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
 	src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
-	src += dirty->unit_y1 * src_pitch + dirty->unit_x1 * stdu->cpp;
+	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
 	dst_pitch = ddirty->pitch;
 	dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
-	dst += dirty->fb_y * dst_pitch + dirty->fb_x * stdu->cpp;
+	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
 	/* Figure out the real direction */
@@ -760,7 +771,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	}
 
 out_cleanup:
-	ddirty->left = ddirty->top = S32_MAX;
+	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
 }
 
@@ -812,6 +823,7 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 		SVGA3D_READ_HOST_VRAM;
 	ddirty.left = ddirty.top = S32_MAX;
 	ddirty.right = ddirty.bottom = S32_MIN;
+	ddirty.fb_left = ddirty.fb_top = S32_MAX;
 	ddirty.pitch = vfb->base.pitches[0];
 	ddirty.buf = buf;
 	ddirty.base.fifo_commit = vmw_stdu_dmabuf_fifo_commit;
@@ -1355,6 +1367,11 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 		DRM_ERROR("Failed to bind surface to STDU.\n");
 	else
 		crtc->primary->fb = plane->state->fb;
+
+	ret = vmw_stdu_update_st(dev_priv, stdu);
+
+	if (ret)
+		DRM_ERROR("Failed to update STDU.\n");
 }
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 7681341..6b70bd2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -1274,11 +1274,14 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	int ret;
 	uint32_t size;
-	uint32_t backup_handle;
+	uint32_t backup_handle = 0;
 
 	if (req->multisample_count != 0)
 		return -EINVAL;
 
+	if (req->mip_levels > DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
 	if (unlikely(vmw_user_surface_size == 0))
 		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
 			128;
@@ -1314,12 +1317,16 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 		ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle,
 					     &res->backup,
 					     &user_srf->backup_base);
-		if (ret == 0 && res->backup->base.num_pages * PAGE_SIZE <
-		    res->backup_size) {
-			DRM_ERROR("Surface backup buffer is too small.\n");
-			vmw_dmabuf_unreference(&res->backup);
-			ret = -EINVAL;
-			goto out_unlock;
+		if (ret == 0) {
+			if (res->backup->base.num_pages * PAGE_SIZE <
+			    res->backup_size) {
+				DRM_ERROR("Surface backup buffer is too small.\n");
+				vmw_dmabuf_unreference(&res->backup);
+				ret = -EINVAL;
+				goto out_unlock;
+			} else {
+				backup_handle = req->buffer_handle;
+			}
 		}
 	} else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer)
 		ret = vmw_user_dmabuf_alloc(dev_priv, tfile,
@@ -1491,7 +1498,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 				 dev_priv->stdu_max_height);
 
 		if (size.width > max_width || size.height > max_height) {
-			DRM_ERROR("%ux%u\n, exeeds max surface size %ux%u",
+			DRM_ERROR("%ux%u exceeds max surface size %ux%u\n",
 				  size.width, size.height,
 				  max_width, max_height);
 			return -EINVAL;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index f05ebb1..ac65f52 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -172,7 +172,7 @@ static int host1x_probe(struct platform_device *pdev)
 
 	host->rst = devm_reset_control_get(&pdev->dev, "host1x");
 	if (IS_ERR(host->rst)) {
-		err = PTR_ERR(host->clk);
+		err = PTR_ERR(host->rst);
 		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
 		return err;
 	}
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 16d5568..2fb5f43 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -725,15 +725,16 @@ void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi)
 	spin_lock_irqsave(&ipu->lock, flags);
 
 	val = ipu_cm_read(ipu, IPU_CONF);
-	if (vdi) {
+	if (vdi)
 		val |= IPU_CONF_IC_INPUT;
-	} else {
+	else
 		val &= ~IPU_CONF_IC_INPUT;
-		if (csi_id == 1)
-			val |= IPU_CONF_CSI_SEL;
-		else
-			val &= ~IPU_CONF_CSI_SEL;
-	}
+
+	if (csi_id == 1)
+		val |= IPU_CONF_CSI_SEL;
+	else
+		val &= ~IPU_CONF_CSI_SEL;
+
 	ipu_cm_write(ipu, val, IPU_CONF);
 
 	spin_unlock_irqrestore(&ipu->lock, flags);
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index c555633..c35f74c 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -131,8 +131,6 @@ int ipu_pre_get(struct ipu_pre *pre)
 	if (pre->in_use)
 		return -EBUSY;
 
-	clk_prepare_enable(pre->clk_axi);
-
 	/* first get the engine out of reset and remove clock gating */
 	writel(0, pre->regs + IPU_PRE_CTRL);
 
@@ -149,12 +147,7 @@ int ipu_pre_get(struct ipu_pre *pre)
 
 void ipu_pre_put(struct ipu_pre *pre)
 {
-	u32 val;
-
-	val = IPU_PRE_CTRL_SFTRST | IPU_PRE_CTRL_CLKGATE;
-	writel(val, pre->regs + IPU_PRE_CTRL);
-
-	clk_disable_unprepare(pre->clk_axi);
+	writel(IPU_PRE_CTRL_SFTRST, pre->regs + IPU_PRE_CTRL);
 
 	pre->in_use = false;
 }
@@ -249,6 +242,8 @@ static int ipu_pre_probe(struct platform_device *pdev)
 	if (!pre->buffer_virt)
 		return -ENOMEM;
 
+	clk_prepare_enable(pre->clk_axi);
+
 	pre->dev = dev;
 	platform_set_drvdata(pdev, pre);
 	mutex_lock(&ipu_pre_list_mutex);
@@ -268,6 +263,8 @@ static int ipu_pre_remove(struct platform_device *pdev)
 	available_pres--;
 	mutex_unlock(&ipu_pre_list_mutex);
 
+	clk_disable_unprepare(pre->clk_axi);
+
 	if (pre->buffer_virt)
 		gen_pool_free(pre->iram, (unsigned long)pre->buffer_virt,
 			      IPU_PRE_MAX_WIDTH * IPU_PRE_NUM_SCANLINES * 4);
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 92f1452..76875f62 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -417,7 +417,7 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
 {
 	struct vga_device *vgadev, *conflict;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int rc = 0;
 
 	vga_check_first_use();
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index fe40e5e..687705c 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -275,10 +275,12 @@
 	 - Trio Linker Plus II
 
 config HID_ELECOM
-	tristate "ELECOM BM084 bluetooth mouse"
+	tristate "ELECOM HID devices"
 	depends on HID
 	---help---
-	Support for the ELECOM BM084 (bluetooth mouse).
+	Support for ELECOM devices:
+	  - BM084 Bluetooth Mouse
+	  - DEFT Trackball (Wired and wireless)
 
 config HID_ELO
 	tristate "ELO USB 4000/4500 touchscreen"
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 16df6cc9..a6268f2 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -69,6 +69,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_IS_MULTITOUCH		BIT(3)
 #define QUIRK_NO_CONSUMER_USAGES	BIT(4)
 #define QUIRK_USE_KBD_BACKLIGHT		BIT(5)
+#define QUIRK_T100_KEYBOARD		BIT(6)
 
 #define I2C_KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
 						 QUIRK_NO_INIT_REPORTS | \
@@ -536,6 +537,8 @@ static void asus_remove(struct hid_device *hdev)
 		drvdata->kbd_backlight->removed = true;
 		cancel_work_sync(&drvdata->kbd_backlight->work);
 	}
+
+	hid_hw_stop(hdev);
 }
 
 static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
@@ -548,6 +551,12 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		hid_info(hdev, "Fixing up Asus notebook report descriptor\n");
 		rdesc[55] = 0xdd;
 	}
+	if (drvdata->quirks & QUIRK_T100_KEYBOARD &&
+		 *rsize == 76 && rdesc[73] == 0x81 && rdesc[74] == 0x01) {
+		hid_info(hdev, "Fixing up Asus T100 keyb report descriptor\n");
+		rdesc[74] &= ~HID_MAIN_ITEM_CONSTANT;
+	}
+
 	return rdesc;
 }
 
@@ -560,6 +569,9 @@ static const struct hid_device_id asus_devices[] = {
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2), QUIRK_USE_KBD_BACKLIGHT },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
+		USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD),
+	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, asus_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 37084b6..6e04069 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -826,11 +826,35 @@ static int hid_scan_report(struct hid_device *hid)
 				 * hid-rmi should take care of them,
 				 * not hid-generic
 				 */
-				if (IS_ENABLED(CONFIG_HID_RMI))
-					hid->group = HID_GROUP_RMI;
+				hid->group = HID_GROUP_RMI;
 		break;
 	}
 
+	/* fall back to generic driver in case specific driver doesn't exist */
+	switch (hid->group) {
+	case HID_GROUP_MULTITOUCH_WIN_8:
+		/* fall-through */
+	case HID_GROUP_MULTITOUCH:
+		if (!IS_ENABLED(CONFIG_HID_MULTITOUCH))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_SENSOR_HUB:
+		if (!IS_ENABLED(CONFIG_HID_SENSOR_HUB))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_RMI:
+		if (!IS_ENABLED(CONFIG_HID_RMI))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_WACOM:
+		if (!IS_ENABLED(CONFIG_HID_WACOM))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	case HID_GROUP_LOGITECH_DJ_DEVICE:
+		if (!IS_ENABLED(CONFIG_HID_LOGITECH_DJ))
+			hid->group = HID_GROUP_GENERIC;
+		break;
+	}
 	vfree(parser);
 	return 0;
 }
@@ -1763,15 +1787,23 @@ EXPORT_SYMBOL_GPL(hid_disconnect);
  * used as a driver. See hid_scan_report().
  */
 static const struct hid_device_id hid_have_special_driver[] = {
+#if IS_ENABLED(CONFIG_HID_A4TECH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACRUX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ALPS)
 	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
@@ -1792,11 +1824,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
@@ -1851,59 +1878,100 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLEIR)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ASUS)
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_AUREAL)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BELKIN)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BETOP_FF)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHERRY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHICONY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CMEDIA)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CORSAIR)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CP2112)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CYPRESS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+#endif
+#if IS_ENABLED(CONFIG_HID_DRAGONRISE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011) },
-#if IS_ENABLED(CONFIG_HID_MAYFLASH)
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
 #endif
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+#if IS_ENABLED(CONFIG_HID_ELECOM)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ELO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EMS_FF)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EZKEY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GEMBIRD)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GFRM)
+	{ HID_BLUETOOTH_DEVICE(0x58, 0x2000) },
+	{ HID_BLUETOOTH_DEVICE(0x471, 0x2210) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GREENASIA)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GT683R)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GYRATION)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_3) },
+#endif
+#if IS_ENABLED(CONFIG_HID_HOLTEK)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
@@ -1912,12 +1980,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ICADE)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KENSINGTON)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KEYTOUCH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KEYTOUCH, USB_DEVICE_ID_KEYTOUCH_IEC) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KYE)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
@@ -1927,21 +2000,29 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LCPOWER)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LED)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
 #if IS_ENABLED(CONFIG_HID_LENOVO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
 #endif
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#if IS_ENABLED(CONFIG_HID_LOGITECH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
@@ -1954,7 +2035,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
@@ -1966,17 +2046,30 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL) },
-#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
-#endif
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_HIDPP)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAYFLASH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MICROSOFT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
@@ -1992,9 +2085,22 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MONTEREY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MULTITOUCH)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WIIMOTE)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTI)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTRIG)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2) },
@@ -2014,13 +2120,41 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ORTEK)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_PKB1700) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PANTHERLORD)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PENMOUNT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_6000) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PETALYNX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PICOLCD)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PLANTRONICS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRIMAX)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRODIKEYS)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_RMI)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
+#endif
 #if IS_ENABLED(CONFIG_HID_ROCCAT)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) },
@@ -2048,9 +2182,21 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
 #endif
+#if IS_ENABLED(CONFIG_HID_SAMSUNG)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SMARTJOYPLUS)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SONY)
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) },
@@ -2069,9 +2215,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SPEEDLINK)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_STEELSERIES)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SUNPLUS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_THRUSTMASTER)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323) },
@@ -2080,12 +2234,25 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TIVO)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TOPSEED)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TWINHAN)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UCLOGIC)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
@@ -2093,20 +2260,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_81) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UDRAW_PS3)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WALTOP)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_Q_PAD) },
@@ -2114,19 +2278,18 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_XINMO)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZEROPLUS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZYDACRON)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
-
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+#endif
 	{ }
 };
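(Note on the hid-core.c rework above: it is two-sided. hid_scan_report() now drops a device back to HID_GROUP_GENERIC when the specialised driver for its group is not compiled in, and hid_have_special_driver[] is split into IS_ENABLED() sections so an entry only shadows hid-generic if its driver can actually bind. A compact restatement of one such case; the function name is illustrative, the group, config symbol and behaviour are taken from the hunks above.)

#include <linux/hid.h>
#include <linux/kconfig.h>

/* illustrative wrapper around one case of the fallback logic */
static void hid_group_or_generic(struct hid_device *hid)
{
	/* hid-rmi not built: let hid-generic handle the device instead */
	if (hid->group == HID_GROUP_RMI && !IS_ENABLED(CONFIG_HID_RMI))
		hid->group = HID_GROUP_GENERIC;
}

Both halves work together: the group fallback keeps scanning from stranding a device on a group nobody claims, while the IS_ENABLED() blocks in the table keep hid-generic from being pushed aside in favour of a driver that was never built.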
 
diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c
index 6e3848a..e2c7465 100644
--- a/drivers/hid/hid-elecom.c
+++ b/drivers/hid/hid-elecom.c
@@ -1,10 +1,8 @@
 /*
- *  HID driver for Elecom BM084 (bluetooth mouse).
- *  Removes a non-existing horizontal wheel from
- *  the HID descriptor.
- *  (This module is based on "hid-ortek".)
- *
+ *  HID driver for ELECOM devices.
  *  Copyright (c) 2010 Richard Nauber <Richard.Nauber@gmail.com>
+ *  Copyright (c) 2016 Yuxuan Shui <yshuiv7@gmail.com>
+ *  Copyright (c) 2017 Diego Elio Pettenò <flameeyes@flameeyes.eu>
  */
 
 /*
@@ -23,15 +21,61 @@
 static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 		unsigned int *rsize)
 {
-	if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
-		hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
-		rdesc[47] = 0x00;
+	switch (hdev->product) {
+	case USB_DEVICE_ID_ELECOM_BM084:
+		/* The BM084 Bluetooth mouse includes a non-existing horizontal
+		 * wheel in the HID descriptor. */
+		if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) {
+			hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n");
+			rdesc[47] = 0x00;
+		}
+		break;
+	case USB_DEVICE_ID_ELECOM_DEFT_WIRED:
+	case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS:
+		/* The DEFT trackball has eight buttons, but its descriptor only
+		 * reports five, disabling the three Fn buttons on the top of
+		 * the mouse.
+		 *
+		 * Apply the following diff to the descriptor:
+		 *
+		 * Collection (Physical),              Collection (Physical),
+		 *     Report ID (1),                      Report ID (1),
+		 *     Report Count (5),           ->      Report Count (8),
+		 *     Report Size (1),                    Report Size (1),
+		 *     Usage Page (Button),                Usage Page (Button),
+		 *     Usage Minimum (01h),                Usage Minimum (01h),
+		 *     Usage Maximum (05h),        ->      Usage Maximum (08h),
+		 *     Logical Minimum (0),                Logical Minimum (0),
+		 *     Logical Maximum (1),                Logical Maximum (1),
+		 *     Input (Variable),                   Input (Variable),
+		 *     Report Count (1),           ->      Report Count (0),
+		 *     Report Size (3),                    Report Size (3),
+		 *     Input (Constant),                   Input (Constant),
+		 *     Report Size (16),                   Report Size (16),
+		 *     Report Count (2),                   Report Count (2),
+		 *     Usage Page (Desktop),               Usage Page (Desktop),
+		 *     Usage (X),                          Usage (X),
+		 *     Usage (Y),                          Usage (Y),
+		 *     Logical Minimum (-32768),           Logical Minimum (-32768),
+		 *     Logical Maximum (32767),            Logical Maximum (32767),
+		 *     Input (Variable, Relative),         Input (Variable, Relative),
+		 * End Collection,                     End Collection,
+		 */
+		if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) {
+			hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n");
+			rdesc[13] = 8; /* Button/Variable Report Count */
+			rdesc[21] = 8; /* Button/Variable Usage Maximum */
+			rdesc[29] = 0; /* Button/Constant Report Count */
+		}
+		break;
 	}
 	return rdesc;
 }
 
 static const struct hid_device_id elecom_devices[] = {
-	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084)},
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, elecom_devices);
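A note on the pattern above: report_fixup() callbacks patch descriptor bytes in place, but only after checking the descriptor length and the exact bytes they expect, so an unknown firmware revision is left untouched. A minimal sketch of that guard-then-patch shape, with a hypothetical length, offsets and values rather than the DEFT ones:

#include <linux/hid.h>

/* Sketch only: the length (64), offsets (13, 21) and values below are
 * illustrative, not taken from a real device descriptor. */
static __u8 *example_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				  unsigned int *rsize)
{
	if (*rsize >= 64 && rdesc[13] == 5 && rdesc[21] == 5) {
		hid_info(hdev, "fixing up example report descriptor\n");
		rdesc[13] = 8;	/* Report Count */
		rdesc[21] = 8;	/* Usage Maximum */
	}
	return rdesc;
}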
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 643390b..4f9a393 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -173,6 +173,7 @@
 #define USB_VENDOR_ID_ASUSTEK		0x0b05
 #define USB_DEVICE_ID_ASUSTEK_LCM	0x1726
 #define USB_DEVICE_ID_ASUSTEK_LCM2	0x175b
+#define USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD	0x17e0
 #define USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD	0x8585
 #define USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD	0x0101
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1 0x1854
@@ -318,6 +319,9 @@
 #define USB_VENDOR_ID_DELCOM		0x0fc5
 #define USB_DEVICE_ID_DELCOM_VISUAL_IND	0xb080
 
+#define USB_VENDOR_ID_DELL				0x413c
+#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE	0x301a
+
 #define USB_VENDOR_ID_DELORME		0x1163
 #define USB_DEVICE_ID_DELORME_EARTHMATE	0x0100
 #define USB_DEVICE_ID_DELORME_EM_LT20	0x0200
@@ -358,6 +362,8 @@
 
 #define USB_VENDOR_ID_ELECOM		0x056e
 #define USB_DEVICE_ID_ELECOM_BM084	0x0061
+#define USB_DEVICE_ID_ELECOM_DEFT_WIRED	0x00fe
+#define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS	0x00ff
 
 #define USB_VENDOR_ID_DREAM_CHEEKY	0x1d34
 #define USB_DEVICE_ID_DREAM_CHEEKY_WN	0x0004
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 8daa8ce..0401503 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -872,10 +872,9 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
 static int i2c_hid_acpi_pdata(struct i2c_client *client,
 		struct i2c_hid_platform_data *pdata)
 {
-	static u8 i2c_hid_guid[] = {
-		0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45,
-		0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE,
-	};
+	static guid_t i2c_hid_guid =
+		GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+			  0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
 	union acpi_object *obj;
 	struct acpi_device *adev;
 	acpi_handle handle;
@@ -884,7 +883,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
 	if (!handle || acpi_bus_get_device(handle, &adev))
 		return -ENODEV;
 
-	obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL,
+	obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
 				      ACPI_TYPE_INTEGER);
 	if (!obj) {
 		dev_err(&client->dev, "device _DSM execution failed\n");
@@ -897,6 +896,15 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
 	return 0;
 }
 
+static void i2c_hid_acpi_fix_up_power(struct device *dev)
+{
+	acpi_handle handle = ACPI_HANDLE(dev);
+	struct acpi_device *adev;
+
+	if (handle && acpi_bus_get_device(handle, &adev) == 0)
+		acpi_device_fix_up_power(adev);
+}
+
 static const struct acpi_device_id i2c_hid_acpi_match[] = {
 	{"ACPI0C50", 0 },
 	{"PNP0C50", 0 },
@@ -909,6 +917,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client,
 {
 	return -ENODEV;
 }
+
+static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {}
 #endif
 
 #ifdef CONFIG_OF
@@ -1030,6 +1040,8 @@ static int i2c_hid_probe(struct i2c_client *client,
 	if (ret < 0)
 		goto err_regulator;
 
+	i2c_hid_acpi_fix_up_power(&client->dev);
+
 	pm_runtime_get_noresume(&client->dev);
 	pm_runtime_set_active(&client->dev);
 	pm_runtime_enable(&client->dev);
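The _DSM GUID now uses guid_t and GUID_INIT() instead of a hand-written byte array. GUID_INIT() stores its first three fields little-endian, which is why the removed array began with F7 F6 DF 3C 67 42 55 45 for the values 0x3CDFF6F7, 0x4267, 0x4555. A hedged sketch of declaring and evaluating such a GUID (the revision and function numbers here are placeholders):

#include <linux/acpi.h>
#include <linux/uuid.h>

/* Expands to the same bytes the open-coded array spelled out by hand. */
static const guid_t example_dsm_guid =
	GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
		  0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);

static int example_dsm_read(acpi_handle handle, u64 *out)
{
	union acpi_object *obj;

	/* Revision 1, function 1: placeholder values for illustration. */
	obj = acpi_evaluate_dsm_typed(handle, &example_dsm_guid, 1, 1,
				      NULL, ACPI_TYPE_INTEGER);
	if (!obj)
		return -ENODEV;

	*out = obj->integer.value;
	ACPI_FREE(obj);
	return 0;
}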
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 6316498..a88e7c7 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -85,6 +85,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT },
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 4b225fb..e274c9d 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1571,37 +1571,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len)
 {
 	unsigned char *data = wacom->data;
 
-	if (wacom->pen_input)
+	if (wacom->pen_input) {
 		dev_dbg(wacom->pen_input->dev.parent,
 			"%s: received report #%d\n", __func__, data[0]);
-	else if (wacom->touch_input)
+
+		if (len == WACOM_PKGLEN_PENABLED ||
+		    data[0] == WACOM_REPORT_PENABLED)
+			return wacom_tpc_pen(wacom);
+	}
+	else if (wacom->touch_input) {
 		dev_dbg(wacom->touch_input->dev.parent,
 			"%s: received report #%d\n", __func__, data[0]);
 
-	switch (len) {
-	case WACOM_PKGLEN_TPC1FG:
-		return wacom_tpc_single_touch(wacom, len);
-
-	case WACOM_PKGLEN_TPC2FG:
-		return wacom_tpc_mt_touch(wacom);
-
-	case WACOM_PKGLEN_PENABLED:
-		return wacom_tpc_pen(wacom);
-
-	default:
-		switch (data[0]) {
-		case WACOM_REPORT_TPC1FG:
-		case WACOM_REPORT_TPCHID:
-		case WACOM_REPORT_TPCST:
-		case WACOM_REPORT_TPC1FGE:
+		switch (len) {
+		case WACOM_PKGLEN_TPC1FG:
 			return wacom_tpc_single_touch(wacom, len);
 
-		case WACOM_REPORT_TPCMT:
-		case WACOM_REPORT_TPCMT2:
-			return wacom_mt_touch(wacom);
+		case WACOM_PKGLEN_TPC2FG:
+			return wacom_tpc_mt_touch(wacom);
 
-		case WACOM_REPORT_PENABLED:
-			return wacom_tpc_pen(wacom);
+		default:
+			switch (data[0]) {
+			case WACOM_REPORT_TPC1FG:
+			case WACOM_REPORT_TPCHID:
+			case WACOM_REPORT_TPCST:
+			case WACOM_REPORT_TPC1FGE:
+				return wacom_tpc_single_touch(wacom, len);
+
+			case WACOM_REPORT_TPCMT:
+			case WACOM_REPORT_TPCMT2:
+				return wacom_mt_touch(wacom);
+
+			}
 		}
 	}
 
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 26b0510..93d28c0 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -1066,7 +1066,7 @@ static void ssip_pn_setup(struct net_device *dev)
 	dev->addr_len		= 1;
 	dev->tx_queue_len	= SSIP_TXQUEUE_LEN;
 
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &phonet_header_ops;
 }
 
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 22d5eaf..5ef2814 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -343,6 +343,7 @@
 
 config SENSORS_ASPEED
 	tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver"
+	select REGMAP
 	help
 	  This driver provides support for ASPEED AST2400/AST2500 PWM
 	  and Fan Tacho controllers.
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c
index 48403a2..9de13d6 100644
--- a/drivers/hwmon/aspeed-pwm-tacho.c
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/errno.h>
 #include <linux/gpio/consumer.h>
 #include <linux/delay.h>
 #include <linux/hwmon.h>
@@ -494,7 +495,7 @@ static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data
 	return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit);
 }
 
-static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
+static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
 				      u8 fan_tach_ch)
 {
 	u32 raw_data, tach_div, clk_source, sec, val;
@@ -510,6 +511,9 @@ static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv,
 	msleep(sec);
 
 	regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val);
+	if (!(val & RESULT_STATUS_MASK))
+		return -ETIMEDOUT;
+
 	raw_data = val & RESULT_VALUE_MASK;
 	tach_div = priv->type_fan_tach_clock_division[type];
 	tach_div = 0x4 << (tach_div * 2);
@@ -561,12 +565,14 @@ static ssize_t show_rpm(struct device *dev, struct device_attribute *attr,
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	int index = sensor_attr->index;
-	u32 rpm;
+	int rpm;
 	struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev);
 
 	rpm = aspeed_get_fan_tach_ch_rpm(priv, index);
+	if (rpm < 0)
+		return rpm;
 
-	return sprintf(buf, "%u\n", rpm);
+	return sprintf(buf, "%d\n", rpm);
 }
 
 static umode_t pwm_is_visible(struct kobject *kobj,
@@ -591,24 +597,23 @@ static umode_t fan_dev_is_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-static SENSOR_DEVICE_ATTR(pwm0, 0644,
-			show_pwm, set_pwm, 0);
 static SENSOR_DEVICE_ATTR(pwm1, 0644,
-			show_pwm, set_pwm, 1);
+			show_pwm, set_pwm, 0);
 static SENSOR_DEVICE_ATTR(pwm2, 0644,
-			show_pwm, set_pwm, 2);
+			show_pwm, set_pwm, 1);
 static SENSOR_DEVICE_ATTR(pwm3, 0644,
-			show_pwm, set_pwm, 3);
+			show_pwm, set_pwm, 2);
 static SENSOR_DEVICE_ATTR(pwm4, 0644,
-			show_pwm, set_pwm, 4);
+			show_pwm, set_pwm, 3);
 static SENSOR_DEVICE_ATTR(pwm5, 0644,
-			show_pwm, set_pwm, 5);
+			show_pwm, set_pwm, 4);
 static SENSOR_DEVICE_ATTR(pwm6, 0644,
-			show_pwm, set_pwm, 6);
+			show_pwm, set_pwm, 5);
 static SENSOR_DEVICE_ATTR(pwm7, 0644,
+			show_pwm, set_pwm, 6);
+static SENSOR_DEVICE_ATTR(pwm8, 0644,
 			show_pwm, set_pwm, 7);
 static struct attribute *pwm_dev_attrs[] = {
-	&sensor_dev_attr_pwm0.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm2.dev_attr.attr,
 	&sensor_dev_attr_pwm3.dev_attr.attr,
@@ -616,6 +621,7 @@ static struct attribute *pwm_dev_attrs[] = {
 	&sensor_dev_attr_pwm5.dev_attr.attr,
 	&sensor_dev_attr_pwm6.dev_attr.attr,
 	&sensor_dev_attr_pwm7.dev_attr.attr,
+	&sensor_dev_attr_pwm8.dev_attr.attr,
 	NULL,
 };
 
@@ -624,40 +630,39 @@ static const struct attribute_group pwm_dev_group = {
 	.is_visible = pwm_is_visible,
 };
 
-static SENSOR_DEVICE_ATTR(fan0_input, 0444,
-		show_rpm, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan1_input, 0444,
-		show_rpm, NULL, 1);
+		show_rpm, NULL, 0);
 static SENSOR_DEVICE_ATTR(fan2_input, 0444,
-		show_rpm, NULL, 2);
+		show_rpm, NULL, 1);
 static SENSOR_DEVICE_ATTR(fan3_input, 0444,
-		show_rpm, NULL, 3);
+		show_rpm, NULL, 2);
 static SENSOR_DEVICE_ATTR(fan4_input, 0444,
-		show_rpm, NULL, 4);
+		show_rpm, NULL, 3);
 static SENSOR_DEVICE_ATTR(fan5_input, 0444,
-		show_rpm, NULL, 5);
+		show_rpm, NULL, 4);
 static SENSOR_DEVICE_ATTR(fan6_input, 0444,
-		show_rpm, NULL, 6);
+		show_rpm, NULL, 5);
 static SENSOR_DEVICE_ATTR(fan7_input, 0444,
-		show_rpm, NULL, 7);
+		show_rpm, NULL, 6);
 static SENSOR_DEVICE_ATTR(fan8_input, 0444,
-		show_rpm, NULL, 8);
+		show_rpm, NULL, 7);
 static SENSOR_DEVICE_ATTR(fan9_input, 0444,
-		show_rpm, NULL, 9);
+		show_rpm, NULL, 8);
 static SENSOR_DEVICE_ATTR(fan10_input, 0444,
-		show_rpm, NULL, 10);
+		show_rpm, NULL, 9);
 static SENSOR_DEVICE_ATTR(fan11_input, 0444,
-		show_rpm, NULL, 11);
+		show_rpm, NULL, 10);
 static SENSOR_DEVICE_ATTR(fan12_input, 0444,
-		show_rpm, NULL, 12);
+		show_rpm, NULL, 11);
 static SENSOR_DEVICE_ATTR(fan13_input, 0444,
-		show_rpm, NULL, 13);
+		show_rpm, NULL, 12);
 static SENSOR_DEVICE_ATTR(fan14_input, 0444,
-		show_rpm, NULL, 14);
+		show_rpm, NULL, 13);
 static SENSOR_DEVICE_ATTR(fan15_input, 0444,
+		show_rpm, NULL, 14);
+static SENSOR_DEVICE_ATTR(fan16_input, 0444,
 		show_rpm, NULL, 15);
 static struct attribute *fan_dev_attrs[] = {
-	&sensor_dev_attr_fan0_input.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_fan2_input.dev_attr.attr,
 	&sensor_dev_attr_fan3_input.dev_attr.attr,
@@ -673,6 +678,7 @@ static struct attribute *fan_dev_attrs[] = {
 	&sensor_dev_attr_fan13_input.dev_attr.attr,
 	&sensor_dev_attr_fan14_input.dev_attr.attr,
 	&sensor_dev_attr_fan15_input.dev_attr.attr,
+	&sensor_dev_attr_fan16_input.dev_attr.attr,
 	NULL
 };
 
@@ -802,7 +808,6 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev)
 		if (ret)
 			return ret;
 	}
-	of_node_put(np);
 
 	priv->groups[0] = &pwm_dev_group;
 	priv->groups[1] = &fan_dev_group;
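With the tach read returning int, a timed-out measurement surfaces as -ETIMEDOUT and show_rpm() hands the negative value straight back, so the sysfs read fails instead of reporting a stale or meaningless value. A small hedged sketch of the same propagation pattern (the status bit and value field below are hypothetical):

#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/kernel.h>

/* Stand-in for the hardware access: bit 31 as a "measurement done"
 * flag and the low bits as the value are made up for illustration. */
static int example_measure(u32 result_reg)
{
	if (!(result_reg & BIT(31)))
		return -ETIMEDOUT;
	return result_reg & 0xfffff;
}

static ssize_t example_rpm_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	int rpm = example_measure(0);	/* would read the RESULT register */

	if (rpm < 0)
		return rpm;	/* userspace read() fails with the errno */
	return sprintf(buf, "%d\n", rpm);
}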
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 6283b99..d1263b8 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -94,9 +94,9 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[],
 static int dw_i2c_acpi_configure(struct platform_device *pdev)
 {
 	struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+	u32 ss_ht = 0, fp_ht = 0, hs_ht = 0, fs_ht = 0;
 	acpi_handle handle = ACPI_HANDLE(&pdev->dev);
 	const struct acpi_device_id *id;
-	u32 ss_ht, fp_ht, hs_ht, fs_ht;
 	struct acpi_device *adev;
 	const char *uid;
 
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 95ed171..54a47b4 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
 		 * the first read operation, otherwise the first read cost
 		 * one extra clock cycle.
 		 */
-		temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+		temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 		temp |= I2CR_MTX;
-		writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+		imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 	}
 	msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
 
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 				 * the first read operation, otherwise the first read cost
 				 * one extra clock cycle.
 				 */
-				temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+				temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 				temp |= I2CR_MTX;
-				writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+				imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
 			}
 		} else if (i == (msgs->len - 2)) {
 			dev_dbg(&i2c_imx->adapter.dev,
diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index f573448..e98e44e 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -584,7 +584,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
 	/* unmap the data buffer */
 	if (dma_size != 0)
-		dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+		dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
 
 	if (unlikely(!time_left)) {
 		dev_err(dev, "completion wait timed out\n");
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 214bf28..8be3e6c 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -319,7 +319,7 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
 	rcar_i2c_write(priv, ICFBSCR, TCYC06);
 
 	dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
-			 priv->msg->len, priv->dma_direction);
+			 sg_dma_len(&priv->sg), priv->dma_direction);
 
 	priv->dma_direction = DMA_NONE;
 }
diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c
index 0ed77ee..a2e3dd7 100644
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -178,22 +178,39 @@ static int usb_read(struct i2c_adapter *adapter, int cmd,
 		    int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmalloc(len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_rcvctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE |
-			       USB_DIR_IN, value, index, data, len, 2000);
+			       USB_DIR_IN, value, index, dmadata, len, 2000);
+
+	memcpy(data, dmadata, len);
+	kfree(dmadata);
+	return ret;
 }
 
 static int usb_write(struct i2c_adapter *adapter, int cmd,
 		     int value, int index, void *data, int len)
 {
 	struct i2c_tiny_usb *dev = (struct i2c_tiny_usb *)adapter->algo_data;
+	void *dmadata = kmemdup(data, len, GFP_KERNEL);
+	int ret;
+
+	if (!dmadata)
+		return -ENOMEM;
 
 	/* do control transfer */
-	return usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
+	ret = usb_control_msg(dev->usb_dev, usb_sndctrlpipe(dev->usb_dev, 0),
 			       cmd, USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
-			       value, index, data, len, 2000);
+			       value, index, dmadata, len, 2000);
+
+	kfree(dmadata);
+	return ret;
 }
 
 static void i2c_tiny_usb_free(struct i2c_tiny_usb *dev)
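usb_read() and usb_write() now bounce data through a freshly allocated kernel buffer because usb_control_msg() may hand that pointer to the host controller for DMA, and stack or otherwise non-DMA-capable memory is not safe for that. The general shape of the pattern, sketched with a hypothetical vendor request (request number, flags and error handling are illustrative only):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/usb.h>

static int example_vendor_read(struct usb_device *udev, void *buf, int len)
{
	void *dmabuf = kmalloc(len, GFP_KERNEL);	/* DMA-capable bounce buffer */
	int ret;

	if (!dmabuf)
		return -ENOMEM;

	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
			      0x01,	/* placeholder request */
			      USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
			      0, 0, dmabuf, len, 2000);
	if (ret > 0)
		memcpy(buf, dmabuf, min(ret, len));

	kfree(dmabuf);
	return ret;
}

For writes the same idea applies in reverse; kmemdup() of the caller's buffer, as in the patch above, is the usual shorthand.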
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937..14d1e7d 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	int error;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = (char *)pc;
 
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	scsi_req_init(sense_rq);
+	scsi_req_init(req);
 
 	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
 			      GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	ide_requeue_and_plug(drive, failed_rq);
 	if (ide_queue_sense_rq(drive, pc)) {
 		blk_start_request(failed_rq);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
 	}
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
-		int uptodate, error;
+		int uptodate;
+		blk_status_t error;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			error = 0;
+			error = BLK_STS_OK;
 		} else {
 
 			if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					scsi_req(rq)->result = -EIO;
 			}
 
-			error = uptodate ? 0 : -EIO;
+			error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
 		}
 
 		ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3..81e18f96 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 		scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
 		cdrom_analyze_sense_data(drive, failed);
 
-		if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+		if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
 			BUG();
 	} else
 		cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 
 		rq = blk_get_request(drive->queue,
 			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,  __GFP_RECLAIM);
-		scsi_req_init(rq);
 		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 		ide_req(rq)->type = ATA_PRIV_PC;
 		rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		nr_bytes -= cmd->last_xfer_len;
 
 	if (nr_bytes > 0) {
-		ide_complete_rq(drive, 0, nr_bytes);
+		ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 		return true;
 	}
 
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 out_end:
 	if (blk_rq_is_scsi(rq) && rc == 0) {
 		scsi_req(rq)->resid_len = 0;
-		blk_end_request_all(rq, 0);
+		blk_end_request_all(rq, BLK_STS_OK);
 		hwif->rq = NULL;
 	} else {
 		if (sense && uptodate)
@@ -699,7 +698,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 				scsi_req(rq)->resid_len += cmd->last_xfer_len;
 		}
 
-		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
 
 		if (sense && rc == 2)
 			ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 	if (nsectors == 0)
 		nsectors = 1;
 
-	ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+	ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
 
 	return ide_stopped;
 }
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736..9d26c97 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	int ret;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
 	blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32..ef7c8c4 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 		return setting->set(drive, arg);
 
 	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 5;
 	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237..241983d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c8122..54d4d78 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
 			if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 				ide_finish_cmd(drive, cmd, stat);
 			else
-				ide_complete_rq(drive, 0,
+				ide_complete_rq(drive, BLK_STS_OK,
 						blk_rq_sectors(cmd->rq) << 9);
 			return ide_stopped;
 		}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7..47d5f337 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 			return ide_stopped;
 		}
 		scsi_req(rq)->result = err;
-		ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 }
 EXPORT_SYMBOL_GPL(ide_error);
 
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
 {
 	struct request *rq = drive->hwif->rq;
 
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
 		if (err <= 0 && scsi_req(rq)->result == 0)
 			scsi_req(rq)->result = -EIO;
-		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+		ide_complete_rq(drive, err, blk_rq_bytes(rq));
 	}
 }
 
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
 	}
 	/* done polling */
 	hwif->polling = 0;
-	ide_complete_drive_reset(drive, 0);
+	ide_complete_drive_reset(drive, BLK_STS_OK);
 	return ide_stopped;
 }
 
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
-	int err = 0;
+	blk_status_t err = BLK_STS_OK;
 
 	if (port_ops && port_ops->reset_poll) {
 		err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
 			hwif->name, tmp);
 		drive->failures++;
-		err = -EIO;
+		err = BLK_STS_IOERR;
 	} else  {
 		tmp = ide_read_error(drive);
 
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
 		} else {
 			ide_reset_report_error(hwif, tmp);
 			drive->failures++;
-			err = -EIO;
+			err = BLK_STS_IOERR;
 		}
 	}
 out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 
 	if (io_ports->ctl_addr == 0) {
 		spin_unlock_irqrestore(&hwif->lock, flags);
-		ide_complete_drive_reset(drive, -ENXIO);
+		ide_complete_drive_reset(drive, BLK_STS_IOERR);
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048..627b1f6 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, done);
+		ide_complete_rq(drive, BLK_STS_IOERR, done);
 		return ide_stopped;
 	}
 
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 
 		if (ata_misc_request(rq)) {
 			scsi_req(rq)->result = 0;
-			ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+			ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 			return ide_stopped;
 		} else
 			goto out_end;
@@ -303,7 +303,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 	drive->failed_pc = NULL;
 	if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
 		scsi_req(rq)->result = -EIO;
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 	return ide_stopped;
 }
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af72..3a234701 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
 #include <linux/uaccess.h>
 #include <asm/io.h>
 
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
 	       unsigned int nr_bytes)
 {
 	/*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
 	}
 }
 
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
 	 * if failfast is set on a request, override number of sectors
 	 * and complete the whole request right now
 	 */
-	if (blk_noretry_request(rq) && error <= 0)
+	if (blk_noretry_request(rq) && error)
 		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 			scsi_req(rq)->result = -EIO;
 	}
 
-	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 }
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  	printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
 	scsi_req(rq)->result = 0;
-	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 
  	return ide_stopped;
 }
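The ide conversions above replace 0/-EIO completion codes with blk_status_t values (BLK_STS_OK, BLK_STS_IOERR), which the block layer treats as a distinct type from errnos. Where a boundary still speaks in errnos, the block core provides converters; a minimal hedged sketch:

#include <linux/blkdev.h>

/* Sketch: translating at a boundary between errno-based and
 * blk_status_t-based code. */
static blk_status_t example_to_status(int err)
{
	/* 0 -> BLK_STS_OK, -EIO -> BLK_STS_IOERR, ... */
	return errno_to_blk_status(err);
}

static int example_to_errno(blk_status_t status)
{
	/* the reverse mapping */
	return blk_status_to_errno(status);
}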
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d172..3661abb 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 		struct request *rq;
 
 		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-		scsi_req_init(rq);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
 		blk_execute_rq(drive->queue, NULL, rq, 0);
 		err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
 	int ret = 0;
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107..1f264d5 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	spin_unlock_irq(&hwif->lock);
 
 	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	 * timeout has expired, so power management will be reenabled.
 	 */
 	rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
-	scsi_req_init(rq);
 	if (IS_ERR(rq))
 		goto out;
 
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1..544f02d 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	return ret;
 }
 
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
 {
 	complete(rq->end_io_data);
 }
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
 	if (unlikely(blk_queue_dying(q))) {
 		rq->rq_flags |= RQF_QUIET;
 		scsi_req(rq)->result = -ENXIO;
-		__blk_end_request_all(rq, 0);
+		__blk_end_request_all(rq, BLK_STS_OK);
 		spin_unlock_irq(q->queue_lock);
 		return -ENXIO;
 	}
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	rq->rq_flags |= RQF_PREEMPT;
 	rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 	drive->hwif->rq = NULL;
 
-	if (blk_end_request(rq, 0, 0))
+	if (blk_end_request(rq, BLK_STS_OK, 0))
 		BUG();
 }
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 0235625..01b2adf 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 }
 
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
 {
 	struct ide_request *req = blk_mq_rq_to_pdu(rq);
 
+	scsi_req_init(&req->sreq);
 	req->sreq.sense = req->sense;
-	return 0;
 }
 
 /*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
 		return 1;
 
 	q->request_fn = do_ide_request;
-	q->init_rq_fn = ide_init_rq;
+	q->initialize_rq_fn = ide_initialize_rq;
 	q->cmd_size = sizeof(struct ide_request);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
 		return 1;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f9..fd57e8c 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
 		drive->failed_pc = NULL;
 		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 	ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d..4efe4c6 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
 		}
 
 		if (nr_bytes > 0)
-			ide_complete_rq(drive, 0, nr_bytes);
+			ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
 	}
 }
 
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
 		ide_driveid_update(drive);
 	}
 
-	ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+	ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 }
 
 /*
@@ -394,7 +394,7 @@ static ide_startstop_t task_pio_intr(ide_drive_t *drive)
 	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
 		ide_finish_cmd(drive, cmd, stat);
 	else
-		ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+		ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
 	return ide_stopped;
 out_err:
 	ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	rq = blk_get_request(drive->queue,
 		(cmd->tf_flags & IDE_TFLAG_WRITE) ?
 			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
-	scsi_req_init(rq);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	/*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849b..57eea5a 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
  *	yet.
  */
 
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 		if ((sata_stat & 0x03) != 0x03) {
 			printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
 					    hwif->name, sata_stat);
-			return -ENXIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
-	return 0;
+	return BLK_STS_OK;
 }
 
 /**
diff --git a/drivers/iio/adc/bcm_iproc_adc.c b/drivers/iio/adc/bcm_iproc_adc.c
index 21d38c8..7f4f9c4 100644
--- a/drivers/iio/adc/bcm_iproc_adc.c
+++ b/drivers/iio/adc/bcm_iproc_adc.c
@@ -143,7 +143,7 @@ static void iproc_adc_reg_dump(struct iio_dev *indio_dev)
 	iproc_adc_dbg_reg(dev, adc_priv, IPROC_SOFT_BYPASS_DATA);
 }
 
-static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
+static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
 {
 	u32 channel_intr_status;
 	u32 intr_status;
@@ -167,7 +167,7 @@ static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
 	return IRQ_NONE;
 }
 
-static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
+static irqreturn_t iproc_adc_interrupt_handler(int irq, void *data)
 {
 	irqreturn_t retval = IRQ_NONE;
 	struct iproc_adc_priv *adc_priv;
@@ -181,7 +181,7 @@ static irqreturn_t iproc_adc_interrupt_thread(int irq, void *data)
 	adc_priv = iio_priv(indio_dev);
 
 	regmap_read(adc_priv->regmap, IPROC_INTERRUPT_STATUS, &intr_status);
-	dev_dbg(&indio_dev->dev, "iproc_adc_interrupt_thread(),INTRPT_STS:%x\n",
+	dev_dbg(&indio_dev->dev, "iproc_adc_interrupt_handler(),INTRPT_STS:%x\n",
 			intr_status);
 
 	intr_channels = (intr_status & IPROC_ADC_INTR_MASK) >> IPROC_ADC_INTR;
@@ -566,8 +566,8 @@ static int iproc_adc_probe(struct platform_device *pdev)
 	}
 
 	ret = devm_request_threaded_irq(&pdev->dev, adc_priv->irqno,
-				iproc_adc_interrupt_thread,
 				iproc_adc_interrupt_handler,
+				iproc_adc_interrupt_thread,
 				IRQF_SHARED, "iproc-adc", indio_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "request_irq error %d\n", ret);
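The iproc fix works because devm_request_threaded_irq() takes the hard-IRQ handler as its third argument and the threaded (process-context) handler as its fourth; the two had been passed the wrong way round. A minimal sketch of the calling convention (names and the lack of shared-IRQ handling are illustrative simplifications):

#include <linux/device.h>
#include <linux/interrupt.h>

/* Hard-IRQ context: ack/quiesce the device and defer the real work. */
static irqreturn_t example_hardirq(int irq, void *data)
{
	return IRQ_WAKE_THREAD;
}

/* Threaded handler, process context: may sleep, e.g. for regmap I/O. */
static irqreturn_t example_thread_fn(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int example_request_irq(struct device *dev, int irq, void *data)
{
	return devm_request_threaded_irq(dev, irq,
					 example_hardirq,	/* runs first, in IRQ context */
					 example_thread_fn,	/* runs after, in a kthread */
					 0, "example-adc", data);
}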
diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index ec82106..b0526e4 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -438,10 +438,10 @@ static ssize_t max9611_shunt_resistor_show(struct device *dev,
 	struct max9611_dev *max9611 = iio_priv(dev_to_iio_dev(dev));
 	unsigned int i, r;
 
-	i = max9611->shunt_resistor_uohm / 1000;
-	r = max9611->shunt_resistor_uohm % 1000;
+	i = max9611->shunt_resistor_uohm / 1000000;
+	r = max9611->shunt_resistor_uohm % 1000000;
 
-	return sprintf(buf, "%u.%03u\n", i, r);
+	return sprintf(buf, "%u.%06u\n", i, r);
 }
 
 static IIO_DEVICE_ATTR(in_power_shunt_resistor, 0444,
@@ -536,8 +536,8 @@ static int max9611_probe(struct i2c_client *client,
 	int ret;
 
 	indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*max9611));
-	if (IS_ERR(indio_dev))
-		return PTR_ERR(indio_dev);
+	if (!indio_dev)
+		return -ENOMEM;
 
 	i2c_set_clientdata(client, indio_dev);
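The attribute stores the shunt value in micro-ohms, so formatting it as ohms needs a divisor of 1000000 and six fractional digits; dividing by 1000 printed a value a thousand times too large. A quick worked example of the corrected arithmetic (the resistance value is made up):

#include <linux/kernel.h>

/* Example only: 5000 micro-ohm, i.e. a 0.005 ohm shunt. */
static ssize_t example_format_shunt(char *buf)
{
	unsigned int uohm = 5000;
	unsigned int i = uohm / 1000000;	/* integer ohms: 0 */
	unsigned int r = uohm % 1000000;	/* micro-ohm remainder: 5000 */

	return sprintf(buf, "%u.%06u\n", i, r);	/* prints "0.005000" */
}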
 
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index dd4190b..6066bbf 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -468,13 +468,13 @@ static void meson_sar_adc_unlock(struct iio_dev *indio_dev)
 static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev)
 {
 	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
-	int count;
+	unsigned int count, tmp;
 
 	for (count = 0; count < MESON_SAR_ADC_MAX_FIFO_SIZE; count++) {
 		if (!meson_sar_adc_get_fifo_count(indio_dev))
 			break;
 
-		regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, 0);
+		regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, &tmp);
 	}
 }
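regmap_read() stores the register value through its third argument, so the removed call, which passed 0 (a null pointer), would have dereferenced NULL while draining the FIFO; reads whose value is unwanted still need a real variable. Minimal sketch:

#include <linux/regmap.h>

static int example_drain_fifo(struct regmap *map, unsigned int reg,
			      unsigned int depth)
{
	unsigned int tmp, i;
	int ret;

	for (i = 0; i < depth; i++) {
		ret = regmap_read(map, reg, &tmp);	/* never pass NULL here */
		if (ret)
			return ret;
	}
	return 0;
}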
 
diff --git a/drivers/iio/adc/mxs-lradc-adc.c b/drivers/iio/adc/mxs-lradc-adc.c
index b0c7d8e..6888167 100644
--- a/drivers/iio/adc/mxs-lradc-adc.c
+++ b/drivers/iio/adc/mxs-lradc-adc.c
@@ -718,9 +718,12 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
 	adc->dev = dev;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
 	adc->base = devm_ioremap(dev, iores->start, resource_size(iores));
-	if (IS_ERR(adc->base))
-		return PTR_ERR(adc->base);
+	if (!adc->base)
+		return -ENOMEM;
 
 	init_completion(&adc->completion);
 	spin_lock_init(&adc->lock);
diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c
index b235273..81d4c39 100644
--- a/drivers/iio/adc/sun4i-gpadc-iio.c
+++ b/drivers/iio/adc/sun4i-gpadc-iio.c
@@ -105,6 +105,8 @@ struct sun4i_gpadc_iio {
 	bool				no_irq;
 	/* prevents concurrent reads of temperature and ADC */
 	struct mutex			mutex;
+	struct thermal_zone_device	*tzd;
+	struct device			*sensor_device;
 };
 
 #define SUN4I_GPADC_ADC_CHANNEL(_channel, _name) {		\
@@ -502,7 +504,6 @@ static int sun4i_gpadc_probe_dt(struct platform_device *pdev,
 {
 	struct sun4i_gpadc_iio *info = iio_priv(indio_dev);
 	const struct of_device_id *of_dev;
-	struct thermal_zone_device *tzd;
 	struct resource *mem;
 	void __iomem *base;
 	int ret;
@@ -532,13 +533,14 @@ static int sun4i_gpadc_probe_dt(struct platform_device *pdev,
 	if (!IS_ENABLED(CONFIG_THERMAL_OF))
 		return 0;
 
-	tzd = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, info,
-						   &sun4i_ts_tz_ops);
-	if (IS_ERR(tzd))
+	info->sensor_device = &pdev->dev;
+	info->tzd = thermal_zone_of_sensor_register(info->sensor_device, 0,
+						    info, &sun4i_ts_tz_ops);
+	if (IS_ERR(info->tzd))
 		dev_err(&pdev->dev, "could not register thermal sensor: %ld\n",
-			PTR_ERR(tzd));
+			PTR_ERR(info->tzd));
 
-	return PTR_ERR_OR_ZERO(tzd);
+	return PTR_ERR_OR_ZERO(info->tzd);
 }
 
 static int sun4i_gpadc_probe_mfd(struct platform_device *pdev,
@@ -584,15 +586,15 @@ static int sun4i_gpadc_probe_mfd(struct platform_device *pdev,
 		 * of_node, and the device from this driver as third argument to
 		 * return the temperature.
 		 */
-		struct thermal_zone_device *tzd;
-		tzd = devm_thermal_zone_of_sensor_register(pdev->dev.parent, 0,
-							   info,
-							   &sun4i_ts_tz_ops);
-		if (IS_ERR(tzd)) {
+		info->sensor_device = pdev->dev.parent;
+		info->tzd = thermal_zone_of_sensor_register(info->sensor_device,
+							    0, info,
+							    &sun4i_ts_tz_ops);
+		if (IS_ERR(info->tzd)) {
 			dev_err(&pdev->dev,
 				"could not register thermal sensor: %ld\n",
-				PTR_ERR(tzd));
-			return PTR_ERR(tzd);
+				PTR_ERR(info->tzd));
+			return PTR_ERR(info->tzd);
 		}
 	} else {
 		indio_dev->num_channels =
@@ -688,7 +690,13 @@ static int sun4i_gpadc_remove(struct platform_device *pdev)
 
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	if (!info->no_irq && IS_ENABLED(CONFIG_THERMAL_OF))
+
+	if (!IS_ENABLED(CONFIG_THERMAL_OF))
+		return 0;
+
+	thermal_zone_of_sensor_unregister(info->sensor_device, info->tzd);
+
+	if (!info->no_irq)
 		iio_map_array_unregister(indio_dev);
 
 	return 0;
@@ -700,6 +708,7 @@ static const struct platform_device_id sun4i_gpadc_id[] = {
 	{ "sun6i-a31-gpadc-iio", (kernel_ulong_t)&sun6i_gpadc_data },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(platform, sun4i_gpadc_id);
 
 static struct platform_driver sun4i_gpadc_driver = {
 	.driver = {
@@ -711,6 +720,7 @@ static struct platform_driver sun4i_gpadc_driver = {
 	.probe = sun4i_gpadc_probe,
 	.remove = sun4i_gpadc_remove,
 };
+MODULE_DEVICE_TABLE(of, sun4i_gpadc_of_id);
 
 module_platform_driver(sun4i_gpadc_driver);
 
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 4282cec..6cbed7e 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -614,7 +614,7 @@ static int tiadc_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*indio_dev));
+	indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*adc_dev));
 	if (indio_dev == NULL) {
 		dev_err(&pdev->dev, "failed to allocate iio device\n");
 		return -ENOMEM;
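devm_iio_device_alloc() is sized by the driver's private state, which iio_priv() later hands back; passing sizeof(*indio_dev) allocated space for the wrong structure. A short sketch of the intended pairing (the private struct is hypothetical):

#include <linux/iio/iio.h>

struct example_adc {
	int channel_count;	/* hypothetical private state */
};

static int example_alloc(struct device *dev)
{
	struct iio_dev *indio_dev;
	struct example_adc *adc;

	/* size argument = private data size, not sizeof(struct iio_dev) */
	indio_dev = devm_iio_device_alloc(dev, sizeof(*adc));
	if (!indio_dev)
		return -ENOMEM;

	adc = iio_priv(indio_dev);	/* points into the same allocation */
	adc->channel_count = 8;
	return 0;
}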
diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c
index dd99d27..ff03324 100644
--- a/drivers/iio/buffer/industrialio-buffer-dma.c
+++ b/drivers/iio/buffer/industrialio-buffer-dma.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/poll.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/dma-mapping.h>
 #include <linux/sizes.h>
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index 9fabed4..2b5a320 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -14,6 +14,7 @@
 
 #include <linux/iio/iio.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/iio/buffer-dmaengine.h>
 
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index 96dabbd..88a7c5d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -41,6 +41,7 @@ static const int accel_scale[] = {598, 1196, 2392, 4785};
 static const struct inv_mpu6050_reg_map reg_set_6500 = {
 	.sample_rate_div	= INV_MPU6050_REG_SAMPLE_RATE_DIV,
 	.lpf                    = INV_MPU6050_REG_CONFIG,
+	.accel_lpf              = INV_MPU6500_REG_ACCEL_CONFIG_2,
 	.user_ctrl              = INV_MPU6050_REG_USER_CTRL,
 	.fifo_en                = INV_MPU6050_REG_FIFO_EN,
 	.gyro_config            = INV_MPU6050_REG_GYRO_CONFIG,
@@ -211,6 +212,37 @@ int inv_mpu6050_set_power_itg(struct inv_mpu6050_state *st, bool power_on)
 EXPORT_SYMBOL_GPL(inv_mpu6050_set_power_itg);
 
 /**
+ *  inv_mpu6050_set_lpf_regs() - set low pass filter registers, chip dependent
+ *
+ *  MPU60xx/MPU9150 use only 1 register for accelerometer + gyroscope
+ *  MPU6500 and above have a dedicated register for accelerometer
+ */
+static int inv_mpu6050_set_lpf_regs(struct inv_mpu6050_state *st,
+				    enum inv_mpu6050_filter_e val)
+{
+	int result;
+
+	result = regmap_write(st->map, st->reg->lpf, val);
+	if (result)
+		return result;
+
+	switch (st->chip_type) {
+	case INV_MPU6050:
+	case INV_MPU6000:
+	case INV_MPU9150:
+		/* old chips, nothing to do */
+		result = 0;
+		break;
+	default:
+		/* set accel lpf */
+		result = regmap_write(st->map, st->reg->accel_lpf, val);
+		break;
+	}
+
+	return result;
+}
+
+/**
  *  inv_mpu6050_init_config() - Initialize hardware, disable FIFO.
  *
  *  Initial configuration:
@@ -233,8 +265,7 @@ static int inv_mpu6050_init_config(struct iio_dev *indio_dev)
 	if (result)
 		return result;
 
-	d = INV_MPU6050_FILTER_20HZ;
-	result = regmap_write(st->map, st->reg->lpf, d);
+	result = inv_mpu6050_set_lpf_regs(st, INV_MPU6050_FILTER_20HZ);
 	if (result)
 		return result;
 
@@ -537,6 +568,8 @@ static int inv_mpu6050_write_raw(struct iio_dev *indio_dev,
  *                  would be alising. This function basically search for the
  *                  correct low pass parameters based on the fifo rate, e.g,
  *                  sampling frequency.
+ *
+ *  lpf is set automatically when setting sampling rate to avoid any aliases.
  */
 static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
 {
@@ -552,7 +585,7 @@ static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
 	while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1))
 		i++;
 	data = d[i];
-	result = regmap_write(st->map, st->reg->lpf, data);
+	result = inv_mpu6050_set_lpf_regs(st, data);
 	if (result)
 		return result;
 	st->chip_config.lpf = data;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
index ef13de7..953a0c0 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -28,6 +28,7 @@
  *  struct inv_mpu6050_reg_map - Notable registers.
  *  @sample_rate_div:	Divider applied to gyro output rate.
  *  @lpf:		Configures internal low pass filter.
+ *  @accel_lpf:		Configures accelerometer low pass filter.
  *  @user_ctrl:		Enables/resets the FIFO.
  *  @fifo_en:		Determines which data will appear in FIFO.
  *  @gyro_config:	gyro config register.
@@ -47,6 +48,7 @@
 struct inv_mpu6050_reg_map {
 	u8 sample_rate_div;
 	u8 lpf;
+	u8 accel_lpf;
 	u8 user_ctrl;
 	u8 fifo_en;
 	u8 gyro_config;
@@ -188,6 +190,7 @@ struct inv_mpu6050_state {
 #define INV_MPU6050_FIFO_THRESHOLD           500
 
 /* mpu6500 registers */
+#define INV_MPU6500_REG_ACCEL_CONFIG_2      0x1D
 #define INV_MPU6500_REG_ACCEL_OFFSET        0x77
 
 /* delay time in milliseconds */
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index 978e1592..4061fed 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -451,7 +451,8 @@ static ssize_t iio_trigger_write_current(struct device *dev,
 	return len;
 
 out_trigger_put:
-	iio_trigger_put(trig);
+	if (trig)
+		iio_trigger_put(trig);
 	return ret;
 }
 
diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index b30e0c1..67838ed 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -74,9 +74,9 @@ static const int int_time_mapping[] = {100000, 50000, 200000, 400000};
 static const struct reg_field reg_field_it =
 				REG_FIELD(LTR501_ALS_MEAS_RATE, 3, 4);
 static const struct reg_field reg_field_als_intr =
-				REG_FIELD(LTR501_INTR, 0, 0);
-static const struct reg_field reg_field_ps_intr =
 				REG_FIELD(LTR501_INTR, 1, 1);
+static const struct reg_field reg_field_ps_intr =
+				REG_FIELD(LTR501_INTR, 0, 0);
 static const struct reg_field reg_field_als_rate =
 				REG_FIELD(LTR501_ALS_MEAS_RATE, 0, 2);
 static const struct reg_field reg_field_ps_rate =
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index ddf9bee..aa4df0d 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -40,9 +40,9 @@
 #define AS3935_AFE_PWR_BIT	BIT(0)
 
 #define AS3935_INT		0x03
-#define AS3935_INT_MASK		0x07
+#define AS3935_INT_MASK		0x0f
 #define AS3935_EVENT_INT	BIT(3)
-#define AS3935_NOISE_INT	BIT(1)
+#define AS3935_NOISE_INT	BIT(0)
 
 #define AS3935_DATA		0x07
 #define AS3935_DATA_MASK	0x3F
@@ -215,7 +215,7 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
 
 	st->buffer[0] = val & AS3935_DATA_MASK;
 	iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
-					   pf->timestamp);
+					   iio_get_time_ns(indio_dev));
 err_read:
 	iio_trigger_notify_done(indio_dev->trig);
 
@@ -244,7 +244,7 @@ static void as3935_event_work(struct work_struct *work)
 
 	switch (val) {
 	case AS3935_EVENT_INT:
-		iio_trigger_poll(st->trig);
+		iio_trigger_poll_chained(st->trig);
 		break;
 	case AS3935_NOISE_INT:
 		dev_warn(&st->spi->dev, "noise level is too high\n");
@@ -269,8 +269,6 @@ static irqreturn_t as3935_interrupt_handler(int irq, void *private)
 
 static void calibrate_as3935(struct as3935_state *st)
 {
-	mutex_lock(&st->lock);
-
 	/* mask disturber interrupt bit */
 	as3935_write(st, AS3935_INT, BIT(5));
 
@@ -280,8 +278,6 @@ static void calibrate_as3935(struct as3935_state *st)
 
 	mdelay(2);
 	as3935_write(st, AS3935_TUNE_CAP, (st->tune_cap / TUNE_CAP_DIV));
-
-	mutex_unlock(&st->lock);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -318,6 +314,8 @@ static int as3935_resume(struct device *dev)
 	val &= ~AS3935_AFE_PWR_BIT;
 	ret = as3935_write(st, AS3935_AFE_GAIN, val);
 
+	calibrate_as3935(st);
+
 err_resume:
 	mutex_unlock(&st->lock);
 
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 02971e2..ece6926 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -449,12 +449,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 		return ret;
 
 	rt = (struct rt6_info *)dst;
-	if (ipv6_addr_any(&fl6.saddr)) {
-		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
-					 &fl6.daddr, 0, &fl6.saddr);
-		if (ret)
-			goto put;
-
+	if (ipv6_addr_any(&src_in->sin6_addr)) {
 		src_in->sin6_family = AF_INET6;
 		src_in->sin6_addr = fl6.saddr;
 	}
@@ -471,9 +466,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 
 	*pdst = dst;
 	return 0;
-put:
-	dst_release(dst);
-	return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1844770..2b4d613 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1429,7 +1429,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
 	primary_path->packet_life_time =
 		cm_req_get_primary_local_ack_timeout(req_msg);
 	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
-	sa_path_set_service_id(primary_path, req_msg->service_id);
+	primary_path->service_id = req_msg->service_id;
 
 	if (req_msg->alt_local_lid) {
 		alt_path->dgid = req_msg->alt_local_gid;
@@ -1452,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
 		alt_path->packet_life_time =
 			cm_req_get_alt_local_ack_timeout(req_msg);
 		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
-		sa_path_set_service_id(alt_path, req_msg->service_id);
+		alt_path->service_id = req_msg->service_id;
 	}
 }
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 91b7a2f..31bb82d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1140,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
 			ib->sib_pkey = path->pkey;
 			ib->sib_flowinfo = path->flow_label;
 			memcpy(&ib->sib_addr, &path->sgid, 16);
-			ib->sib_sid = sa_path_get_service_id(path);
+			ib->sib_sid = path->service_id;
 			ib->sib_scope_id = 0;
 		} else {
 			ib->sib_pkey = listen_ib->sib_pkey;
@@ -1274,8 +1274,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
 		memcpy(&req->local_gid, &req_param->primary_path->sgid,
 		       sizeof(req->local_gid));
 		req->has_gid	= true;
-		req->service_id	=
-			sa_path_get_service_id(req_param->primary_path);
+		req->service_id = req_param->primary_path->service_id;
 		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
 		if (req->pkey != req_param->bth_pkey)
 			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1827,7 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	struct rdma_route *rt;
 	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
 	struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
-	const __be64 service_id = sa_path_get_service_id(path);
+	const __be64 service_id =
+		ib_event->param.req_rcvd.primary_path->service_id;
 	int ret;
 
 	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -2345,9 +2345,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	path_rec.numb_path = 1;
 	path_rec.reversible = 1;
-	sa_path_set_service_id(&path_rec,
-			       rdma_get_service_id(&id_priv->id,
-						   cma_dst_addr(id_priv)));
+	path_rec.service_id = rdma_get_service_id(&id_priv->id,
+						  cma_dst_addr(id_priv));
 
 	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cb7d372..d92ab4e 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -169,6 +169,16 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns 0 on success or a negative for no listeners.
+ */
+int ibnl_chk_listeners(unsigned int group);
+
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
 			      struct netlink_callback *cb);
 int ib_nl_handle_set_timeout(struct sk_buff *skb,
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index b784055..94931c4 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -37,6 +37,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <rdma/rdma_netlink.h>
+#include "core_priv.h"
 
 struct ibnl_client {
 	struct list_head		list;
@@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group)
 		return -1;
 	return 0;
 }
-EXPORT_SYMBOL(ibnl_chk_listeners);
 
 int ibnl_add_client(int index, int nops,
 		    const struct ibnl_client_cbs cb_table[])
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e335b09c..fb7aec4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -194,7 +194,7 @@ static u32 tid;
 	.field_name          = "sa_path_rec:" #field
 
 static const struct ib_field path_rec_table[] = {
-	{ PATH_REC_FIELD(ib.service_id),
+	{ PATH_REC_FIELD(service_id),
 	  .offset_words = 0,
 	  .offset_bits  = 0,
 	  .size_bits    = 64 },
@@ -296,7 +296,7 @@ static const struct ib_field path_rec_table[] = {
 	.field_name          = "sa_path_rec:" #field
 
 static const struct ib_field opa_path_rec_table[] = {
-	{ OPA_PATH_REC_FIELD(opa.service_id),
+	{ OPA_PATH_REC_FIELD(service_id),
 	  .offset_words = 0,
 	  .offset_bits  = 0,
 	  .size_bits    = 64 },
@@ -774,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
 
 	/* Now build the attributes */
 	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
-		val64 = be64_to_cpu(sa_path_get_service_id(sa_rec));
+		val64 = be64_to_cpu(sa_rec->service_id);
 		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
 			sizeof(val64), &val64);
 	}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 3dbf811..21e60b1 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -58,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
 
 		page = sg_page(sg);
-		if (umem->writable && dirty)
+		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
 	}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 0780b1a..8c4ec56 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -321,11 +321,15 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
 		struct vm_area_struct *vma;
 		struct hstate *h;
 
+		down_read(&mm->mmap_sem);
 		vma = find_vma(mm, ib_umem_start(umem));
-		if (!vma || !is_vm_hugetlb_page(vma))
+		if (!vma || !is_vm_hugetlb_page(vma)) {
+			up_read(&mm->mmap_sem);
 			return -EINVAL;
+		}
 		h = hstate_vma(vma);
 		umem->page_shift = huge_page_shift(h);
+		up_read(&mm->mmap_sem);
 		umem->hugetlb = 1;
 	} else {
 		umem->hugetlb = 0;
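find_vma() and the VMA it returns are only stable while the task's mmap_sem is held, which is why the hugetlb probe above now takes the semaphore for reading around both the lookup and the hstate access. The general shape, sketched (the field read from the VMA is just an example):

#include <linux/mm.h>

static unsigned long example_vma_start(struct mm_struct *mm,
				       unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start = 0;

	down_read(&mm->mmap_sem);	/* read lock is enough for lookups */
	vma = find_vma(mm, addr);
	if (vma)
		start = vma->vm_start;
	up_read(&mm->mmap_sem);	/* do not touch *vma after this point */

	return start;
}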
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 8b9587f..94fd989 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -96,11 +96,11 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
 }
 EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
 
-void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
-				struct sa_path_rec *src)
+static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+				       struct sa_path_rec *src)
 {
-	memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
-	memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+	memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid));
+	memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid));
 
 	dst->dlid		= htons(ntohl(sa_path_get_dlid(src)));
 	dst->slid		= htons(ntohl(sa_path_get_slid(src)));
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ebf7be8..0877283 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -56,6 +56,10 @@
 #define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_CQ_COUNT		(64 * 1024)
 
+#define BNXT_RE_UD_QP_HW_STALL		0x400000
+
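+/* Ring the RQ doorbell after this many receive WQEs have been queued */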
+#define BNXT_RE_RQ_WQE_THRESHOLD	32
+
 struct bnxt_re_work {
 	struct work_struct	work;
 	unsigned long		event;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7ba9e69..c7bd683 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,48 @@
 #include "ib_verbs.h"
 #include <rdma/bnxt_re-abi.h>
 
+static int __from_ib_access_flags(int iflags)
+{
+	int qflags = 0;
+
+	if (iflags & IB_ACCESS_LOCAL_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_READ)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+	if (iflags & IB_ACCESS_REMOTE_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+	if (iflags & IB_ACCESS_MW_BIND)
+		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+	if (iflags & IB_ZERO_BASED)
+		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+	if (iflags & IB_ACCESS_ON_DEMAND)
+		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+	return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+	enum ib_access_flags iflags = 0;
+
+	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+		iflags |= IB_ACCESS_LOCAL_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+		iflags |= IB_ACCESS_REMOTE_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+		iflags |= IB_ACCESS_REMOTE_READ;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+		iflags |= IB_ACCESS_REMOTE_ATOMIC;
+	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+		iflags |= IB_ACCESS_MW_BIND;
+	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+		iflags |= IB_ZERO_BASED;
+	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+		iflags |= IB_ACCESS_ON_DEMAND;
+	return iflags;
+};
+
 static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
 			     struct bnxt_qplib_sge *sg_list, int num)
 {
@@ -149,8 +191,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 	ib_attr->max_total_mcast_qp_attach = 0;
 	ib_attr->max_ah = dev_attr->max_ah;
 
-	ib_attr->max_fmr = dev_attr->max_fmr;
-	ib_attr->max_map_per_fmr = 1;	/* ? */
+	ib_attr->max_fmr = 0;
+	ib_attr->max_map_per_fmr = 0;
 
 	ib_attr->max_srq = dev_attr->max_srq;
 	ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
@@ -410,6 +452,158 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
 	return IB_LINK_LAYER_ETHERNET;
 }
 
+#define	BNXT_RE_FENCE_PBL_SIZE	DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
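+/* Pre-build the MW-bind WQE that is later posted as the fence/phantom WQE */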
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct ib_mr *ib_mr = &fence->mr->ib_mr;
+	struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+	memset(wqe, 0, sizeof(*wqe));
+	wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+	wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	wqe->bind.zero_based = false;
+	wqe->bind.parent_l_key = ib_mr->lkey;
+	wqe->bind.va = (u64)(unsigned long)fence->va;
+	wqe->bind.length = fence->size;
+	wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+	wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+	/* Save the initial rkey in fence structure for now;
+	 * wqe->bind.r_key will be set at (re)bind time.
+	 */
+	fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
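+/* Assign a fresh rkey to the pre-built bind WQE and post it on the QP's SQ */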
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+	struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+					     qplib_qp);
+	struct ib_pd *ib_pd = qp->ib_qp.pd;
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+	struct bnxt_qplib_swqe wqe;
+	int rc;
+
+	memcpy(&wqe, fence_wqe, sizeof(wqe));
+	wqe.bind.r_key = fence->bind_rkey;
+	fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+	dev_dbg(rdev_to_dev(qp->rdev),
+		"Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+		wqe.bind.r_key, qp->qplib_qp.id, pd);
+	rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+	if (rc) {
+		dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+		return rc;
+	}
+	bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+	return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = fence->mr;
+
+	if (fence->mw) {
+		bnxt_re_dealloc_mw(fence->mw);
+		fence->mw = NULL;
+	}
+	if (mr) {
+		if (mr->ib_mr.rkey)
+			bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+					     true);
+		if (mr->ib_mr.lkey)
+			bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+		kfree(mr);
+		fence->mr = NULL;
+	}
+	if (fence->dma_addr) {
+		dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+				 DMA_BIDIRECTIONAL);
+		fence->dma_addr = 0;
+	}
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+	int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+	struct bnxt_re_fence_data *fence = &pd->fence;
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct device *dev = &rdev->en_dev->pdev->dev;
+	struct bnxt_re_mr *mr = NULL;
+	dma_addr_t dma_addr = 0;
+	struct ib_mw *mw;
+	u64 pbl_tbl;
+	int rc;
+
+	dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+				  DMA_BIDIRECTIONAL);
+	rc = dma_mapping_error(dev, dma_addr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+		rc = -EIO;
+		fence->dma_addr = 0;
+		goto fail;
+	}
+	fence->dma_addr = dma_addr;
+
+	/* Allocate a MR */
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	fence->mr = mr;
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+		goto fail;
+	}
+
+	/* Register MR */
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+	mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+	pbl_tbl = dma_addr;
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+			       BNXT_RE_FENCE_PBL_SIZE, false);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+		goto fail;
+	}
+	mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+	/* Create a fence MW only for kernel consumers */
+	mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+	if (!mw) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to create fence-MW for PD: %p\n", pd);
+		rc = -EINVAL;
+		goto fail;
+	}
+	fence->mw = mw;
+
+	bnxt_re_create_fence_wqe(pd);
+	return 0;
+
+fail:
+	bnxt_re_destroy_fence_mr(pd);
+	return rc;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
@@ -417,6 +611,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 	struct bnxt_re_dev *rdev = pd->rdev;
 	int rc;
 
+	bnxt_re_destroy_fence_mr(pd);
 	if (ib_pd->uobject && pd->dpi.dbr) {
 		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
 		struct bnxt_re_ucontext *ucntx;
@@ -498,6 +693,10 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 		}
 	}
 
+	if (!udata)
+		if (bnxt_re_create_fence_mr(pd))
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to create Fence-MR\n");
 	return &pd->ib_pd;
 dbfail:
 	(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -849,12 +1048,16 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
 	/* Shadow QP SQ depth should be same as QP1 RQ depth */
 	qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.sq.max_sge = 2;
+	/* Q full delta can be 1 since this is an internal QP */
+	qp->qplib_qp.sq.q_full_delta = 1;
 
 	qp->qplib_qp.scq = qp1_qp->scq;
 	qp->qplib_qp.rcq = qp1_qp->rcq;
 
 	qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
 	qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+	/* Q full delta can be 1 since this is an internal QP */
+	qp->qplib_qp.rq.q_full_delta = 1;
 
 	qp->qplib_qp.mtu = qp1_qp->mtu;
 
@@ -917,10 +1120,6 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
 				  IB_SIGNAL_ALL_WR) ? true : false);
 
-	entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
-	qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-					dev_attr->max_qp_wqes + 1);
-
 	qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
 	if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
 		qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
@@ -959,6 +1158,9 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
 
+		qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						qp_init_attr->cap.max_recv_wr;
+
 		qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -967,6 +1169,12 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 
 	if (qp_init_attr->qp_type == IB_QPT_GSI) {
+		/* Allocate 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_init_attr->cap.max_send_wr;
 		qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
 		if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
 			qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -1006,6 +1214,22 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 		}
 
 	} else {
+		/* Allocate 128 + 1 more than what's provided */
+		entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+					     BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+						dev_attr->max_qp_wqes +
+						BNXT_QPLIB_RESERVED_QP_WRS + 1);
+		qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+		/*
+		 * Reserve one slot for the phantom WQE. The application can
+		 * post one extra entry in this case, but allowing it avoids
+		 * an unexpected queue-full condition.
+		 */
+
+		qp->qplib_qp.sq.q_full_delta -= 1;
+
 		qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
 		qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
 		if (udata) {
@@ -1025,6 +1249,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 
 	qp->ib_qp.qp_num = qp->qplib_qp.id;
 	spin_lock_init(&qp->sq_lock);
+	spin_lock_init(&qp->rq_lock);
 
 	if (udata) {
 		struct bnxt_re_qp_resp resp;
@@ -1129,48 +1354,6 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
 	}
 }
 
-static int __from_ib_access_flags(int iflags)
-{
-	int qflags = 0;
-
-	if (iflags & IB_ACCESS_LOCAL_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_READ)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
-	if (iflags & IB_ACCESS_REMOTE_WRITE)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
-	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
-		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
-	if (iflags & IB_ACCESS_MW_BIND)
-		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
-	if (iflags & IB_ZERO_BASED)
-		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
-	if (iflags & IB_ACCESS_ON_DEMAND)
-		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
-	return qflags;
-};
-
-static enum ib_access_flags __to_ib_access_flags(int qflags)
-{
-	enum ib_access_flags iflags = 0;
-
-	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
-		iflags |= IB_ACCESS_LOCAL_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
-		iflags |= IB_ACCESS_REMOTE_WRITE;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
-		iflags |= IB_ACCESS_REMOTE_READ;
-	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
-		iflags |= IB_ACCESS_REMOTE_ATOMIC;
-	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
-		iflags |= IB_ACCESS_MW_BIND;
-	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
-		iflags |= IB_ZERO_BASED;
-	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
-		iflags |= IB_ACCESS_ON_DEMAND;
-	return iflags;
-};
-
 static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
 				    struct bnxt_re_qp *qp1_qp,
 				    int qp_attr_mask)
@@ -1378,11 +1561,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
 		qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
 						dev_attr->max_qp_wqes + 1);
+		qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+						qp_attr->cap.max_send_wr;
+		/*
+		 * Reserve one slot for the phantom WQE. Some applications can
+		 * post one extra entry in this case; allowing it avoids an
+		 * unexpected queue-full condition.
+		 */
+		qp->qplib_qp.sq.q_full_delta -= 1;
 		qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
 		if (qp->qplib_qp.rq.max_wqe) {
 			entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
 			qp->qplib_qp.rq.max_wqe =
 				min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+			qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+						       qp_attr->cap.max_recv_wr;
 			qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
 		} else {
 			/* SRQ was used prior, just ignore the RQ caps */
@@ -1883,6 +2076,22 @@ static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
 	return payload_sz;
 }
 
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
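+	/*
+	 * HW stall workaround: once BNXT_RE_UD_QP_HW_STALL WQEs have been
+	 * posted on a UD/GSI/raw-Ethertype QP, move the QP back to RTS and
+	 * reset the WQE counter.
+	 */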
+	if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+	     qp->ib_qp.qp_type == IB_QPT_GSI ||
+	     qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+	     qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+		int qp_attr_mask;
+		struct ib_qp_attr qp_attr;
+
+		qp_attr_mask = IB_QP_STATE;
+		qp_attr.qp_state = IB_QPS_RTS;
+		bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+		qp->qplib_qp.wqe_cnt = 0;
+	}
+}
+
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
 				struct ib_send_wr *wr)
@@ -1928,6 +2137,7 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 	return rc;
 }
@@ -2024,6 +2234,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
 		wr = wr->next;
 	}
 	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	bnxt_ud_qp_hw_stall_workaround(qp);
 	spin_unlock_irqrestore(&qp->sq_lock, flags);
 
 	return rc;
@@ -2071,7 +2282,10 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0, payload_sz = 0;
+	unsigned long flags;
+	u32 count = 0;
 
+	spin_lock_irqsave(&qp->rq_lock, flags);
 	while (wr) {
 		/* House keeping */
 		memset(&wqe, 0, sizeof(wqe));
@@ -2100,9 +2314,21 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
 			*bad_wr = wr;
 			break;
 		}
+
+		/* Ring the doorbell once the number of posted RQEs reaches the threshold */
+		if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+			bnxt_qplib_post_recv_db(&qp->qplib_qp);
+			count = 0;
+		}
+
 		wr = wr->next;
 	}
-	bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	if (count)
+		bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+	spin_unlock_irqrestore(&qp->rq_lock, flags);
+
 	return rc;
 }
 
@@ -2643,12 +2869,36 @@ static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
 		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 }
 
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
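+	/* Post the pre-built fence (bind-MW) WQE on the SQ under sq_lock */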
+	struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+	unsigned long flags;
+	int rc = 0;
+
+	spin_lock_irqsave(&qp->sq_lock, flags);
+
+	rc = bnxt_re_bind_fence_mw(lib_qp);
+	if (!rc) {
+		lib_qp->sq.phantom_wqe_cnt++;
+		dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+			"qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+			lib_qp->id, lib_qp->sq.hwq.prod,
+			HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+			lib_qp->sq.phantom_wqe_cnt);
+	}
+
+	spin_unlock_irqrestore(&qp->sq_lock, flags);
+	return rc;
+}
+
 int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 {
 	struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
 	struct bnxt_re_qp *qp;
 	struct bnxt_qplib_cqe *cqe;
 	int i, ncqe, budget;
+	struct bnxt_qplib_q *sq;
+	struct bnxt_qplib_qp *lib_qp;
 	u32 tbl_idx;
 	struct bnxt_re_sqp_entries *sqp_entry = NULL;
 	unsigned long flags;
@@ -2661,7 +2911,21 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 	}
 	cqe = &cq->cql[0];
 	while (budget) {
-		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+		lib_qp = NULL;
+		ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+		if (lib_qp) {
+			sq = &lib_qp->sq;
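+			/* do_wa9060() requested a phantom; (re)post the fence WQE */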
+			if (sq->send_phantom) {
+				qp = container_of(lib_qp,
+						  struct bnxt_re_qp, qplib_qp);
+				if (send_phantom_wqe(qp) == -ENOMEM)
+					dev_err(rdev_to_dev(cq->rdev),
+						"Phantom failed! Scheduled to send again\n");
+				else
+					sq->send_phantom = false;
+			}
+		}
+
 		if (!ncqe)
 			break;
 
@@ -2822,6 +3086,12 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 	struct bnxt_re_dev *rdev = mr->rdev;
 	int rc;
 
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+		return rc;
+	}
+
 	if (mr->npages && mr->pages) {
 		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
 							&mr->qplib_frpl);
@@ -2829,8 +3099,6 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 		mr->npages = 0;
 		mr->pages = NULL;
 	}
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-
 	if (!IS_ERR_OR_NULL(mr->ib_umem))
 		ib_umem_release(mr->ib_umem);
 
@@ -2914,97 +3182,52 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
 	return ERR_PTR(rc);
 }
 
-/* Fast Memory Regions */
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr)
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata)
 {
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
-	struct bnxt_re_fmr *fmr;
+	struct bnxt_re_mw *mw;
 	int rc;
 
-	if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
-	    fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
-		dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+	if (!mw)
 		return ERR_PTR(-ENOMEM);
-	}
-	fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
-	if (!fmr)
-		return ERR_PTR(-ENOMEM);
+	mw->rdev = rdev;
+	mw->qplib_mw.pd = &pd->qplib_pd;
 
-	fmr->rdev = rdev;
-	fmr->qplib_fmr.pd = &pd->qplib_pd;
-	fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
-
-	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
+	mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+			       CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
 		goto fail;
+	}
+	mw->ib_mw.rkey = mw->qplib_mw.rkey;
 
-	fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
-	fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
-	fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+	atomic_inc(&rdev->mw_count);
+	return &mw->ib_mw;
 
-	atomic_inc(&rdev->mr_count);
-	return &fmr->ib_fmr;
 fail:
-	kfree(fmr);
+	kfree(mw);
 	return ERR_PTR(rc);
 }
 
-int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
-			 u64 iova)
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
 {
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					     ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
+	struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+	struct bnxt_re_dev *rdev = mw->rdev;
 	int rc;
 
-	fmr->qplib_fmr.va = iova;
-	fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
-
-	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
-			       list_len, true);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
-			fmr->ib_fmr.lkey);
-	return rc;
-}
-
-int bnxt_re_unmap_fmr(struct list_head *fmr_list)
-{
-	struct bnxt_re_dev *rdev;
-	struct bnxt_re_fmr *fmr;
-	struct ib_fmr *ib_fmr;
-	int rc = 0;
-
-	/* Validate each FMRs inside the fmr_list */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
-		rdev = fmr->rdev;
-
-		if (rdev) {
-			rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
-						  &fmr->qplib_fmr, true);
-			if (rc)
-				break;
-		}
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+		return rc;
 	}
-	return rc;
-}
 
-int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
-{
-	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-					       ib_fmr);
-	struct bnxt_re_dev *rdev = fmr->rdev;
-	int rc;
-
-	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-	if (rc)
-		dev_err(rdev_to_dev(rdev), "Failed to free FMR");
-
-	kfree(fmr);
-	atomic_dec(&rdev->mr_count);
+	kfree(mw);
+	atomic_dec(&rdev->mw_count);
 	return rc;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c3d717..6c160f6 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -44,11 +44,23 @@ struct bnxt_re_gid_ctx {
 	u32			refcnt;
 };
 
+#define BNXT_RE_FENCE_BYTES	64
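+/* Per-PD buffer and MR/MW resources used to build and post the fence WQE */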
+struct bnxt_re_fence_data {
+	u32 size;
+	u8 va[BNXT_RE_FENCE_BYTES];
+	dma_addr_t dma_addr;
+	struct bnxt_re_mr *mr;
+	struct ib_mw *mw;
+	struct bnxt_qplib_swqe bind_wqe;
+	u32 bind_rkey;
+};
+
 struct bnxt_re_pd {
 	struct bnxt_re_dev	*rdev;
 	struct ib_pd		ib_pd;
 	struct bnxt_qplib_pd	qplib_pd;
 	struct bnxt_qplib_dpi	dpi;
+	struct bnxt_re_fence_data fence;
 };
 
 struct bnxt_re_ah {
@@ -62,6 +74,7 @@ struct bnxt_re_qp {
 	struct bnxt_re_dev	*rdev;
 	struct ib_qp		ib_qp;
 	spinlock_t		sq_lock;	/* protect sq */
+	spinlock_t		rq_lock;	/* protect rq */
 	struct bnxt_qplib_qp	qplib_qp;
 	struct ib_umem		*sumem;
 	struct ib_umem		*rumem;
@@ -181,12 +194,9 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
 			       u32 max_num_sg);
 int bnxt_re_dereg_mr(struct ib_mr *mr);
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-				 struct ib_fmr_attr *fmr_attr);
-int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
-			 u64 iova);
-int bnxt_re_unmap_fmr(struct list_head *fmr_list);
-int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int mr_access_flags,
 				  struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 5d35540..1fce5e7 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -507,10 +507,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->dereg_mr			= bnxt_re_dereg_mr;
 	ibdev->alloc_mr			= bnxt_re_alloc_mr;
 	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
-	ibdev->alloc_fmr		= bnxt_re_alloc_fmr;
-	ibdev->map_phys_fmr		= bnxt_re_map_phys_fmr;
-	ibdev->unmap_fmr		= bnxt_re_unmap_fmr;
-	ibdev->dealloc_fmr		= bnxt_re_dealloc_fmr;
 
 	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 43d08b5..f05500bc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -284,7 +284,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_qp1 req;
-	struct creq_create_qp1_resp *resp;
+	struct creq_create_qp1_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct bnxt_qplib_q *sq = &qp->sq;
 	struct bnxt_qplib_q *rq = &qp->rq;
@@ -394,31 +394,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp1_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
-		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -442,7 +423,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
 	struct cmdq_create_qp req;
-	struct creq_create_qp_resp *resp;
+	struct creq_create_qp_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	struct sq_psn_search **psn_search_ptr;
 	unsigned long int psn_search, poff = 0;
@@ -627,31 +608,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	}
 	req.pd_id = cpu_to_le32(qp->pd->id);
 
-	resp = (struct creq_create_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
-		goto fail;
-	}
-	qp->id = le32_to_cpu(resp->xid);
+
+	qp->id = le32_to_cpu(resp.xid);
 	qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
 	sq->flush_in_progress = false;
 	rq->flush_in_progress = false;
@@ -769,10 +731,11 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_modify_qp req;
-	struct creq_modify_qp_resp *resp;
+	struct creq_modify_qp_resp resp;
 	u16 cmd_flags = 0, pkey;
 	u32 temp32[4];
 	u32 bmask;
+	int rc;
 
 	RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
 
@@ -862,27 +825,10 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
 
-	resp = (struct creq_modify_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	qp->cur_qp_state = qp->state;
 	return 0;
 }
@@ -891,37 +837,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_query_qp req;
-	struct creq_query_qp_resp *resp;
+	struct creq_query_qp_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_qp_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
 
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf)
+		return -ENOMEM;
+	sb = sbuf->sb;
+
 	req.qp_cid = cpu_to_le32(qp->id);
 	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     (void **)&sb, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
 	/* Extract the context from the side buffer */
 	qp->state = sb->en_sqd_async_notify_state &
 			CREQ_QUERY_QP_RESP_SB_STATE_MASK;
@@ -976,7 +911,9 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
 	memcpy(qp->smac, sb->src_mac, 6);
 	qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
-	return 0;
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
@@ -1021,34 +958,18 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_qp req;
-	struct creq_destroy_qp_resp *resp;
+	struct creq_destroy_qp_resp resp;
 	unsigned long flags;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
 
 	req.qp_cid = cpu_to_le32(qp->id);
-	resp = (struct creq_destroy_qp_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 
 	/* Must walk the associated CQs to nullified the QP ptr */
 	spin_lock_irqsave(&qp->scq->hwq.lock, flags);
@@ -1162,8 +1083,12 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
-	    HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+
+	if (bnxt_qplib_queue_full(sq)) {
+		dev_err(&sq->hwq.pdev->dev,
+			"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+			sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+			sq->q_full_delta);
 		rc = -ENOMEM;
 		goto done;
 	}
@@ -1373,6 +1298,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 	}
 
 	sq->hwq.prod++;
+
+	qp->wqe_cnt++;
+
 done:
 	return rc;
 }
@@ -1411,8 +1339,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 		rc = -EINVAL;
 		goto done;
 	}
-	if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
-	    HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+	if (bnxt_qplib_queue_full(rq)) {
 		dev_err(&rq->hwq.pdev->dev,
 			"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
 		rc = -EINVAL;
@@ -1483,7 +1410,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_cq req;
-	struct creq_create_cq_resp *resp;
+	struct creq_create_cq_resp resp;
 	struct bnxt_qplib_pbl *pbl;
 	u16 cmd_flags = 0;
 	int rc;
@@ -1525,30 +1452,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 			(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
 			 CMDQ_CREATE_CQ_CNQ_ID_SFT);
 
-	resp = (struct creq_create_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
-		rc = -ETIMEDOUT;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
 		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
-		goto fail;
-	}
-	cq->id = le32_to_cpu(resp->xid);
+
+	cq->id = le32_to_cpu(resp.xid);
 	cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
 	cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
 	init_waitqueue_head(&cq->waitq);
@@ -1566,33 +1475,17 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_cq req;
-	struct creq_destroy_cq_resp *resp;
+	struct creq_destroy_cq_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
 
 	req.cq_cid = cpu_to_le32(cq->id);
-	resp = (struct creq_destroy_cq_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
 	bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
 	return 0;
 }
@@ -1664,14 +1557,113 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
 	return rc;
 }
 
+/* Note: SQEs are valid from sw_sq_cons up to cqe_sq_cons (exclusive).
+ *       CQEs are tracked from sw_cq_cons to max_element but are valid only
+ *       if VALID=1.
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+		     u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_swq *swq;
+	u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+	struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+	struct cq_req *peek_req_hwcqe;
+	struct bnxt_qplib_qp *peek_qp;
+	struct bnxt_qplib_q *peek_sq;
+	int i, rc = 0;
+
+	/* Normal mode */
+	/* Check for the psn_search marking before completing */
+	swq = &sq->swq[sw_sq_cons];
+	if (swq->psn_search &&
+	    le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+		/* Unmark */
+		swq->psn_search->flags_next_psn = cpu_to_le32
+			(le32_to_cpu(swq->psn_search->flags_next_psn)
+				     & ~0x80000000);
+		dev_dbg(&cq->hwq.pdev->dev,
+			"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		sq->condition = true;
+		sq->send_phantom = true;
+
+		/* TODO: Only ARM if the previous SQE is ARMALL */
+		bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+		rc = -EAGAIN;
+		goto out;
+	}
+	if (sq->condition) {
+		/* Peek at the completions */
+		peek_raw_cq_cons = cq->hwq.cons;
+		peek_sw_cq_cons = cq_cons;
+		i = cq->hwq.max_elements;
+		while (i--) {
+			peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+			peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+			peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+						     [CQE_IDX(peek_sw_cq_cons)];
+			/* If the next hwcqe is VALID */
+			if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+					  cq->hwq.max_elements)) {
+				/* If the next hwcqe is a REQ */
+				if ((peek_hwcqe->cqe_type_toggle &
+				    CQ_BASE_CQE_TYPE_MASK) ==
+				    CQ_BASE_CQE_TYPE_REQ) {
+					peek_req_hwcqe = (struct cq_req *)
+							 peek_hwcqe;
+					peek_qp = (struct bnxt_qplib_qp *)
+						((unsigned long)
+						 le64_to_cpu
+						 (peek_req_hwcqe->qp_handle));
+					peek_sq = &peek_qp->sq;
+					peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+						peek_req_hwcqe->sq_cons_idx) - 1
+						, &sq->hwq);
+					/* If the hwcqe's sq's wr_id matches */
+					if (peek_sq == sq &&
+					    sq->swq[peek_sq_cons_idx].wr_id ==
+					    BNXT_QPLIB_FENCE_WRID) {
+						/*
+						 *  Unbreak only if the phantom
+						 *  comes back
+						 */
+						dev_dbg(&cq->hwq.pdev->dev,
+							"FP:Got Phantom CQE");
+						sq->condition = false;
+						sq->single = true;
+						rc = 0;
+						goto out;
+					}
+				}
+				/* Valid but not the phantom, so keep looping */
+			} else {
+				/* Not valid yet, just exit and wait */
+				rc = -EINVAL;
+				goto out;
+			}
+			peek_sw_cq_cons++;
+			peek_raw_cq_cons++;
+		}
+		dev_err(&cq->hwq.pdev->dev,
+			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+			cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+		rc = -EINVAL;
+	}
+out:
+	return rc;
+}
+
 static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 				     struct cq_req *hwcqe,
-				     struct bnxt_qplib_cqe **pcqe, int *budget)
+				     struct bnxt_qplib_cqe **pcqe, int *budget,
+				     u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
 {
 	struct bnxt_qplib_qp *qp;
 	struct bnxt_qplib_q *sq;
 	struct bnxt_qplib_cqe *cqe;
-	u32 sw_cons, cqe_cons;
+	u32 sw_sq_cons, cqe_sq_cons;
+	struct bnxt_qplib_swq *swq;
 	int rc = 0;
 
 	qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -1683,13 +1675,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 	}
 	sq = &qp->sq;
 
-	cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
-	if (cqe_cons > sq->hwq.max_elements) {
+	cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+	if (cqe_sq_cons > sq->hwq.max_elements) {
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: FP: CQ Process req reported ");
 		dev_err(&cq->hwq.pdev->dev,
 			"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
-			cqe_cons, sq->hwq.max_elements);
+			cqe_sq_cons, sq->hwq.max_elements);
 		return -EINVAL;
 	}
 	/* If we were in the middle of flushing the SQ, continue */
@@ -1698,53 +1690,74 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 
 	/* Require to walk the sq's swq to fabricate CQEs for all previously
 	 * signaled SWQEs due to CQE aggregation from the current sq cons
-	 * to the cqe_cons
+	 * to the cqe_sq_cons
 	 */
 	cqe = *pcqe;
 	while (*budget) {
-		sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
-		if (sw_cons == cqe_cons)
+		sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+		if (sw_sq_cons == cqe_sq_cons)
+			/* Done */
 			break;
+
+		swq = &sq->swq[sw_sq_cons];
 		memset(cqe, 0, sizeof(*cqe));
 		cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
 		cqe->qp_handle = (u64)(unsigned long)qp;
 		cqe->src_qp = qp->id;
-		cqe->wr_id = sq->swq[sw_cons].wr_id;
-		cqe->type = sq->swq[sw_cons].type;
+		cqe->wr_id = swq->wr_id;
+		if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+			goto skip;
+		cqe->type = swq->type;
 
 		/* For the last CQE, check for status.  For errors, regardless
 		 * of the request being signaled or not, it must complete with
 		 * the hwcqe error status
 		 */
-		if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+		if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
 		    hwcqe->status != CQ_REQ_STATUS_OK) {
 			cqe->status = hwcqe->status;
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Processed Req ");
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
-				sw_cons, cqe->wr_id, cqe->status);
+				sw_sq_cons, cqe->wr_id, cqe->status);
 			cqe++;
 			(*budget)--;
 			sq->flush_in_progress = true;
 			/* Must block new posting of SQ and RQ */
 			qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+			sq->condition = false;
+			sq->single = false;
 		} else {
-			if (sq->swq[sw_cons].flags &
-			    SQ_SEND_FLAGS_SIGNAL_COMP) {
+			if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+				/* Before we complete, do WA 9060 */
+				if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+					      cqe_sq_cons)) {
+					*lib_qp = qp;
+					goto out;
+				}
 				cqe->status = CQ_REQ_STATUS_OK;
 				cqe++;
 				(*budget)--;
 			}
 		}
+skip:
 		sq->hwq.cons++;
+		if (sq->single)
+			break;
 	}
+out:
 	*pcqe = cqe;
-	if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+	if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
 		/* Out of budget */
 		rc = -EAGAIN;
 		goto done;
 	}
+	/*
+	 * Return to normal completion mode only after all WCs for this CQE
+	 * have been completed.
+	 */
+	sq->single = false;
 	if (!sq->flush_in_progress)
 		goto done;
 flush:
@@ -2074,7 +2087,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
 }
 
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num_cqes)
+		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe, **hw_cqe_ptr;
 	unsigned long flags;
@@ -2099,7 +2112,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		case CQ_BASE_CQE_TYPE_REQ:
 			rc = bnxt_qplib_cq_process_req(cq,
 						       (struct cq_req *)hw_cqe,
-						       &cqe, &budget);
+						       &cqe, &budget,
+						       sw_cons, lib_qp);
 			break;
 		case CQ_BASE_CQE_TYPE_RES_RC:
 			rc = bnxt_qplib_cq_process_res_rc(cq,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index f0150f8..36b7b7d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -88,6 +88,7 @@ struct bnxt_qplib_swq {
 
 struct bnxt_qplib_swqe {
 	/* General */
+#define	BNXT_QPLIB_FENCE_WRID	0x46454E43	/* "FENC" */
 	u64				wr_id;
 	u8				reqs_type;
 	u8				type;
@@ -216,9 +217,16 @@ struct bnxt_qplib_q {
 	struct scatterlist		*sglist;
 	u32				nmap;
 	u32				max_wqe;
+	u16				q_full_delta;
 	u16				max_sge;
 	u32				psn;
 	bool				flush_in_progress;
+	bool				condition;
+	bool				single;
+	bool				send_phantom;
+	u32				phantom_wqe_cnt;
+	u32				phantom_cqe_cnt;
+	u32				next_cq_cons;
 };
 
 struct bnxt_qplib_qp {
@@ -242,6 +250,7 @@ struct bnxt_qplib_qp {
 	u8				timeout;
 	u8				retry_cnt;
 	u8				rnr_retry;
+	u64				wqe_cnt;
 	u32				min_rnr_timer;
 	u32				max_rd_atomic;
 	u32				max_dest_rd_atomic;
@@ -301,6 +310,13 @@ struct bnxt_qplib_qp {
 	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
 	   !((raw_cons) & (cp_bit)))
 
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
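+	/* Full when advancing prod by q_full_delta would land on cons */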
+	return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+		       &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+						 &qplib_q->hwq);
+}
+
 struct bnxt_qplib_cqe {
 	u8				status;
 	u8				type;
@@ -432,7 +448,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-		       int num);
+		       int num, struct bnxt_qplib_qp **qp);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 23fb726..16e4275 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -39,72 +39,55 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_rcfw.h"
 static void bnxt_qplib_service_creq(unsigned long data);
 
 /* Hardware communication channel */
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
 	u16 cbit;
 	int rc;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
-	if (!test_bit(cbit, rcfw->cmdq_bitmap))
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: CMD bit %d for cookie 0x%x is not set?",
-			 cbit, cookie);
-
 	rc = wait_event_timeout(rcfw->waitq,
 				!test_bit(cbit, rcfw->cmdq_bitmap),
 				msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
-	if (!rc) {
-		dev_warn(&rcfw->pdev->dev,
-			 "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
-			 RCFW_CMD_WAIT_TIME_MS, cookie);
-	}
-
-	return rc;
+	return rc ? 0 : -ETIMEDOUT;
 };
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
-	u32 count = -1;
+	u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
 	u16 cbit;
 
-	cookie &= RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (!test_bit(cbit, rcfw->cmdq_bitmap))
 		goto done;
 	do {
+		mdelay(1); /* 1 msec */
 		bnxt_qplib_service_creq((unsigned long)rcfw);
 	} while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
 done:
-	return count;
+	return count ? 0 : -ETIMEDOUT;
 };
 
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block)
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+			  struct creq_base *resp, void *sb, u8 is_block)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
-	struct bnxt_qplib_crsqe *crsqe = NULL;
-	struct bnxt_qplib_crsbe **crsb_ptr;
+	struct bnxt_qplib_crsq *crsqe;
 	u32 sw_prod, cmdq_prod;
-	u8 retry_cnt = 0xFF;
-	dma_addr_t dma_addr;
 	unsigned long flags;
 	u32 size, opcode;
 	u16 cookie, cbit;
 	int pg, idx;
 	u8 *preq;
 
-retry:
 	opcode = req->opcode;
 	if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
@@ -112,63 +95,50 @@ void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		dev_err(&rcfw->pdev->dev,
 			"QPLIB: RCFW not initialized, reject opcode 0x%x",
 			opcode);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
 	    opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
-		return NULL;
+		return -EINVAL;
 	}
 
 	/* Cmdq are in 16-byte units, each request can consume 1 or more
 	 * cmdqe
 	 */
 	spin_lock_irqsave(&cmdq->lock, flags);
-	if (req->cmd_size > cmdq->max_elements -
-	    ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
-	     (cmdq->max_elements - 1))) {
+	if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
 		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EAGAIN;
 	}
 
-	retry_cnt = 0xFF;
 
-	cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+	cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
 	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
 	if (is_block)
 		cookie |= RCFW_CMD_IS_BLOCKING;
+
+	set_bit(cbit, rcfw->cmdq_bitmap);
 	req->cookie = cpu_to_le16(cookie);
-	if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW MAX outstanding cmd reached!");
-		atomic_dec(&rcfw->seq_num);
+	crsqe = &rcfw->crsqe_tbl[cbit];
+	if (crsqe->resp) {
 		spin_unlock_irqrestore(&cmdq->lock, flags);
-
-		if (!retry_cnt--)
-			return NULL;
-		goto retry;
+		return -EBUSY;
 	}
-	/* Reserve a resp buffer slot if requested */
-	if (req->resp_size && crsbe) {
-		spin_lock(&crsb->lock);
-		sw_prod = HWQ_CMP(crsb->prod, crsb);
-		crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
-		*crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
-					  [get_crsb_idx(sw_prod)];
-		bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
-		req->resp_addr = cpu_to_le64(dma_addr);
-		crsb->prod++;
-		spin_unlock(&crsb->lock);
+	memset(resp, 0, sizeof(*resp));
+	crsqe->resp = (struct creq_qp_event *)resp;
+	crsqe->resp->cookie = req->cookie;
+	crsqe->req_size = req->cmd_size;
+	if (req->resp_size && sb) {
+		struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
 
-		req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
-				  BNXT_QPLIB_CMDQE_UNITS - 1) /
-				 BNXT_QPLIB_CMDQE_UNITS;
+		req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+		req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+				  BNXT_QPLIB_CMDQE_UNITS;
 	}
+
 	cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
 	preq = (u8 *)req;
 	size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
@@ -190,23 +160,24 @@ void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		preq += min_t(u32, size, sizeof(*cmdqe));
 		size -= min_t(u32, size, sizeof(*cmdqe));
 		cmdq->prod++;
+		rcfw->seq_num++;
 	} while (size > 0);
 
+	rcfw->seq_num++;
+
 	cmdq_prod = cmdq->prod;
 	if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
-		/* The very first doorbell write is required to set this flag
-		 * which prompts the FW to reset its internal pointers
+		/* The very first doorbell write
+		 * is required to set this flag
+		 * which prompts the FW to reset
+		 * its internal pointers
 		 */
 		cmdq_prod |= FIRMWARE_FIRST_FLAG;
 		rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
 	}
-	sw_prod = HWQ_CMP(crsq->prod, crsq);
-	crsqe = &crsq->crsq[sw_prod];
-	memset(crsqe, 0, sizeof(*crsqe));
-	crsq->prod++;
-	crsqe->req_size = req->cmd_size;
 
 	/* ring CMDQ DB */
+	wmb();
 	writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
 	       rcfw->cmdq_bar_reg_prod_off);
 	writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
@@ -214,9 +185,56 @@ void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 done:
 	spin_unlock_irqrestore(&cmdq->lock, flags);
 	/* Return the CREQ response pointer */
-	return crsqe ? &crsqe->qp_event : NULL;
+	return 0;
 }
 
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req,
+				 struct creq_base *resp,
+				 void *sb, u8 is_block)
+{
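+	/*
+	 * Retry the enqueue on -EAGAIN/-EBUSY, then block (poll or wait queue)
+	 * for the CREQ completion; a non-zero firmware status maps to -EFAULT.
+	 */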
+	struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+	u16 cookie;
+	u8 opcode, retry_cnt = 0xFF;
+	int rc = 0;
+
+	do {
+		opcode = req->opcode;
+		rc = __send_message(rcfw, req, resp, sb, is_block);
+		cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+		if (!rc)
+			break;
+
+		if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+			/* send failed */
+			dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+				cookie, opcode);
+			return rc;
+		}
+		is_block ? mdelay(1) : usleep_range(500, 1000);
+
+	} while (retry_cnt--);
+
+	if (is_block)
+		rc = __block_for_resp(rcfw, cookie);
+	else
+		rc = __wait_for_resp(rcfw, cookie);
+	if (rc) {
+		/* timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+		return rc;
+	}
+
+	if (evnt->status) {
+		/* failed with status */
+		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+			cookie, opcode, evnt->status);
+		rc = -EFAULT;
+	}
+
+	return rc;
+}
 /* Completions */
 static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 					 struct creq_func_event *func_event)
@@ -260,12 +278,12 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 				       struct creq_qp_event *qp_event)
 {
-	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-	struct bnxt_qplib_crsqe *crsqe;
-	u16 cbit, cookie, blocked = 0;
+	struct bnxt_qplib_crsq *crsqe;
 	unsigned long flags;
-	u32 sw_cons;
+	u16 cbit, blocked = 0;
+	u16 cookie;
+	__le16  mcookie;
 
 	switch (qp_event->event) {
 	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
@@ -275,24 +293,31 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 	default:
 		/* Command Response */
 		spin_lock_irqsave(&cmdq->lock, flags);
-		sw_cons = HWQ_CMP(crsq->cons, crsq);
-		crsqe = &crsq->crsq[sw_cons];
-		crsq->cons++;
-		memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
-
-		cookie = le16_to_cpu(crsqe->qp_event.cookie);
+		cookie = le16_to_cpu(qp_event->cookie);
+		mcookie = qp_event->cookie;
 		blocked = cookie & RCFW_CMD_IS_BLOCKING;
 		cookie &= RCFW_MAX_COOKIE_VALUE;
 		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		crsqe = &rcfw->crsqe_tbl[cbit];
+		if (crsqe->resp &&
+		    crsqe->resp->cookie  == mcookie) {
+			memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+			crsqe->resp = NULL;
+		} else {
+			dev_err(&rcfw->pdev->dev,
+				"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+				crsqe->resp ? "mismatch" : "collision",
+				crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+		}
 		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
 			dev_warn(&rcfw->pdev->dev,
 				 "QPLIB: CMD bit %d was not requested", cbit);
-
 		cmdq->cons += crsqe->req_size;
-		spin_unlock_irqrestore(&cmdq->lock, flags);
+		crsqe->req_size = 0;
+
 		if (!blocked)
 			wake_up(&rcfw->waitq);
-		break;
+		spin_unlock_irqrestore(&cmdq->lock, flags);
 	}
 	return 0;
 }
@@ -305,12 +330,12 @@ static void bnxt_qplib_service_creq(unsigned long data)
 	struct creq_base *creqe, **creq_ptr;
 	u32 sw_cons, raw_cons;
 	unsigned long flags;
-	u32 type;
+	u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 
-	/* Service the CREQ until empty */
+	/* Service the CREQ until budget is over */
 	spin_lock_irqsave(&creq->lock, flags);
 	raw_cons = creq->cons;
-	while (1) {
+	while (budget > 0) {
 		sw_cons = HWQ_CMP(raw_cons, creq);
 		creq_ptr = (struct creq_base **)creq->pbl_ptr;
 		creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
@@ -320,15 +345,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 		type = creqe->type & CREQ_BASE_TYPE_MASK;
 		switch (type) {
 		case CREQ_BASE_TYPE_QP_EVENT:
-			if (!bnxt_qplib_process_qp_event
-			    (rcfw, (struct creq_qp_event *)creqe))
-				rcfw->creq_qp_event_processed++;
-			else {
-				dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
-				dev_warn(&rcfw->pdev->dev,
-					 "QPLIB: type = 0x%x not handled",
-					 type);
-			}
+			bnxt_qplib_process_qp_event
+				(rcfw, (struct creq_qp_event *)creqe);
+			rcfw->creq_qp_event_processed++;
 			break;
 		case CREQ_BASE_TYPE_FUNC_EVENT:
 			if (!bnxt_qplib_process_func_event
@@ -346,7 +365,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
 			break;
 		}
 		raw_cons++;
+		budget--;
 	}
+
 	if (creq->cons != raw_cons) {
 		creq->cons = raw_cons;
 		CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
@@ -375,23 +396,16 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
 /* RCFW */
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
 {
-	struct creq_deinitialize_fw_resp *resp;
 	struct cmdq_deinitialize_fw req;
+	struct creq_deinitialize_fw_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
-	resp = (struct creq_deinitialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp)
-		return -EINVAL;
-
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
-		return -ETIMEDOUT;
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
-		return -EFAULT;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 
 	clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
@@ -417,9 +431,10 @@ static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 			 struct bnxt_qplib_ctx *ctx, int is_virtfn)
 {
-	struct creq_initialize_fw_resp *resp;
 	struct cmdq_initialize_fw req;
+	struct creq_initialize_fw_resp resp;
 	u16 cmd_flags = 0, level;
+	int rc;
 
 	RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
 
@@ -482,37 +497,19 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 
 skip_ctx_setup:
 	req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
-	resp = (struct creq_initialize_fw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: RCFW: INITIALIZE_FW failed");
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
 	set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
 	return 0;
 }
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
-	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
-	kfree(rcfw->crsq.crsq);
+	kfree(rcfw->crsqe_tbl);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
 	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
-
 	rcfw->pdev = NULL;
 }
 
@@ -539,21 +536,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 		goto fail;
 	}
 
-	rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
-	rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
-				  sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
-	if (!rcfw->crsq.crsq)
+	rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+				  sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+	if (!rcfw->crsqe_tbl)
 		goto fail;
 
-	rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
-	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
-				      &rcfw->crsb.max_elements,
-				      BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
-				      HWQ_TYPE_CTX)) {
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: HW channel CRSB allocation failed");
-		goto fail;
-	}
 	return 0;
 
 fail:
@@ -606,7 +593,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	int rc;
 
 	/* General */
-	atomic_set(&rcfw->seq_num, 0);
+	rcfw->seq_num = 0;
 	rcfw->flags = FIRMWARE_FIRST_FLAG;
 	bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
 				  sizeof(unsigned long));
@@ -636,10 +623,6 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 
 	rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
 
-	/* CRSQ */
-	rcfw->crsq.prod = 0;
-	rcfw->crsq.cons = 0;
-
 	/* CREQ */
 	rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
 	res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
@@ -692,3 +675,34 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	__iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
 	return 0;
 }
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+		struct bnxt_qplib_rcfw *rcfw,
+		u32 size)
+{
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+	sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+	if (!sbuf)
+		return NULL;
+
+	sbuf->size = size;
+	sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+				       &sbuf->dma_addr, GFP_ATOMIC);
+	if (!sbuf->sb)
+		goto bail;
+
+	return sbuf;
+bail:
+	kfree(sbuf);
+	return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+	if (sbuf->sb)
+		dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+				  sbuf->sb, sbuf->dma_addr);
+	kfree(sbuf);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index d3567d7..09ce121 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -73,6 +73,7 @@
 #define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
 #define RCFW_MAX_COOKIE_VALUE		0x7FFF
 #define RCFW_CMD_IS_BLOCKING		0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT	0x4E20
 
 /* Cmdq contains a fix number of a 16-Byte slots */
 struct bnxt_qplib_cmdqe {
@@ -94,32 +95,6 @@ struct bnxt_qplib_crsbe {
 	u8			data[1024];
 };
 
-/* CRSQ SB */
-#define BNXT_QPLIB_CRSBE_MAX_CNT	4
-#define BNXT_QPLIB_CRSBE_UNITS		sizeof(struct bnxt_qplib_crsbe)
-#define BNXT_QPLIB_CRSBE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
-
-#define MAX_CRSB_IDX			(BNXT_QPLIB_CRSBE_MAX_CNT - 1)
-#define MAX_CRSB_IDX_PER_PG		(BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
-
-static inline u32 get_crsb_pg(u32 val)
-{
-	return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
-}
-
-static inline u32 get_crsb_idx(u32 val)
-{
-	return val & MAX_CRSB_IDX_PER_PG;
-}
-
-static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
-					    u32 prod, dma_addr_t *dma_addr)
-{
-		*dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
-		*dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
-			      BNXT_QPLIB_CRSBE_UNITS;
-}
-
 /* CREQ */
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
@@ -158,17 +133,19 @@ static inline u32 get_creq_idx(u32 val)
 #define CREQ_DB(db, raw_cons, cp_bit)				\
 	writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
 
+#define CREQ_ENTRY_POLL_BUDGET		0x100
+
 /* HWQ */
-struct bnxt_qplib_crsqe {
-	struct creq_qp_event	qp_event;
+
+struct bnxt_qplib_crsq {
+	struct creq_qp_event	*resp;
 	u32			req_size;
 };
 
-struct bnxt_qplib_crsq {
-	struct bnxt_qplib_crsqe	*crsq;
-	u32			prod;
-	u32			cons;
-	u32			max_elements;
+struct bnxt_qplib_rcfw_sbuf {
+	void *sb;
+	dma_addr_t dma_addr;
+	u32 size;
 };
 
 /* RCFW Communication Channels */
@@ -185,7 +162,7 @@ struct bnxt_qplib_rcfw {
 	wait_queue_head_t	waitq;
 	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
 					       struct creq_func_event *);
-	atomic_t		seq_num;
+	u32			seq_num;
 
 	/* Bar region info */
 	void __iomem		*cmdq_bar_reg_iomem;
@@ -203,8 +180,7 @@ struct bnxt_qplib_rcfw {
 
 	/* Actual Cmd and Resp Queues */
 	struct bnxt_qplib_hwq	cmdq;
-	struct bnxt_qplib_crsq	crsq;
-	struct bnxt_qplib_hwq	crsb;
+	struct bnxt_qplib_crsq	*crsqe_tbl;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
@@ -219,11 +195,14 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 					(struct bnxt_qplib_rcfw *,
 					 struct creq_func_event *));
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-				   struct cmdq_base *req, void **crsbe,
-				   u8 is_block);
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+				struct bnxt_qplib_rcfw *rcfw,
+				u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+			       struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				 struct cmdq_base *req, struct creq_base *resp,
+				 void *sbuf, u8 is_block);
 
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 6277d80..2e48555 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -48,6 +48,10 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
 #define HWQ_CMP(idx, hwq)	((idx) & ((hwq)->max_elements - 1))
 
+#define HWQ_FREE_SLOTS(hwq)	(hwq->max_elements - \
+				((HWQ_CMP(hwq->prod, hwq)\
+				- HWQ_CMP(hwq->cons, hwq))\
+				& (hwq->max_elements - 1)))
 enum bnxt_qplib_hwq_type {
 	HWQ_TYPE_CTX,
 	HWQ_TYPE_QUEUE,
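
The HWQ_FREE_SLOTS() macro added above derives the number of unused entries in a ring from its free-running producer and consumer indices; because max_elements is a power of two, masking the difference handles wrap-around. A minimal standalone sketch of the same arithmetic (illustrative only; the helper name is made up and is not part of the driver):

	/* Free entries in a ring whose size is a power of two. */
	static unsigned int ring_free_slots(unsigned int prod, unsigned int cons,
					    unsigned int max_elements)
	{
		/* entries currently in use, with wrap handled by the mask */
		unsigned int used = (prod - cons) & (max_elements - 1);

		return max_elements - used;
	}

Masking prod and cons individually first, as HWQ_CMP() does in the macro, yields the same result for power-of-two sizes.
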
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 7b31ecc..fde18cf 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -55,37 +55,30 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr)
 {
 	struct cmdq_query_func req;
-	struct creq_query_func_resp *resp;
+	struct creq_query_func_resp resp;
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
 	struct creq_query_func_resp_sb *sb;
 	u16 cmd_flags = 0;
 	u32 temp;
 	u8 *tqm_alloc;
-	int i;
+	int i, rc = 0;
 
 	RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
 
-	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-	resp = (struct creq_query_func_resp *)
-		bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
-					     0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf) {
 		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
+			"QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+		return -ENOMEM;
 	}
+
+	sb = sbuf->sb;
+	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  (void *)sbuf, 0);
+	if (rc)
+		goto bail;
+
 	/* Extract the context from the side buffer */
 	attr->max_qp = le32_to_cpu(sb->max_qp);
 	attr->max_qp_rd_atom =
@@ -95,6 +88,11 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
 		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
 	attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+	/*
+	 * 128 WQEs need to be reserved for the HW (8916), so don't
+	 * report the full maximum.
+	 */
+	attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
 	attr->max_qp_sges = sb->max_sge;
 	attr->max_cq = le32_to_cpu(sb->max_cq);
 	attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
@@ -130,7 +128,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 		attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
 		attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
 	}
-	return 0;
+
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
 }
 
 /* SGID */
@@ -178,8 +179,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	/* Remove GID from the SGID table */
 	if (update) {
 		struct cmdq_delete_gid req;
-		struct creq_delete_gid_resp *resp;
+		struct creq_delete_gid_resp resp;
 		u16 cmd_flags = 0;
+		int rc;
 
 		RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
 		if (sgid_tbl->hw_id[index] == 0xFFFF) {
@@ -188,31 +190,10 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 			return -EINVAL;
 		}
 		req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
-		resp = (struct creq_delete_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
-						     0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: DELETE_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
 	}
 	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
 	       sizeof(bnxt_qplib_gid_zero));
@@ -234,7 +215,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 						   struct bnxt_qplib_res,
 						   sgid_tbl);
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-	int i, free_idx, rc = 0;
+	int i, free_idx;
 
 	if (!sgid_tbl) {
 		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
@@ -266,10 +247,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	if (update) {
 		struct cmdq_add_gid req;
-		struct creq_add_gid_resp *resp;
+		struct creq_add_gid_resp resp;
 		u16 cmd_flags = 0;
 		u32 temp32[4];
 		u16 temp16[3];
+		int rc;
 
 		RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
 
@@ -290,31 +272,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		req.src_mac[1] = cpu_to_be16(temp16[1]);
 		req.src_mac[2] = cpu_to_be16(temp16[2]);
 
-		resp = (struct creq_add_gid_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-		if (!resp) {
-			dev_err(&res->pdev->dev,
-				"QPLIB: SP: ADD_GID send failed");
-			return -EINVAL;
-		}
-		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie))) {
-			/* Cmd timed out */
-			dev_err(&res->pdev->dev,
-				"QPIB: SP: ADD_GID timed out");
-			return -ETIMEDOUT;
-		}
-		if (resp->status ||
-		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-			dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
-			dev_err(&res->pdev->dev,
-				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-				resp->status, le16_to_cpu(req.cookie),
-				le16_to_cpu(resp->cookie));
-			return -EINVAL;
-		}
-		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+		rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						  (void *)&resp, NULL, 0);
+		if (rc)
+			return rc;
+		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
 	}
 	/* Add GID to the sgid_tbl */
 	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
@@ -325,7 +287,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 
 	*index = free_idx;
 	/* unlock */
-	return rc;
+	return 0;
 }
 
 /* pkeys */
@@ -422,10 +384,11 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_create_ah req;
-	struct creq_create_ah_resp *resp;
+	struct creq_create_ah_resp resp;
 	u16 cmd_flags = 0;
 	u32 temp32[4];
 	u16 temp16[3];
+	int rc;
 
 	RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
 
@@ -450,28 +413,12 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 	req.dest_mac[1] = cpu_to_le16(temp16[1]);
 	req.dest_mac[2] = cpu_to_le16(temp16[2]);
 
-	resp = (struct creq_create_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-	ah->id = le32_to_cpu(resp->xid);
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
+
+	ah->id = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -479,35 +426,19 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_destroy_ah req;
-	struct creq_destroy_ah_resp *resp;
+	struct creq_destroy_ah_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	/* Clean up the AH table in the device */
 	RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
 
 	req.ah_cid = cpu_to_le32(ah->id);
 
-	resp = (struct creq_destroy_ah_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 1);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 1);
+	if (rc)
+		return rc;
 	return 0;
 }
 
@@ -516,8 +447,9 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deallocate_key req;
-	struct creq_deallocate_key_resp *resp;
+	struct creq_deallocate_key_resp resp;
 	u16 cmd_flags = 0;
+	int rc;
 
 	if (mrw->lkey == 0xFFFFFFFF) {
 		dev_info(&res->pdev->dev,
@@ -536,27 +468,11 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	else
 		req.key = cpu_to_le32(mrw->lkey);
 
-	resp = (struct creq_deallocate_key_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+					  NULL, 0);
+	if (rc)
+		return rc;
+
 	/* Free the qplib's MRW memory */
 	if (mrw->hwq.max_elements)
 		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
@@ -568,9 +484,10 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_allocate_mrw req;
-	struct creq_allocate_mrw_resp *resp;
+	struct creq_allocate_mrw_resp resp;
 	u16 cmd_flags = 0;
 	unsigned long tmp;
+	int rc;
 
 	RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
 
@@ -584,33 +501,17 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 	tmp = (unsigned long)mrw;
 	req.mrw_handle = cpu_to_le64(tmp);
 
-	resp = (struct creq_allocate_mrw_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, 0);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
-		return -EINVAL;
-	}
-	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-		/* Cmd timed out */
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
+	if (rc)
+		return rc;
+
 	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
 	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
-		mrw->rkey = le32_to_cpu(resp->xid);
+		mrw->rkey = le32_to_cpu(resp.xid);
 	else
-		mrw->lkey = le32_to_cpu(resp->xid);
+		mrw->lkey = le32_to_cpu(resp.xid);
 	return 0;
 }
 
@@ -619,40 +520,17 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_deregister_mr req;
-	struct creq_deregister_mr_resp *resp;
+	struct creq_deregister_mr_resp resp;
 	u16 cmd_flags = 0;
 	int rc;
 
 	RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
 
 	req.lkey = cpu_to_le32(mrw->lkey);
-	resp = (struct creq_deregister_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
-		return -EINVAL;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
-		return -ETIMEDOUT;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
-		dev_err(&rcfw->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
+		return rc;
 
 	/* Free the qplib's MR memory */
 	if (mrw->hwq.max_elements) {
@@ -669,7 +547,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_register_mr req;
-	struct creq_register_mr_resp *resp;
+	struct creq_register_mr_resp resp;
 	u16 cmd_flags = 0, level;
 	int pg_ptrs, pages, i, rc;
 	dma_addr_t **pbl_ptr;
@@ -730,36 +608,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 	req.key = cpu_to_le32(mr->lkey);
 	req.mr_size = cpu_to_le64(mr->total_size);
 
-	resp = (struct creq_register_mr_resp *)
-			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-						     NULL, block);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "SP: REG_MR send failed");
-		rc = -EINVAL;
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, block);
+	if (rc)
 		goto fail;
-	}
-	if (block)
-		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-						    le16_to_cpu(req.cookie));
-	else
-		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-						   le16_to_cpu(req.cookie));
-	if (!rc) {
-		/* Cmd timed out */
-		dev_err(&res->pdev->dev, "SP: REG_MR timed out");
-		rc = -ETIMEDOUT;
-		goto fail;
-	}
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		rc = -EINVAL;
-		goto fail;
-	}
+
 	return 0;
 
 fail:
@@ -804,35 +657,15 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
 {
 	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
 	struct cmdq_map_tc_to_cos req;
-	struct creq_map_tc_to_cos_resp *resp;
+	struct creq_map_tc_to_cos_resp resp;
 	u16 cmd_flags = 0;
-	int tleft;
+	int rc = 0;
 
 	RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
 	req.cos0 = cpu_to_le16(cids[0]);
 	req.cos1 = cpu_to_le16(cids[1]);
 
-	resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
-	if (!resp) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
-		return -EINVAL;
-	}
-
-	tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
-	if (!tleft) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
-		return -ETIMEDOUT;
-	}
-
-	if (resp->status ||
-	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
-		dev_err(&res->pdev->dev,
-			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-			resp->status, le16_to_cpu(req.cookie),
-			le16_to_cpu(resp->cookie));
-		return -EINVAL;
-	}
-
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, NULL, 0);
-	return 0;
+	return rc;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 1442a61..a543f95 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -40,6 +40,8 @@
 #ifndef __BNXT_QPLIB_SP_H__
 #define __BNXT_QPLIB_SP_H__
 
+#define BNXT_QPLIB_RESERVED_QP_WRS	128
+
 struct bnxt_qplib_dev_attr {
 	char				fw_ver[32];
 	u16				max_sgid;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index b6fe459..0910faf 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -488,6 +488,7 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	release_ep_resources(ep);
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -498,6 +499,7 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	c4iw_put_ep(&ep->parent_ep->com);
 	release_ep_resources(ep);
+	kfree_skb(skb);
 	return 0;
 }
 
@@ -569,11 +571,13 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
 
 	pr_debug("%s rdev %p\n", __func__, rdev);
 	req->cmd = CPL_ABORT_NO_RST;
+	skb_get(skb);
 	ret = c4iw_ofld_send(rdev, skb);
 	if (ret) {
 		__state_set(&ep->com, DEAD);
 		queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
-	}
+	} else
+		kfree_skb(skb);
 }
 
 static int send_flowc(struct c4iw_ep *ep)
@@ -2517,7 +2521,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		goto reject;
 	}
 
-	hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+	hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+	       sizeof(struct tcphdr) +
 	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
 	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
 		child_ep->mtu = peer_mss + hdrs;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 329fb65e..ae0b79ae 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -767,7 +767,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 		kfree(entry);
 	}
 
-	list_for_each_safe(pos, nxt, &uctx->qpids) {
+	list_for_each_safe(pos, nxt, &uctx->cqids) {
 		entry = list_entry(pos, struct c4iw_qid_list, entry);
 		list_del_init(&entry->entry);
 		kfree(entry);
@@ -880,13 +880,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
 	if (!rdev->free_workq) {
 		err = -ENOMEM;
-		goto err_free_status_page;
+		goto err_free_status_page_and_wr_log;
 	}
 
 	rdev->status_page->db_off = 0;
 
 	return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+	if (c4iw_wr_log && rdev->wr_log)
+		kfree(rdev->wr_log);
 	free_page((unsigned long)rdev->status_page);
 destroy_ocqp_pool:
 	c4iw_ocqp_pool_destroy(rdev);
@@ -903,9 +905,11 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
 	destroy_workqueue(rdev->free_workq);
 	kfree(rdev->wr_log);
+	c4iw_release_dev_ucontext(rdev, &rdev->uctx);
 	free_page((unsigned long)rdev->status_page);
 	c4iw_pblpool_destroy(rdev);
 	c4iw_rqtpool_destroy(rdev);
+	c4iw_ocqp_pool_destroy(rdev);
 	c4iw_destroy_resource(&rdev->resource);
 }
 
@@ -971,7 +975,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 		 devp->rdev.lldi.sge_egrstatuspagesize);
 
 	devp->rdev.hw_queue.t4_eq_status_entries =
-		devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
+		devp->rdev.lldi.sge_egrstatuspagesize / 64;
 	devp->rdev.hw_queue.t4_max_eq_size = 65520;
 	devp->rdev.hw_queue.t4_max_iq_size = 65520;
 	devp->rdev.hw_queue.t4_max_rq_size = 8192 -
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 5d6b1ee..2ba00b8 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6312,25 +6312,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
 	}
 }
 
-static void write_global_credit(struct hfi1_devdata *dd,
-				u8 vau, u16 total, u16 shared)
+/*
+ * Set up allocation unit value.
+ */
+void set_up_vau(struct hfi1_devdata *dd, u8 vau)
 {
-	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-		  ((u64)total <<
-		   SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
-		  ((u64)shared <<
-		   SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
-		  ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+	u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+	/* do not modify other values in the register */
+	reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK;
+	reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT;
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
 }
 
 /*
  * Set up initial VL15 credits of the remote.  Assumes the rest of
- * the CM credit registers are zero from a previous global or credit reset .
+ * the CM credit registers are zero from a previous global or credit reset.
+ * Shared limit for VL15 will always be 0.
  */
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf)
 {
-	/* leave shared count at zero for both global and VL15 */
-	write_global_credit(dd, vau, vl15buf, 0);
+	u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+	/* set initial values for total and shared credit limit */
+	reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK |
+		 SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK);
+
+	/*
+	 * Set total limit to be equal to VL15 credits.
+	 * Leave shared limit at 0.
+	 */
+	reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
 
 	write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
 		  << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
@@ -6348,9 +6361,11 @@ void reset_link_credits(struct hfi1_devdata *dd)
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
 		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
-	write_global_credit(dd, 0, 0, 0);
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0);
 	/* reset the CM block */
 	pio_send_control(dd, PSC_CM_RESET);
+	/* reset cached value */
+	dd->vl15buf_cached = 0;
 }
 
 /* convert a vCU to a CU */
@@ -6839,24 +6854,35 @@ void handle_link_up(struct work_struct *work)
 {
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
 						  link_up_work);
+	struct hfi1_devdata *dd = ppd->dd;
+
 	set_link_state(ppd, HLS_UP_INIT);
 
 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
-	read_ltp_rtt(ppd->dd);
+	read_ltp_rtt(dd);
 	/*
 	 * OPA specifies that certain counters are cleared on a transition
 	 * to link up, so do that.
 	 */
-	clear_linkup_counters(ppd->dd);
+	clear_linkup_counters(dd);
 	/*
 	 * And (re)set link up default values.
 	 */
 	set_linkup_defaults(ppd);
 
+	/*
+	 * Set VL15 credits. Use cached value from verify cap interrupt.
+	 * In case of quick linkup or simulator, vl15 value will be set by
+	 * handle_linkup_change. VerifyCap interrupt handler will not be
+	 * called in those scenarios.
+	 */
+	if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR))
+		set_up_vl15(dd, dd->vl15buf_cached);
+
 	/* enforce link speed enabled */
 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
 		/* oops - current speed is not enabled, bounce */
-		dd_dev_err(ppd->dd,
+		dd_dev_err(dd,
 			   "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
 			   ppd->link_speed_active, ppd->link_speed_enabled);
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
@@ -7357,7 +7383,14 @@ void handle_verify_cap(struct work_struct *work)
 	 */
 	if (vau == 0)
 		vau = 1;
-	set_up_vl15(dd, vau, vl15buf);
+	set_up_vau(dd, vau);
+
+	/*
+	 * Set VL15 credits to 0 in global credit register. Cache remote VL15
+	 * credits value and wait for link-up interrupt to set it.
+	 */
+	set_up_vl15(dd, 0);
+	dd->vl15buf_cached = vl15buf;
 
 	/* set up the LCB CRC mode */
 	crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5bfa839..793514f 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -839,7 +839,9 @@
 #define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull
 #define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull
 #define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508)
+#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull
 #define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16
+#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull
 #define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull
 #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull
 #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index da322e6..414a04a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1045,6 +1045,14 @@ struct hfi1_devdata {
 	/* initial vl15 credits to use */
 	u16 vl15_init;
 
+	/*
+	 * Cached value for vl15buf, read during verify cap interrupt. VL15
+	 * credits are to be kept at 0 and set when handling the link-up
+	 * interrupt. This removes the possibility of receiving VL15 MAD
+	 * packets before this HFI is ready.
+	 */
+	u16 vl15buf_cached;
+
 	/* Misc small ints */
 	u8 n_krcv_queues;
 	u8 qos_shift;
@@ -1598,7 +1606,8 @@ int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
 
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
+void set_up_vau(struct hfi1_devdata *dd, u8 vau);
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index ba265d0..04a5082d 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -130,7 +130,8 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		 * the remote values.  Both sides must be using the values.
 		 */
 		if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-			set_up_vl15(dd, dd->vau, dd->vl15_init);
+			set_up_vau(dd, dd->vau);
+			set_up_vl15(dd, dd->vl15_init);
 			assign_remote_cm_au_table(dd, dd->vcu);
 		}
 
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 93faf86..6a9f6f9 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -207,8 +207,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 	/*
 	 * Save BARs and command to rewrite after device reset.
 	 */
-	dd->pcibar0 = addr;
-	dd->pcibar1 = addr >> 32;
+	pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
+	pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1);
 	pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
 	pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 069bdaf..1080778 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2159,8 +2159,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
-		if (!ret)
+		if (!ret) {
+			/* peer will send again */
+			rvt_put_ss(&qp->r_sge);
 			goto rnr_nak;
+		}
 		wc.ex.imm_data = ohdr->u.rc.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		goto send_last;
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 50d140d..2f3bbca 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -196,7 +196,8 @@ static const struct sysfs_ops port_cc_sysfs_ops = {
 };
 
 static struct attribute *port_cc_default_attributes[] = {
-	&cc_prescan_attr.attr
+	&cc_prescan_attr.attr,
+	NULL
 };
 
 static struct kobj_type port_cc_ktype = {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index f3bc01b..6ae98aa 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -784,7 +784,6 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
 	}
 
 	ctrl_ird |= IETF_PEER_TO_PEER;
-	ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
 	switch (mpa_key) {
 	case MPA_KEY_REQUEST:
@@ -2446,8 +2445,8 @@ static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
 		} else {
 			type = I40IW_CM_EVENT_CONNECTED;
 			cm_node->state = I40IW_CM_STATE_OFFLOADED;
-			i40iw_send_ack(cm_node);
 		}
+		i40iw_send_ack(cm_node);
 		break;
 	default:
 		pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index f82483b..a027e20 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -285,28 +285,20 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa
 	struct i40iw_sc_dev *dev = vsi->dev;
 	struct i40iw_sc_qp *qp = NULL;
 	bool qs_handle_change = false;
-	bool mss_change = false;
 	unsigned long flags;
 	u16 qs_handle;
 	int i;
 
-	if (vsi->mss != l2params->mss) {
-		mss_change = true;
-		vsi->mss = l2params->mss;
-	}
+	vsi->mss = l2params->mss;
 
 	i40iw_fill_qos_list(l2params->qs_handle_list);
 	for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
 		qs_handle = l2params->qs_handle_list[i];
 		if (vsi->qos[i].qs_handle != qs_handle)
 			qs_handle_change = true;
-		else if (!mss_change)
-			continue;       /* no MSS nor qs handle change */
 		spin_lock_irqsave(&vsi->qos[i].lock, flags);
 		qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
 		while (qp) {
-			if (mss_change)
-				i40iw_qp_mss_modify(dev, qp);
 			if (qs_handle_change) {
 				qp->qs_handle = qs_handle;
 				/* issue cqp suspend command */
@@ -2395,7 +2387,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
 
 	set_64bit_val(wqe,
 		      8,
-		      LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
 		      LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
 
 	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
@@ -2410,7 +2401,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify(
 		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
 		 LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
 		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
-		 LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
 		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
 		 LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
 		 LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 2728af3..e0f47cc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1319,13 +1319,13 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
 	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
 				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
 	if (status)
-		goto exit;
+		goto error;
 	info.fpm_query_buf_pa = mem.pa;
 	info.fpm_query_buf = mem.va;
 	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
 				       I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
 	if (status)
-		goto exit;
+		goto error;
 	info.fpm_commit_buf_pa = mem.pa;
 	info.fpm_commit_buf = mem.va;
 	info.hmc_fn_id = ldev->fid;
@@ -1347,11 +1347,9 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
 	info.exception_lan_queue = 1;
 	info.vchnl_send = i40iw_virtchnl_send;
 	status = i40iw_device_init(&iwdev->sc_dev, &info);
-exit:
-	if (status) {
-		kfree(iwdev->hmc_info_mem);
-		iwdev->hmc_info_mem = NULL;
-	}
+
+	if (status)
+		goto error;
 	memset(&vsi_info, 0, sizeof(vsi_info));
 	vsi_info.dev = &iwdev->sc_dev;
 	vsi_info.back_vsi = (void *)iwdev;
@@ -1362,11 +1360,19 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
 		memset(&stats_info, 0, sizeof(stats_info));
 		stats_info.fcn_id = ldev->fid;
 		stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+		if (!stats_info.pestat) {
+			status = I40IW_ERR_NO_MEMORY;
+			goto error;
+		}
 		stats_info.stats_initialize = true;
 		if (stats_info.pestat)
 			i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
 	}
 	return status;
+error:
+	kfree(iwdev->hmc_info_mem);
+	iwdev->hmc_info_mem = NULL;
+	return status;
 }
 
 /**
@@ -1933,7 +1939,7 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev,
 bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
 {
 	struct i40iw_device *iwdev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	iwdev = dev->back_dev;
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index aa66c1c..f27be3e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -199,7 +199,6 @@ void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
 			    struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
 void *i40iw_remove_head(struct list_head *list);
 void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
 
 void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
 void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 7b76259..959ec81 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -541,7 +541,6 @@ struct i40iw_create_qp_info {
 struct i40iw_modify_qp_info {
 	u64 rx_win0;
 	u64 rx_win1;
-	u16 new_mss;
 	u8 next_iwarp_state;
 	u8 termlen;
 	bool ord_valid;
@@ -554,7 +553,6 @@ struct i40iw_modify_qp_info {
 	bool dont_send_term;
 	bool dont_send_fin;
 	bool cached_var_valid;
-	bool mss_change;
 	bool force_loopback;
 };
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 409a378..56d9869 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -757,23 +757,6 @@ void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, b
 }
 
 /**
- * i40iw_qp_mss_modify - modify mss for qp
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
-	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
-	struct i40iw_modify_qp_info info;
-
-	memset(&info, 0, sizeof(info));
-	info.mss_change = true;
-	info.new_mss = qp->vsi->mss;
-	i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
-}
-
-/**
  * i40iw_term_modify_qp - modify qp for term message
  * @qp: hardware control qp
  * @next_state: qp's next state
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index f4d1368..48fd327 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -443,10 +443,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
 	if (!dev->vchnl_up)
 		return I40IW_ERR_NOT_READY;
 	if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
-		if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
-			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
-		else
-			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
 		return I40IW_SUCCESS;
 	}
 	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index b469471..21d31cb 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1578,6 +1578,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 	if (port < 0)
 		return;
 	ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
+	ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port);
 
 	mlx4_ib_query_ah(&ah.ibah, &ah_attr);
 	if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d45772d..9ecc089 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2979,6 +2979,18 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
 	return ret;
 }
 
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+{
+	switch (umr_fence_cap) {
+	case MLX5_CAP_UMR_FENCE_NONE:
+		return MLX5_FENCE_MODE_NONE;
+	case MLX5_CAP_UMR_FENCE_SMALL:
+		return MLX5_FENCE_MODE_INITIATOR_SMALL;
+	default:
+		return MLX5_FENCE_MODE_STRONG_ORDERING;
+	}
+}
+
 static int create_dev_resources(struct mlx5_ib_resources *devr)
 {
 	struct ib_srq_init_attr attr;
@@ -3680,8 +3692,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
-	dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
-	dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+		dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+		dev->ib_dev.free_rdma_netdev	= mlx5_ib_free_rdma_netdev;
+	}
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
 		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
@@ -3693,6 +3707,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
 	mlx5_ib_internal_fill_odp_caps(dev);
 
+	dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
+
 	if (MLX5_CAP_GEN(mdev, imaicl)) {
 		dev->ib_dev.alloc_mw		= mlx5_ib_alloc_mw;
 		dev->ib_dev.dealloc_mw		= mlx5_ib_dealloc_mw;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 38c877b..bdcf254 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -349,7 +349,7 @@ struct mlx5_ib_qp {
 	struct mlx5_ib_wq	rq;
 
 	u8			sq_signal_bits;
-	u8			fm_cache;
+	u8			next_fence;
 	struct mlx5_ib_wq	sq;
 
 	/* serialize qp state modifications
@@ -654,6 +654,7 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_port	*port;
 	struct mlx5_sq_bfreg     bfreg;
 	struct mlx5_sq_bfreg     fp_bfreg;
+	u8				umr_fence;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 93959e1..ebb6768 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3738,24 +3738,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 	}
 }
 
-static u8 get_fence(u8 fence, struct ib_send_wr *wr)
-{
-	if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
-		     wr->send_flags & IB_SEND_FENCE))
-		return MLX5_FENCE_MODE_STRONG_ORDERING;
-
-	if (unlikely(fence)) {
-		if (wr->send_flags & IB_SEND_FENCE)
-			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
-		else
-			return fence;
-	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
-		return MLX5_FENCE_MODE_FENCE;
-	}
-
-	return 0;
-}
-
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     struct mlx5_wqe_ctrl_seg **ctrl,
 		     struct ib_send_wr *wr, unsigned *idx,
@@ -3784,8 +3766,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 static void finish_wqe(struct mlx5_ib_qp *qp,
 		       struct mlx5_wqe_ctrl_seg *ctrl,
 		       u8 size, unsigned idx, u64 wr_id,
-		       int nreq, u8 fence, u8 next_fence,
-		       u32 mlx5_opcode)
+		       int nreq, u8 fence, u32 mlx5_opcode)
 {
 	u8 opmod = 0;
 
@@ -3793,7 +3774,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 					     mlx5_opcode | ((u32)opmod << 24));
 	ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
 	ctrl->fm_ce_se |= fence;
-	qp->fm_cache = next_fence;
 	if (unlikely(qp->wq_sig))
 		ctrl->signature = wq_sig(ctrl);
 
@@ -3853,7 +3833,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
-		fence = qp->fm_cache;
 		num_sge = wr->num_sge;
 		if (unlikely(num_sge > qp->sq.max_gs)) {
 			mlx5_ib_warn(dev, "\n");
@@ -3870,6 +3849,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
+		if (wr->opcode == IB_WR_LOCAL_INV ||
+		    wr->opcode == IB_WR_REG_MR) {
+			fence = dev->umr_fence;
+			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+		} else if (wr->send_flags & IB_SEND_FENCE) {
+			if (qp->next_fence)
+				fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+			else
+				fence = MLX5_FENCE_MODE_FENCE;
+		} else {
+			fence = qp->next_fence;
+		}
+
 		switch (ibqp->qp_type) {
 		case IB_QPT_XRC_INI:
 			xrc = seg;
@@ -3896,7 +3888,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				goto out;
 
 			case IB_WR_LOCAL_INV:
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
 				ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
 				set_linv_wr(qp, &seg, &size);
@@ -3904,7 +3895,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				break;
 
 			case IB_WR_REG_MR:
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				qp->sq.wr_data[idx] = IB_WR_REG_MR;
 				ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
 				err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
@@ -3927,9 +3917,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_UMR);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_UMR);
 				/*
 				 * SET_PSV WQEs are not signaled and solicited
 				 * on error
@@ -3954,9 +3943,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_SET_PSV);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_SET_PSV);
 				err = begin_wqe(qp, &seg, &ctrl, wr,
 						&idx, &size, nreq);
 				if (err) {
@@ -3966,7 +3954,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
 						 mr->sig->psv_wire.psv_idx, &seg,
 						 &size);
@@ -3976,9 +3963,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					goto out;
 				}
 
-				finish_wqe(qp, ctrl, size, idx, wr->wr_id,
-					   nreq, get_fence(fence, wr),
-					   next_fence, MLX5_OPCODE_SET_PSV);
+				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+					   fence, MLX5_OPCODE_SET_PSV);
+				qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
 				num_sge = 0;
 				goto skip_psv;
 
@@ -4089,8 +4076,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			}
 		}
 
-		finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
-			   get_fence(fence, wr), next_fence,
+		qp->next_fence = next_fence;
+		finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
 			   mlx5_ib_opcode[wr->opcode]);
 skip_psv:
 		if (0)
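
With the fence rework above, get_fence() is gone and the fence for each WQE is chosen inline in mlx5_ib_post_send(): UMR-related requests (IB_WR_LOCAL_INV and IB_WR_REG_MR) take the device-wide UMR fence derived from the umr_fence capability and also arm qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL so the following WQE waits for them; an explicit IB_SEND_FENCE upgrades whatever fence is pending; anything else simply inherits the pending value. A rough sketch of just that decision, using a hypothetical helper name rather than the driver's code:

	static u8 pick_fence(u8 dev_umr_fence, u8 pending_fence,
			     bool is_umr_wr, bool send_fence_flag)
	{
		if (is_umr_wr)		/* IB_WR_LOCAL_INV or IB_WR_REG_MR */
			return dev_umr_fence;
		if (send_fence_flag)	/* caller set IB_SEND_FENCE */
			return pending_fence ? MLX5_FENCE_MODE_SMALL_AND_FENCE :
					       MLX5_FENCE_MODE_FENCE;
		return pending_fence;	/* inherit the pending fence, if any */
	}
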
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index fb983df..30b256a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -610,7 +610,6 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
 		ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
 	}
 	ctrl_ird |= IETF_PEER_TO_PEER;
-	ctrl_ird |= IETF_FLPDU_ZERO_LEN;
 
 	switch (mpa_key) {
 	case MPA_KEY_REQUEST:
@@ -1826,7 +1825,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 			type = NES_CM_EVENT_CONNECTED;
 			cm_node->state = NES_CM_STATE_TSA;
 		}
-
+		send_ack(cm_node, NULL);
 		break;
 	default:
 		WARN_ON(1);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index aa08c76..d961f79 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -58,7 +58,10 @@
 #define QEDR_MSG_QP   "  QP"
 #define QEDR_MSG_GSI  " GSI"
 
-#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER	(0x11223344)
+
+#define FW_PAGE_SIZE		(RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT		(12)
 
 struct qedr_dev;
 
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index 3d7705c..d86dbe8 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -270,11 +270,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 		return rc;
 	}
 
-	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
-	if (vlan_id < VLAN_CFI_MASK)
-		has_vlan = true;
-	if (sgid_attr.ndev)
+	if (sgid_attr.ndev) {
+		vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+		if (vlan_id < VLAN_CFI_MASK)
+			has_vlan = true;
+
 		dev_put(sgid_attr.ndev);
+	}
 
 	if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
 		DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 17685cf..d6723c3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -653,14 +653,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
 
 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 			       struct qedr_pbl *pbl,
-			       struct qedr_pbl_info *pbl_info)
+			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+	u32 fw_pg_cnt, fw_pg_per_umem_pg;
 	struct qedr_pbl *pbl_tbl;
 	struct scatterlist *sg;
 	struct regpair *pbe;
+	u64 pg_addr;
 	int entry;
-	u32 addr;
 
 	if (!pbl_info->num_pbes)
 		return;
@@ -683,29 +684,35 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
 	shift = umem->page_shift;
 
+	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
+
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
 		pages = sg_dma_len(sg) >> shift;
+		pg_addr = sg_dma_address(sg);
 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-			/* store the page address in pbe */
-			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
-					      (pg_cnt << shift));
-			addr = upper_32_bits(sg_dma_address(sg) +
-					     (pg_cnt << shift));
-			pbe->hi = cpu_to_le32(addr);
-			pbe_cnt++;
-			total_num_pbes++;
-			pbe++;
+			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+				pbe->lo = cpu_to_le32(pg_addr);
+				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
 
-			if (total_num_pbes == pbl_info->num_pbes)
-				return;
+				pg_addr += BIT(pg_shift);
+				pbe_cnt++;
+				total_num_pbes++;
+				pbe++;
 
-			/* If the given pbl is full storing the pbes,
-			 * move to next pbl.
-			 */
-			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-				pbl_tbl++;
-				pbe = (struct regpair *)pbl_tbl->va;
-				pbe_cnt = 0;
+				if (total_num_pbes == pbl_info->num_pbes)
+					return;
+
+				/* If the given pbl is full storing the pbes,
+				 * move to next pbl.
+				 */
+				if (pbe_cnt ==
+				    (pbl_info->pbl_size / sizeof(u64))) {
+					pbl_tbl++;
+					pbe = (struct regpair *)pbl_tbl->va;
+					pbe_cnt = 0;
+				}
+
+				fw_pg_cnt++;
 			}
 		}
 	}
@@ -754,7 +761,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 				       u64 buf_addr, size_t buf_len,
 				       int access, int dmasync)
 {
-	int page_cnt;
+	u32 fw_pages;
 	int rc;
 
 	q->buf_addr = buf_addr;
@@ -766,8 +773,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		return PTR_ERR(q->umem);
 	}
 
-	page_cnt = ib_umem_page_count(q->umem);
-	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+	fw_pages = ib_umem_page_count(q->umem) <<
+	    (q->umem->page_shift - FW_PAGE_SHIFT);
+
+	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
 	if (rc)
 		goto err0;
 
@@ -777,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 		goto err0;
 	}
 
-	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+			   FW_PAGE_SHIFT);
 
 	return 0;
 
@@ -2226,7 +2236,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 		goto err1;
 
 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
-			   &mr->info.pbl_info);
+			   &mr->info.pbl_info, mr->umem->page_shift);
 
 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
 	if (rc) {
@@ -3209,6 +3219,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
 		case IB_WC_REG_MR:
 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
 			break;
+		case IB_WC_RDMA_READ:
+		case IB_WC_SEND:
+			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index fc8b885..4ddbcac 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1956,8 +1956,10 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 		ret = qib_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
-		if (!ret)
+		if (!ret) {
+			rvt_put_ss(&qp->r_sge);
 			goto rnr_nak;
+		}
 		wc.ex.imm_data = ohdr->u.rc.imm_data;
 		hdrsize += 4;
 		wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index ecdba2f..1ac5b85 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -68,6 +68,7 @@
 static inline u32 rxe_crc32(struct rxe_dev *rxe,
 			    u32 crc, void *next, size_t len)
 {
+	u32 retval;
 	int err;
 
 	SHASH_DESC_ON_STACK(shash, rxe->tfm);
@@ -81,7 +82,9 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
 		return crc32_le(crc, next, len);
 	}
 
-	return *(u32 *)shash_desc_ctx(shash);
+	retval = *(u32 *)shash_desc_ctx(shash);
+	barrier_data(shash_desc_ctx(shash));
+	return retval;
 }
 
 int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 83d709e..073e667 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -740,13 +740,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 
 		sge = ibwr->sg_list;
 		for (i = 0; i < num_sge; i++, sge++) {
-			if (qp->is_user && copy_from_user(p, (__user void *)
-					    (uintptr_t)sge->addr, sge->length))
-				return -EFAULT;
-
-			else if (!qp->is_user)
-				memcpy(p, (void *)(uintptr_t)sge->addr,
-				       sge->length);
+			memcpy(p, (void *)(uintptr_t)sge->addr,
+					sge->length);
 
 			p += sge->length;
 		}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 874b243..7871379 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -178,7 +178,7 @@ static inline int ib_speed_enum_to_int(int speed)
 static int ipoib_get_link_ksettings(struct net_device *netdev,
 				    struct ethtool_link_ksettings *cmd)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(netdev);
+	struct ipoib_dev_priv *priv = ipoib_priv(netdev);
 	struct ib_port_attr attr;
 	int ret, speed, width;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0060b2f..efe7402 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -863,7 +863,6 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
-	napi_enable(&priv->napi);
 	ipoib_ib_dev_stop(dev);
 	return -1;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2869d1a..1015a63 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1590,12 +1590,14 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 	wait_for_completion(&priv->ntbl.deleted);
 }
 
-void ipoib_dev_uninit_default(struct net_device *dev)
+static void ipoib_dev_uninit_default(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ipoib_transport_dev_cleanup(dev);
 
+	netif_napi_del(&priv->napi);
+
 	ipoib_cm_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1649,6 +1651,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
 	kfree(priv->rx_ring);
 
 out:
+	netif_napi_del(&priv->napi);
 	return -ENOMEM;
 }
 
@@ -2237,6 +2240,7 @@ static struct net_device *ipoib_add_port(const char *format,
 
 device_init_failed:
 	free_netdev(priv->dev);
+	kfree(priv);
 
 alloc_mem_failed:
 	return ERR_PTR(result);
@@ -2277,7 +2281,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-	struct ipoib_dev_priv *priv, *tmp;
+	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
 	struct list_head *dev_list = client_data;
 
 	if (!dev_list)
@@ -2300,7 +2304,14 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
-		free_netdev(priv->dev);
+		if (device->free_rdma_netdev)
+			device->free_rdma_netdev(priv->dev);
+		else
+			free_netdev(priv->dev);
+
+		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+			kfree(cpriv);
+
 		kfree(priv);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 36dc4fc..081b33d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -133,13 +133,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
 	if (!priv)
 		return -ENOMEM;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-
 	down_write(&ppriv->vlan_rwsem);
 
 	/*
@@ -167,8 +167,10 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 
 	rtnl_unlock();
 
-	if (result)
+	if (result) {
 		free_netdev(priv->dev);
+		kfree(priv);
+	}
 
 	return result;
 }
@@ -209,6 +211,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	if (dev) {
 		free_netdev(dev);
+		kfree(priv);
 		return 0;
 	}
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index def723a..2354c74 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -320,7 +320,7 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
 	ch->path.sgid = target->sgid;
 	ch->path.dgid = target->orig_dgid;
 	ch->path.pkey = target->pkey;
-	sa_path_set_service_id(&ch->path, target->service_id);
+	ch->path.service_id = target->service_id;
 
 	return 0;
 }
@@ -575,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
 	return 0;
 
 err_qp:
-	srp_destroy_qp(ch, qp);
+	ib_destroy_qp(qp);
 
 err_send_cq:
 	ib_free_cq(send_cq);
diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c
index 485900f..abc266e 100644
--- a/drivers/input/keyboard/tm2-touchkey.c
+++ b/drivers/input/keyboard/tm2-touchkey.c
@@ -213,7 +213,7 @@ static int tm2_touchkey_probe(struct i2c_client *client,
 	/* led device */
 	touchkey->led_dev.name = TM2_TOUCHKEY_DEV_NAME;
 	touchkey->led_dev.brightness = LED_FULL;
-	touchkey->led_dev.max_brightness = LED_FULL;
+	touchkey->led_dev.max_brightness = LED_ON;
 	touchkey->led_dev.brightness_set = tm2_touchkey_led_brightness_set;
 
 	error = devm_led_classdev_register(&client->dev, &touchkey->led_dev);
diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c
index f11807d..400869e 100644
--- a/drivers/input/misc/axp20x-pek.c
+++ b/drivers/input/misc/axp20x-pek.c
@@ -256,6 +256,42 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
+static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek,
+					     struct platform_device *pdev)
+{
+	unsigned long long hrv = 0;
+	acpi_status status;
+
+	if (IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY) &&
+	    axp20x_pek->axp20x->variant == AXP288_ID) {
+		status = acpi_evaluate_integer(ACPI_HANDLE(pdev->dev.parent),
+					       "_HRV", NULL, &hrv);
+		if (ACPI_FAILURE(status))
+			dev_err(&pdev->dev, "Failed to get PMIC hardware revision\n");
+
+		/*
+		 * On Cherry Trail platforms (hrv == 3), do not register the
+		 * input device if there is an "INTCFD9" or "ACPI0011" gpio
+		 * button ACPI device, as that handles the power button too,
+		 * and otherwise we end up reporting all presses twice.
+		 */
+		if (hrv == 3 && (acpi_dev_present("INTCFD9", NULL, -1) ||
+				 acpi_dev_present("ACPI0011", NULL, -1)))
+			return false;
+
+	}
+
+	return true;
+}
+#else
+static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek,
+					     struct platform_device *pdev)
+{
+	return true;
+}
+#endif
+
 static int axp20x_pek_probe(struct platform_device *pdev)
 {
 	struct axp20x_pek *axp20x_pek;
@@ -268,13 +304,7 @@ static int axp20x_pek_probe(struct platform_device *pdev)
 
 	axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent);
 
-	/*
-	 * Do not register the input device if there is an "INTCFD9"
-	 * gpio button ACPI device, that handles the power button too,
-	 * and otherwise we end up reporting all presses twice.
-	 */
-	if (!acpi_dev_found("INTCFD9") ||
-	    !IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY)) {
+	if (axp20x_pek_should_register_input(axp20x_pek, pdev)) {
 		error = axp20x_pek_probe_input_device(axp20x_pek, pdev);
 		if (error)
 			return error;
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index e37d372..f600f3a 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 
 	if (!btns_desc) {
 		dev_err(dev, "ACPI Button Descriptors not found\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 	}
 	if (collection_uid == -1) {
 		dev_err(dev, "Invalid Button Collection Descriptor\n");
-		return ERR_PTR(-ENODEV);
+		button_info = ERR_PTR(-ENODEV);
+		goto out;
 	}
 
 	/* There are package.count - 1 buttons + 1 terminating empty entry */
 	button_info = devm_kcalloc(dev, btns_desc->package.count,
 				   sizeof(*button_info), GFP_KERNEL);
-	if (!button_info)
-		return ERR_PTR(-ENOMEM);
+	if (!button_info) {
+		button_info = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	/* Parse the button descriptors */
 	for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
 		if (soc_button_parse_btn_desc(dev,
 					      &btns_desc->package.elements[i],
 					      collection_uid,
-					      &button_info[btn]))
-			return ERR_PTR(-ENODEV);
+					      &button_info[btn])) {
+			button_info = ERR_PTR(-ENODEV);
+			goto out;
+		}
 	}
 
+out:
+	kfree(buf.pointer);
 	return button_info;
 }
 
diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index a679e56..f431da0 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -554,32 +554,34 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client,
 				     struct completion *completion)
 {
 	struct device *dev = &client->dev;
-	long ret;
 	int error;
 	int len;
-	u8 buffer[ETP_I2C_INF_LENGTH];
+	u8 buffer[ETP_I2C_REPORT_LEN];
+
+	len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN);
+	if (len != ETP_I2C_REPORT_LEN) {
+		error = len < 0 ? len : -EIO;
+		dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n",
+			error, len);
+	}
 
 	reinit_completion(completion);
 	enable_irq(client->irq);
 
 	error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
-	if (!error)
-		ret = wait_for_completion_interruptible_timeout(completion,
-							msecs_to_jiffies(300));
-	disable_irq(client->irq);
-
 	if (error) {
 		dev_err(dev, "device reset failed: %d\n", error);
-		return error;
-	} else if (ret == 0) {
+	} else if (!wait_for_completion_timeout(completion,
+						msecs_to_jiffies(300))) {
 		dev_err(dev, "timeout waiting for device reset\n");
-		return -ETIMEDOUT;
-	} else if (ret < 0) {
-		error = ret;
-		dev_err(dev, "error waiting for device reset: %d\n", error);
-		return error;
+		error = -ETIMEDOUT;
 	}
 
+	disable_irq(client->irq);
+
+	if (error)
+		return error;
+
 	len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
 	if (len != ETP_I2C_INF_LENGTH) {
 		error = len < 0 ? len : -EIO;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index e73d968..f1fa1f1 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,8 +1118,10 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
  * Avatar AVIU-145A2       0x361f00        ?               clickpad
  * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E546   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
+ * Fujitsu LIFEBOOK E557   0x570f01        40, 14, 0c      2 hw buttons
  * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
  * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
  * Gigabyte U2442          0x450f01        58, 17, 0c      2 hw buttons
@@ -1525,6 +1527,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 		},
 	},
 	{
+		/* Fujitsu LIFEBOOK E546  does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E546"),
+		},
+	},
+	{
 		/* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
@@ -1546,6 +1555,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 		},
 	},
 	{
+		/* Fujitsu LIFEBOOK E557 does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E557"),
+		},
+	},
+	{
 		/* Fujitsu LIFEBOOK U745 does not work with crc_enabled == 0 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 131df9d..16c3046 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -176,6 +176,12 @@ static const char * const smbus_pnp_ids[] = {
 	NULL
 };
 
+static const char * const forcepad_pnp_ids[] = {
+	"SYN300D",
+	"SYN3014",
+	NULL
+};
+
 /*
  * Send a command to the synpatics touchpad by special commands
  */
@@ -397,6 +403,8 @@ static int synaptics_query_hardware(struct psmouse *psmouse,
 {
 	int error;
 
+	memset(info, 0, sizeof(*info));
+
 	error = synaptics_identify(psmouse, info);
 	if (error)
 		return error;
@@ -480,13 +488,6 @@ static const struct min_max_quirk min_max_pnpid_table[] = {
 	{ }
 };
 
-/* This list has been kindly provided by Synaptics. */
-static const char * const forcepad_pnp_ids[] = {
-	"SYN300D",
-	"SYN3014",
-	NULL
-};
-
 /*****************************************************************************
  *	Synaptics communications functions
  ****************************************************************************/
@@ -1687,7 +1688,8 @@ enum {
 	SYNAPTICS_INTERTOUCH_ON,
 };
 
-static int synaptics_intertouch = SYNAPTICS_INTERTOUCH_NOT_SET;
+static int synaptics_intertouch = IS_ENABLED(CONFIG_RMI4_SMB) ?
+		SYNAPTICS_INTERTOUCH_NOT_SET : SYNAPTICS_INTERTOUCH_OFF;
 module_param_named(synaptics_intertouch, synaptics_intertouch, int, 0644);
 MODULE_PARM_DESC(synaptics_intertouch, "Use a secondary bus for the Synaptics device.");
 
@@ -1737,8 +1739,16 @@ static int synaptics_setup_intertouch(struct psmouse *psmouse,
 
 	if (synaptics_intertouch == SYNAPTICS_INTERTOUCH_NOT_SET) {
 		if (!psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) &&
-		    !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids))
+		    !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) {
+
+			if (!psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids))
+				psmouse_info(psmouse,
+					     "Your touchpad (%s) says it can support a different bus. "
+					     "If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org.\n",
+					     psmouse->ps2dev.serio->firmware_id);
+
 			return -ENXIO;
+		}
 	}
 
 	psmouse_info(psmouse, "Trying to set up SMBus access\n");
@@ -1810,6 +1820,15 @@ int synaptics_init(struct psmouse *psmouse)
 	}
 
 	if (SYN_CAP_INTERTOUCH(info.ext_cap_0c)) {
+		if ((!IS_ENABLED(CONFIG_RMI4_SMB) ||
+		     !IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS)) &&
+		    /* Forcepads need F21, which is not ready */
+		    !psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) {
+			psmouse_warn(psmouse,
+				     "The touchpad can support a better bus than the too old PS/2 protocol. "
+				     "Make sure MOUSE_PS2_SYNAPTICS_SMBUS and RMI4_SMB are enabled to get a better touchpad experience.\n");
+		}
+
 		error = synaptics_setup_intertouch(psmouse, &info, true);
 		if (!error)
 			return PSMOUSE_SYNAPTICS_SMBUS;
diff --git a/drivers/input/rmi4/rmi_f03.c b/drivers/input/rmi4/rmi_f03.c
index 77dad04..ad71a5e 100644
--- a/drivers/input/rmi4/rmi_f03.c
+++ b/drivers/input/rmi4/rmi_f03.c
@@ -146,7 +146,7 @@ static int rmi_f03_register_pt(struct f03_data *f03)
 	if (!serio)
 		return -ENOMEM;
 
-	serio->id.type = SERIO_8042;
+	serio->id.type = SERIO_PS_PSTHRU;
 	serio->write = rmi_f03_pt_write;
 	serio->port_data = f03;
 
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index dea63e2..f5206e2 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -31,9 +31,6 @@
 #define F54_GET_REPORT          1
 #define F54_FORCE_CAL           2
 
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN			27
-
 /* F54 capabilities */
 #define F54_CAP_BASELINE	(1 << 2)
 #define F54_CAP_IMAGE8		(1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
 struct f54_data {
 	struct rmi_function *fn;
 
-	u8 qry[F54_QUERY_LEN];
 	u8 num_rx_electrodes;
 	u8 num_tx_electrodes;
 	u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
 {
 	int error;
 	struct f54_data *f54;
+	u8 buf[6];
 
 	f54 = dev_get_drvdata(&fn->dev);
 
 	error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
-			       &f54->qry, sizeof(f54->qry));
+			       buf, sizeof(buf));
 	if (error) {
 		dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
 			__func__);
 		return error;
 	}
 
-	f54->num_rx_electrodes = f54->qry[0];
-	f54->num_tx_electrodes = f54->qry[1];
-	f54->capabilities = f54->qry[2];
-	f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
-	f54->family = f54->qry[5];
+	f54->num_rx_electrodes = buf[0];
+	f54->num_tx_electrodes = buf[1];
+	f54->capabilities = buf[2];
+	f54->clock_rate = buf[3] | (buf[4] << 8);
+	f54->family = buf[5];
 
 	rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
 		f54->num_rx_electrodes);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 09720d9..f932a83 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
 		},
 	},
+	{
+		/* Fujitsu UH554 laptop */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2302aef..dd042a9 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -350,6 +350,7 @@ static bool mxt_object_readable(unsigned int type)
 	case MXT_TOUCH_KEYARRAY_T15:
 	case MXT_TOUCH_PROXIMITY_T23:
 	case MXT_TOUCH_PROXKEY_T52:
+	case MXT_TOUCH_MULTITOUCHSCREEN_T100:
 	case MXT_PROCI_GRIPFACE_T20:
 	case MXT_PROCG_NOISE_T22:
 	case MXT_PROCI_ONETOUCH_T24:
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 8cf8d8d..f872817 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -471,7 +471,7 @@ static EDT_ATTR(gain, S_IWUSR | S_IRUGO, WORK_REGISTER_GAIN,
 static EDT_ATTR(offset, S_IWUSR | S_IRUGO, WORK_REGISTER_OFFSET,
 		M09_REGISTER_OFFSET, 0, 31);
 static EDT_ATTR(threshold, S_IWUSR | S_IRUGO, WORK_REGISTER_THRESHOLD,
-		M09_REGISTER_THRESHOLD, 20, 80);
+		M09_REGISTER_THRESHOLD, 0, 80);
 static EDT_ATTR(report_rate, S_IWUSR | S_IRUGO, WORK_REGISTER_REPORT_RATE,
 		NO_REGISTER, 3, 14);
 
diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index 813dd68..0dbcf10 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -526,6 +526,7 @@ static int __maybe_unused silead_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
+	disable_irq(client->irq);
 	silead_ts_set_power(client, SILEAD_POWER_OFF);
 	return 0;
 }
@@ -551,6 +552,8 @@ static int __maybe_unused silead_ts_resume(struct device *dev)
 		return -ENODEV;
 	}
 
+	enable_irq(client->irq);
+
 	return 0;
 }
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 63cacf5..0f1219f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3879,11 +3879,9 @@ static void irte_ga_prepare(void *entry,
 			    u8 vector, u32 dest_apicid, int devid)
 {
 	struct irte_ga *irte = (struct irte_ga *) entry;
-	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
 	irte->lo.val                      = 0;
 	irte->hi.val                      = 0;
-	irte->lo.fields_remap.guest_mode  = dev_data ? dev_data->use_vapic : 0;
 	irte->lo.fields_remap.int_type    = delivery_mode;
 	irte->lo.fields_remap.dm          = dest_mode;
 	irte->hi.fields.vector            = vector;
@@ -3939,10 +3937,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
 	struct irte_ga *irte = (struct irte_ga *) entry;
 	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-	if (!dev_data || !dev_data->use_vapic) {
+	if (!dev_data || !dev_data->use_vapic ||
+	    !irte->lo.fields_remap.guest_mode) {
 		irte->hi.fields.vector = vector;
 		irte->lo.fields_remap.destination = dest_apicid;
-		irte->lo.fields_remap.guest_mode = 0;
 		modify_irte_ga(devid, index, irte, NULL);
 	}
 }
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index cbf7763..c8b0329 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu);
  * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8
  * "Remapping Hardware Unit Hot Plug".
  */
-static u8 dmar_hp_uuid[] = {
-	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
-	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
-};
+static guid_t dmar_hp_guid =
+	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
+		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
 
 /*
  * Currently there's only one revision and BIOS will not check the revision id,
@@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = {
 
 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
 {
-	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
 }
 
 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
@@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
 	if (!dmar_detect_dsm(handle, func))
 		return 0;
 
-	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
 				      func, NULL, ACPI_TYPE_BUFFER);
 	if (!obj)
 		return -ENODEV;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fc2765c..8500ded 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4315,7 +4315,7 @@ int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
 	struct acpi_dmar_atsr *atsr;
 	struct dmar_atsr_unit *atsru;
 
-	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
+	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
 		return 0;
 
 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
@@ -4565,7 +4565,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
 	struct acpi_dmar_atsr *atsr;
 	struct acpi_dmar_reserved_memory *rmrr;
 
-	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
+	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
 		return 0;
 
 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 9f44ee8..8cb6082 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -103,7 +103,7 @@ static bool of_iommu_driver_present(struct device_node *np)
 	 * it never will be. We don't want to defer indefinitely, nor attempt
 	 * to dereference __iommu_of_table after it's been freed.
 	 */
-	if (system_state > SYSTEM_BOOTING)
+	if (system_state >= SYSTEM_RUNNING)
 		return false;
 
 	return of_match_node(&__iommu_of_table, np);
@@ -118,6 +118,7 @@ static const struct iommu_ops
 
 	ops = iommu_ops_from_fwnode(fwnode);
 	if ((ops && !ops->of_xlate) ||
+	    !of_device_is_available(iommu_spec->np) ||
 	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
 		return NULL;
 
@@ -236,6 +237,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 			ops = ERR_PTR(err);
 	}
 
+	/* Ignore all other errors apart from EPROBE_DEFER */
+	if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
+		dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
+		ops = NULL;
+	}
+
 	return ops;
 }
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index eb7fbe1..929f855 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
 {
 	unsigned int hi, hi2, lo;
 
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
 	return bits;
 }
 
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
 {
 	if (mips_cm_is64) {
 		gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
 	}
 }
 
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
 {
 	unsigned long flags;
 
diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c
index bb3ac5f..72a391e 100644
--- a/drivers/irqchip/irq-xtensa-mx.c
+++ b/drivers/irqchip/irq-xtensa-mx.c
@@ -142,7 +142,7 @@ static struct irq_chip xtensa_mx_irq_chip = {
 int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
 {
 	struct irq_domain *root_domain =
-		irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+		irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
 				&xtensa_mx_irq_domain_ops,
 				&xtensa_mx_irq_chip);
 	irq_set_default_host(root_domain);
diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c
index 472ae17..f728755 100644
--- a/drivers/irqchip/irq-xtensa-pic.c
+++ b/drivers/irqchip/irq-xtensa-pic.c
@@ -89,7 +89,7 @@ static struct irq_chip xtensa_irq_chip = {
 int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent)
 {
 	struct irq_domain *root_domain =
-		irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+		irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
 				&xtensa_irq_domain_ops, &xtensa_irq_chip);
 	irq_set_default_host(root_domain);
 	return 0;
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index d07dd51..8aa158a 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -2364,7 +2364,7 @@ static struct ippp_ccp_reset_state *isdn_ppp_ccp_reset_alloc_state(struct ippp_s
 		       id);
 		return NULL;
 	} else {
-		rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_KERNEL);
+		rs = kzalloc(sizeof(struct ippp_ccp_reset_state), GFP_ATOMIC);
 		if (!rs)
 			return NULL;
 		rs->state = CCPResetIdle;
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 8b7faea..422dced 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -75,7 +75,7 @@ send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb)
 		if (sk->sk_state != MISDN_BOUND)
 			continue;
 		if (!cskb)
-			cskb = skb_copy(skb, GFP_KERNEL);
+			cskb = skb_copy(skb, GFP_ATOMIC);
 		if (!cskb) {
 			printk(KERN_WARNING "%s no skb\n", __func__);
 			break;
diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c
index 1548259..2cfd938 100644
--- a/drivers/leds/leds-bcm6328.c
+++ b/drivers/leds/leds-bcm6328.c
@@ -242,7 +242,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
 		spin_lock_irqsave(lock, flags);
 		val = bcm6328_led_read(addr);
-		val |= (BIT(reg) << (((sel % 4) * 4) + 16));
+		val |= (BIT(reg % 4) << (((sel % 4) * 4) + 16));
 		bcm6328_led_write(addr, val);
 		spin_unlock_irqrestore(lock, flags);
 	}
@@ -269,7 +269,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
 		spin_lock_irqsave(lock, flags);
 		val = bcm6328_led_read(addr);
-		val |= (BIT(reg) << ((sel % 4) * 4));
+		val |= (BIT(reg % 4) << ((sel % 4) * 4));
 		bcm6328_led_write(addr, val);
 		spin_unlock_irqrestore(lock, flags);
 	}
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 78a7ce8..9a87311 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -285,7 +285,7 @@ static int pca955x_probe(struct i2c_client *client,
 			"slave address 0x%02x\n",
 			client->name, chip->bits, client->addr);
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -EIO;
 
 	if (pdata) {
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index afa3b40..e95ea65 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -20,7 +20,6 @@
 #include <linux/sched/loadavg.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
-#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -163,30 +162,6 @@ static struct led_trigger heartbeat_led_trigger = {
 	.deactivate = heartbeat_trig_deactivate,
 };
 
-static int heartbeat_pm_notifier(struct notifier_block *nb,
-				 unsigned long pm_event, void *unused)
-{
-	int rc;
-
-	switch (pm_event) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-	case PM_RESTORE_PREPARE:
-		led_trigger_unregister(&heartbeat_led_trigger);
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-	case PM_POST_RESTORE:
-		rc = led_trigger_register(&heartbeat_led_trigger);
-		if (rc)
-			pr_err("could not re-register heartbeat trigger\n");
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
 				     unsigned long code, void *unused)
 {
@@ -201,10 +176,6 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block heartbeat_pm_nb = {
-	.notifier_call = heartbeat_pm_notifier,
-};
-
 static struct notifier_block heartbeat_reboot_nb = {
 	.notifier_call = heartbeat_reboot_notifier,
 };
@@ -221,14 +192,12 @@ static int __init heartbeat_trig_init(void)
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &heartbeat_panic_nb);
 		register_reboot_notifier(&heartbeat_reboot_nb);
-		register_pm_notifier(&heartbeat_pm_nb);
 	}
 	return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
-	unregister_pm_notifier(&heartbeat_pm_nb);
 	unregister_reboot_notifier(&heartbeat_reboot_nb);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &heartbeat_panic_nb);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa60..ddae430 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	}
 	mutex_unlock(&dev->mlock);
 
-	if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
-		return -ENOMEM;
+	ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+	if (ret)
+		return ret;
 
 	t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
 	if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
 int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
+	int ret;
 
 	if (!dev->ops->submit_io)
 		return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	nvm_rq_tgt_to_dev(tgt_dev, rqd);
 
 	rqd->dev = tgt_dev;
-	return dev->ops->submit_io(dev, rqd);
+
+	/* In case of error, fail with right address format */
+	ret = dev->ops->submit_io(dev, rqd);
+	if (ret)
+		nvm_rq_dev_to_tgt(tgt_dev, rqd);
+	return ret;
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea8..024a8fc 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 	 */
 retry:
 	ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
-	if (ret == NVM_IO_REQUEUE) {
+	switch (ret) {
+	case NVM_IO_REQUEUE:
 		io_schedule();
 		goto retry;
+	case NVM_IO_ERR:
+		pblk_pipeline_stop(pblk);
+		goto out;
 	}
 
 	if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 	atomic_long_add(nr_entries, &pblk->req_writes);
 #endif
 
+	pblk_rl_inserted(&pblk->rl, nr_entries);
+
 out:
 	pblk_write_should_kick(pblk);
 	return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768..11fe0c5 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
  */
 
 #include "pblk.h"
-#include <linux/time.h>
 
 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
 			 struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
 		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
 							line->id, pos);
 
-	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
 }
 
 static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
 		*ppa = rqd->ppa_addr;
 		pblk_mark_bb(pblk, line, ppa);
 	}
+
+	atomic_dec(&pblk->inflight_io);
 }
 
 /* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 
-	up(&pblk->erase_sem);
 	__pblk_end_io_erase(pblk, rqd);
-	mempool_free(rqd, pblk->r_rq_pool);
+	mempool_free(rqd, pblk->g_rq_pool);
 }
 
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
-				  u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+			   u64 paddr)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
 		spin_unlock(&line->lock);
 		return;
 	}
-	line->vsc--;
+	le32_add_cpu(line->vsc, -1);
 
 	if (line->state == PBLK_LINESTATE_CLOSED)
 		move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
 	__pblk_map_invalidate(pblk, line, paddr);
 }
 
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-			     u64 paddr)
-{
-	__pblk_map_invalidate(pblk, line, paddr);
-
-	pblk_rb_sync_init(&pblk->rwb, NULL);
-	line->left_ssecs--;
-	if (!line->left_ssecs)
-		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-	pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
 static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
 				  unsigned int nr_secs)
 {
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
 		pool = pblk->w_rq_pool;
 		rq_size = pblk_w_rq_size;
 	} else {
-		pool = pblk->r_rq_pool;
-		rq_size = pblk_r_rq_size;
+		pool = pblk->g_rq_pool;
+		rq_size = pblk_g_rq_size;
 	}
 
 	rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
 	if (rw == WRITE)
 		pool = pblk->w_rq_pool;
 	else
-		pool = pblk->r_rq_pool;
+		pool = pblk->g_rq_pool;
 
 	mempool_free(rqd, pool);
 }
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
 	complete(waiting);
 }
 
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
 {
-	struct bio *bio;
-	int ret;
-	DECLARE_COMPLETION_ONSTACK(wait);
+	do {
+		if (!atomic_read(&pblk->inflight_io))
+			break;
 
-	bio = bio_alloc(GFP_KERNEL, 1);
-	if (!bio)
-		return;
+		schedule();
+	} while (1);
+}
 
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
-	bio->bi_private = &wait;
-	bio->bi_end_io = pblk_end_bio_sync;
+static void pblk_flush_writer(struct pblk *pblk)
+{
+	pblk_rb_flush(&pblk->rwb);
+	do {
+		if (!pblk_rb_sync_count(&pblk->rwb))
+			break;
 
-	ret = pblk_write_to_cache(pblk, bio, 0);
-	if (ret == NVM_IO_OK) {
-		if (!wait_for_completion_io_timeout(&wait,
-				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-			pr_err("pblk: flush cache timed out\n");
-		}
-	} else if (ret != NVM_IO_DONE) {
-		pr_err("pblk: tear down bio failed\n");
-	}
-
-	if (bio->bi_error)
-		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
-
-	bio_put(bio);
+		pblk_write_kick(pblk);
+		schedule();
+	} while (1);
 }
 
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list = NULL;
+	int vsc = le32_to_cpu(*line->vsc);
 
-	if (!line->vsc) {
+	lockdep_assert_held(&line->lock);
+
+	if (!vsc) {
 		if (line->gc_group != PBLK_LINEGC_FULL) {
 			line->gc_group = PBLK_LINEGC_FULL;
 			move_list = &l_mg->gc_full_list;
 		}
-	} else if (line->vsc < lm->mid_thrs) {
+	} else if (vsc < lm->high_thrs) {
 		if (line->gc_group != PBLK_LINEGC_HIGH) {
 			line->gc_group = PBLK_LINEGC_HIGH;
 			move_list = &l_mg->gc_high_list;
 		}
-	} else if (line->vsc < lm->high_thrs) {
+	} else if (vsc < lm->mid_thrs) {
 		if (line->gc_group != PBLK_LINEGC_MID) {
 			line->gc_group = PBLK_LINEGC_MID;
 			move_list = &l_mg->gc_mid_list;
 		}
-	} else if (line->vsc < line->sec_in_line) {
+	} else if (vsc < line->sec_in_line) {
 		if (line->gc_group != PBLK_LINEGC_LOW) {
 			line->gc_group = PBLK_LINEGC_LOW;
 			move_list = &l_mg->gc_low_list;
 		}
-	} else if (line->vsc == line->sec_in_line) {
+	} else if (vsc == line->sec_in_line) {
 		if (line->gc_group != PBLK_LINEGC_EMPTY) {
 			line->gc_group = PBLK_LINEGC_EMPTY;
 			move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 		line->gc_group = PBLK_LINEGC_NONE;
 		move_list =  &l_mg->corrupt_list;
 		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
-						line->id, line->vsc,
+						line->id, vsc,
 						line->sec_in_line,
 						lm->high_thrs, lm->mid_thrs);
 	}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
 #endif
 }
 
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+	pblk->sec_per_write = sec_per_write;
+}
+
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 		}
 	}
 #endif
+
+	atomic_inc(&pblk->inflight_io);
+
 	return nvm_submit_io(dev, rqd);
 }
 
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      unsigned int nr_secs, unsigned int len,
-			      gfp_t gfp_mask)
+			      int alloc_type, gfp_t gfp_mask)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	void *kaddr = data;
 	struct page *page;
 	struct bio *bio;
 	int i, ret;
 
-	if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+	if (alloc_type == PBLK_KMALLOC_META)
 		return bio_map_kern(dev->q, kaddr, len, gfp_mask);
 
 	bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush)
 {
-	int max = pblk->max_write_pgs;
+	int max = pblk->sec_per_write;
 	int min = pblk->min_write_pgs;
 	int secs_to_sync = 0;
 
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 	return secs_to_sync;
 }
 
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
-			     int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
 {
 	u64 addr;
 	int i;
 
+	addr = find_next_zero_bit(line->map_bitmap,
+					pblk->lm.sec_per_line, line->cur_sec);
+	line->cur_sec = addr - nr_secs;
+
+	for (i = 0; i < nr_secs; i++, line->cur_sec--)
+		WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+	u64 addr;
+	int i;
+
+	lockdep_assert_held(&line->lock);
+
 	/* logic error: ppa out-of-bounds. Prevent generating bad address */
 	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
 		WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
 	return addr;
 }
 
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+	u64 paddr;
+
+	spin_lock(&line->lock);
+	paddr = find_next_zero_bit(line->map_bitmap,
+					pblk->lm.sec_per_line, line->cur_sec);
+	spin_unlock(&line->lock);
+
+	return paddr;
+}
+
 /*
  * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
  * taking the per LUN semaphore.
  */
 static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
-				     u64 paddr, int dir)
+				     void *emeta_buf, u64 paddr, int dir)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
+	void *ppa_list, *meta_list;
 	struct bio *bio;
 	struct nvm_rq rqd;
-	struct ppa_addr *ppa_list;
-	dma_addr_t dma_ppa_list;
-	void *emeta = line->emeta;
+	dma_addr_t dma_ppa_list, dma_meta_list;
 	int min = pblk->min_write_pgs;
-	int left_ppas = lm->emeta_sec;
+	int left_ppas = lm->emeta_sec[0];
 	int id = line->id;
 	int rq_ppas, rq_len;
 	int cmd_op, bio_op;
-	int flags;
 	int i, j;
 	int ret;
 	DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 	if (dir == WRITE) {
 		bio_op = REQ_OP_WRITE;
 		cmd_op = NVM_OP_PWRITE;
-		flags = pblk_set_progr_mode(pblk, WRITE);
 	} else if (dir == READ) {
 		bio_op = REQ_OP_READ;
 		cmd_op = NVM_OP_PREAD;
-		flags = pblk_set_read_mode(pblk);
 	} else
 		return -EINVAL;
 
-	ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
-	if (!ppa_list)
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&dma_meta_list);
+	if (!meta_list)
 		return -ENOMEM;
 
+	ppa_list = meta_list + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
 next_rq:
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	rq_len = rq_ppas * geo->sec_size;
 
-	bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+					l_mg->emeta_alloc_type, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
 		goto free_rqd_dma;
@@ -584,27 +601,38 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 	bio_set_op_attrs(bio, bio_op, 0);
 
 	rqd.bio = bio;
-	rqd.opcode = cmd_op;
-	rqd.flags = flags;
-	rqd.nr_ppas = rq_ppas;
+	rqd.meta_list = meta_list;
 	rqd.ppa_list = ppa_list;
+	rqd.dma_meta_list = dma_meta_list;
 	rqd.dma_ppa_list = dma_ppa_list;
+	rqd.opcode = cmd_op;
+	rqd.nr_ppas = rq_ppas;
 	rqd.end_io = pblk_end_io_sync;
 	rqd.private = &wait;
 
 	if (dir == WRITE) {
+		struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+		rqd.flags = pblk_set_progr_mode(pblk, WRITE);
 		for (i = 0; i < rqd.nr_ppas; ) {
 			spin_lock(&line->lock);
 			paddr = __pblk_alloc_page(pblk, line, min);
 			spin_unlock(&line->lock);
-			for (j = 0; j < min; j++, i++, paddr++)
+			for (j = 0; j < min; j++, i++, paddr++) {
+				meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
 				rqd.ppa_list[i] =
 					addr_to_gen_ppa(pblk, paddr, id);
+			}
 		}
 	} else {
 		for (i = 0; i < rqd.nr_ppas; ) {
 			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
 			int pos = pblk_dev_ppa_to_pos(geo, ppa);
+			int read_type = PBLK_READ_RANDOM;
+
+			if (pblk_io_aligned(pblk, rq_ppas))
+				read_type = PBLK_READ_SEQUENTIAL;
+			rqd.flags = pblk_set_read_mode(pblk, read_type);
 
 			while (test_bit(pos, line->blk_bitmap)) {
 				paddr += min;
@@ -645,9 +673,11 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: emeta I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
-	bio_put(bio);
+	if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+		bio_put(bio);
 
 	if (rqd.error) {
 		if (dir == WRITE)
@@ -656,12 +686,12 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
 			pblk_log_read_err(pblk, &rqd);
 	}
 
-	emeta += rq_len;
+	emeta_buf += rq_len;
 	left_ppas -= rq_ppas;
 	if (left_ppas)
 		goto next_rq;
 free_rqd_dma:
-	nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return ret;
 }
 
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 		bio_op = REQ_OP_WRITE;
 		cmd_op = NVM_OP_PWRITE;
 		flags = pblk_set_progr_mode(pblk, WRITE);
-		lba_list = pblk_line_emeta_to_lbas(line->emeta);
+		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	} else if (dir == READ) {
 		bio_op = REQ_OP_READ;
 		cmd_op = NVM_OP_PREAD;
-		flags = pblk_set_read_mode(pblk);
+		flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 	} else
 		return -EINVAL;
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
-	rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-							&rqd.dma_ppa_list);
-	if (!rqd.ppa_list)
+	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_meta_list);
+	if (!rqd.meta_list)
 		return -ENOMEM;
 
+	rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+	rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
 	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	rqd.private = &wait;
 
 	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+		struct pblk_sec_meta *meta_list = rqd.meta_list;
+
 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-		if (dir == WRITE)
-			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+		if (dir == WRITE) {
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+			meta_list[i].lba = lba_list[paddr] = addr_empty;
+		}
 	}
 
 	/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: smeta I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
 		if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	}
 
 free_ppa_list:
-	nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 
 	return ret;
 }
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
 	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
 }
 
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+			 void *emeta_buf)
 {
-	return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+	return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+						line->emeta_ssec, READ);
 }
 
 static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
 static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
 {
 	struct nvm_rq rqd;
-	int ret;
+	int ret = 0;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
 	rqd.private = pblk;
 	__pblk_end_io_erase(pblk, &rqd);
 
-	return 0;
+	return ret;
 }
 
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct ppa_addr ppa;
-	int bit = -1;
+	int ret, bit = -1;
 
 	/* Erase only good blocks, one at a time */
 	do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
 		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
 		spin_unlock(&line->lock);
 
-		if (pblk_blk_erase_sync(pblk, ppa)) {
+		ret = pblk_blk_erase_sync(pblk, ppa);
+		if (ret) {
 			pr_err("pblk: failed to erase line %d\n", line->id);
-			return -ENOMEM;
+			return ret;
 		}
 	} while (1);
 
 	return 0;
 }
 
+static void pblk_line_setup_metadata(struct pblk_line *line,
+				     struct pblk_line_mgmt *l_mg,
+				     struct pblk_line_meta *lm)
+{
+	int meta_line;
+
+	lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+	if (meta_line == PBLK_DATA_LINES) {
+		spin_unlock(&l_mg->free_lock);
+		io_schedule();
+		spin_lock(&l_mg->free_lock);
+		goto retry_meta;
+	}
+
+	set_bit(meta_line, &l_mg->meta_bitmap);
+	line->meta_line = meta_line;
+
+	line->smeta = l_mg->sline_meta[meta_line];
+	line->emeta = l_mg->eline_meta[meta_line];
+
+	memset(line->smeta, 0, lm->smeta_len);
+	memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+	line->emeta->mem = 0;
+	atomic_set(&line->emeta->sync, 0);
+}
+
 /* For now lines are always assumed full lines. Thus, smeta former and current
  * lun bitmaps are omitted.
  */
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
 				  struct pblk_line *cur)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct line_smeta *smeta = line->smeta;
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
+	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
 	int nr_blk_line;
 
 	/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
 	}
 
 	/* Run-time metadata */
-	line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
 
 	/* Mark LUNs allocated in this line (all for now) */
 	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
 
-	smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
-	memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
-	smeta->header.id = cpu_to_le32(line->id);
-	smeta->header.type = cpu_to_le16(line->type);
-	smeta->header.version = cpu_to_le16(1);
+	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+	smeta_buf->header.id = cpu_to_le32(line->id);
+	smeta_buf->header.type = cpu_to_le16(line->type);
+	smeta_buf->header.version = cpu_to_le16(1);
 
 	/* Start metadata */
-	smeta->seq_nr = cpu_to_le64(line->seq_nr);
-	smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+	smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
 
 	/* Fill metadata among lines */
 	if (cur) {
 		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
-		smeta->prev_id = cpu_to_le32(cur->id);
-		cur->emeta->next_id = cpu_to_le32(line->id);
+		smeta_buf->prev_id = cpu_to_le32(cur->id);
+		cur->emeta->buf->next_id = cpu_to_le32(line->id);
 	} else {
-		smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
 	}
 
 	/* All smeta must be set at this point */
-	smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
-	smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+	smeta_buf->header.crc = cpu_to_le32(
+			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
 
 	/* End metadata */
-	memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
-	emeta->seq_nr = cpu_to_le64(line->seq_nr);
-	emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
-	emeta->nr_valid_lbas = cpu_to_le64(0);
-	emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
-	emeta->crc = cpu_to_le32(0);
-	emeta->prev_id = smeta->prev_id;
+	memcpy(&emeta_buf->header, &smeta_buf->header,
+						sizeof(struct line_header));
+	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+	emeta_buf->crc = cpu_to_le32(0);
+	emeta_buf->prev_id = smeta_buf->prev_id;
 
 	return 1;
 }
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 	/* Mark smeta metadata sectors as bad sectors */
 	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
 	off = bit * geo->sec_per_pl;
-retry_smeta:
 	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
 	line->sec_in_line -= lm->smeta_sec;
 	line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 
 	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
 		pr_debug("pblk: line smeta I/O failed. Retry\n");
-		off += geo->sec_per_pl;
-		goto retry_smeta;
+		return 1;
 	}
 
 	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 	 * blocks to make sure that there are enough sectors to store emeta
 	 */
 	bit = lm->sec_per_line;
-	off = lm->sec_per_line - lm->emeta_sec;
-	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+	off = lm->sec_per_line - lm->emeta_sec[0];
+	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
 	while (nr_bb) {
 		off -= geo->sec_per_pl;
 		if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
 		}
 	}
 
-	line->sec_in_line -= lm->emeta_sec;
+	line->sec_in_line -= lm->emeta_sec[0];
 	line->emeta_ssec = off;
-	line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+	line->nr_valid_lbas = 0;
+	line->left_msecs = line->sec_in_line;
+	*line->vsc = cpu_to_le32(line->sec_in_line);
 
 	if (lm->sec_per_line - line->sec_in_line !=
 		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 
 	spin_lock(&line->lock);
 	if (line->state != PBLK_LINESTATE_FREE) {
+		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+		mempool_free(line->map_bitmap, pblk->line_meta_pool);
 		spin_unlock(&line->lock);
-		WARN(1, "pblk: corrupted line state\n");
-		return -EINTR;
+		WARN(1, "pblk: corrupted line %d, state %d\n",
+							line->id, line->state);
+		return -EAGAIN;
 	}
+
 	line->state = PBLK_LINESTATE_OPEN;
 
 	atomic_set(&line->left_eblks, blk_in_line);
 	atomic_set(&line->left_seblks, blk_in_line);
+
+	line->meta_distance = lm->meta_distance;
 	spin_unlock(&line->lock);
 
 	/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line *line = NULL;
-	int bit;
+	struct pblk_line *line;
+	int ret, bit;
 
 	lockdep_assert_held(&l_mg->free_lock);
 
-retry_get:
+retry:
 	if (list_empty(&l_mg->free_list)) {
 		pr_err("pblk: no free lines\n");
-		goto out;
+		return NULL;
 	}
 
 	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
 		list_add_tail(&line->list, &l_mg->bad_list);
 
 		pr_debug("pblk: line %d is bad\n", line->id);
-		goto retry_get;
+		goto retry;
 	}
 
-	if (pblk_line_prepare(pblk, line)) {
-		pr_err("pblk: failed to prepare line %d\n", line->id);
-		list_add(&line->list, &l_mg->free_list);
-		return NULL;
+	ret = pblk_line_prepare(pblk, line);
+	if (ret) {
+		if (ret == -EAGAIN) {
+			list_add(&line->list, &l_mg->corrupt_list);
+			goto retry;
+		} else {
+			pr_err("pblk: failed to prepare line %d\n", line->id);
+			list_add(&line->list, &l_mg->free_list);
+			l_mg->nr_free_lines++;
+			return NULL;
+		}
 	}
 
-out:
 	return line;
 }
 
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *retry_line;
 
+retry:
 	spin_lock(&l_mg->free_lock);
 	retry_line = pblk_line_get(pblk);
 	if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
 	l_mg->data_line = retry_line;
 	spin_unlock(&l_mg->free_lock);
 
-	if (pblk_line_erase(pblk, retry_line)) {
-		spin_lock(&l_mg->free_lock);
-		l_mg->data_line = NULL;
-		spin_unlock(&l_mg->free_lock);
-		return NULL;
-	}
-
 	pblk_rl_free_lines_dec(&pblk->rl, retry_line);
 
+	if (pblk_line_erase(pblk, retry_line))
+		goto retry;
+
 	return retry_line;
 }
 
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+	struct pblk_rl *rl = &pblk->rl;
+
+	atomic_set(&rl->rb_space, 0);
+}
+
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line;
-	int meta_line;
 	int is_next = 0;
 
 	spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 	line->type = PBLK_LINETYPE_DATA;
 	l_mg->data_line = line;
 
-	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-	set_bit(meta_line, &l_mg->meta_bitmap);
-	line->smeta = l_mg->sline_meta[meta_line].meta;
-	line->emeta = l_mg->eline_meta[meta_line].meta;
-	line->meta_line = meta_line;
+	pblk_line_setup_metadata(line, l_mg, &pblk->lm);
 
 	/* Allocate next line for preparation */
 	l_mg->data_next = pblk_line_get(pblk);
-	if (l_mg->data_next) {
+	if (!l_mg->data_next) {
+		/* If we cannot get a new line, we need to stop the pipeline.
+		 * Only allow as many writes in as we can store safely and then
+		 * fail gracefully
+		 */
+		pblk_set_space_limit(pblk);
+
+		l_mg->data_next = NULL;
+	} else {
 		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
 		l_mg->data_next->type = PBLK_LINETYPE_DATA;
 		is_next = 1;
 	}
 	spin_unlock(&l_mg->free_lock);
 
+	if (pblk_line_erase(pblk, line)) {
+		line = pblk_line_retry(pblk, line);
+		if (!line)
+			return NULL;
+	}
+
 	pblk_rl_free_lines_dec(&pblk->rl, line);
 	if (is_next)
 		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 
-	if (pblk_line_erase(pblk, line))
-		return NULL;
-
 retry_setup:
-	if (!pblk_line_set_metadata(pblk, line, NULL)) {
+	if (!pblk_line_init_metadata(pblk, line, NULL)) {
 		line = pblk_line_retry(pblk, line);
 		if (!line)
 			return NULL;
@@ -1222,69 +1320,89 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 	return line;
 }
 
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
 {
-	struct pblk_line_meta *lm = &pblk->lm;
+	lockdep_assert_held(&pblk->l_mg.free_lock);
+
+	pblk_set_space_limit(pblk);
+	pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int ret;
+
+	spin_lock(&l_mg->free_lock);
+	if (pblk->state == PBLK_STATE_RECOVERING ||
+					pblk->state == PBLK_STATE_STOPPED) {
+		spin_unlock(&l_mg->free_lock);
+		return;
+	}
+	pblk->state = PBLK_STATE_RECOVERING;
+	spin_unlock(&l_mg->free_lock);
+
+	pblk_flush_writer(pblk);
+	pblk_wait_for_meta(pblk);
+
+	ret = pblk_recov_pad(pblk);
+	if (ret) {
+		pr_err("pblk: could not close data on teardown(%d)\n", ret);
+		return;
+	}
+
+	flush_workqueue(pblk->bb_wq);
+	pblk_line_close_meta_sync(pblk);
+
+	spin_lock(&l_mg->free_lock);
+	pblk->state = PBLK_STATE_STOPPED;
+	l_mg->data_line = NULL;
+	l_mg->data_next = NULL;
+	spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
+{
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *cur, *new;
 	unsigned int left_seblks;
-	int meta_line;
 	int is_next = 0;
 
 	cur = l_mg->data_line;
 	new = l_mg->data_next;
 	if (!new)
-		return NULL;
+		return;
 	l_mg->data_line = new;
 
-retry_line:
+	spin_lock(&l_mg->free_lock);
+	if (pblk->state != PBLK_STATE_RUNNING) {
+		l_mg->data_line = NULL;
+		l_mg->data_next = NULL;
+		spin_unlock(&l_mg->free_lock);
+		return;
+	}
+
+	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+	spin_unlock(&l_mg->free_lock);
+
+retry_erase:
 	left_seblks = atomic_read(&new->left_seblks);
 	if (left_seblks) {
 		/* If line is not fully erased, erase it */
 		if (atomic_read(&new->left_eblks)) {
 			if (pblk_line_erase(pblk, new))
-				return NULL;
+				return;
 		} else {
 			io_schedule();
 		}
-		goto retry_line;
+		goto retry_erase;
 	}
 
-	spin_lock(&l_mg->free_lock);
-	/* Allocate next line for preparation */
-	l_mg->data_next = pblk_line_get(pblk);
-	if (l_mg->data_next) {
-		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
-		l_mg->data_next->type = PBLK_LINETYPE_DATA;
-		is_next = 1;
-	}
-
-retry_meta:
-	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-	if (meta_line == PBLK_DATA_LINES) {
-		spin_unlock(&l_mg->free_lock);
-		io_schedule();
-		spin_lock(&l_mg->free_lock);
-		goto retry_meta;
-	}
-
-	set_bit(meta_line, &l_mg->meta_bitmap);
-	new->smeta = l_mg->sline_meta[meta_line].meta;
-	new->emeta = l_mg->eline_meta[meta_line].meta;
-	new->meta_line = meta_line;
-
-	memset(new->smeta, 0, lm->smeta_len);
-	memset(new->emeta, 0, lm->emeta_len);
-	spin_unlock(&l_mg->free_lock);
-
-	if (is_next)
-		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
 retry_setup:
-	if (!pblk_line_set_metadata(pblk, new, cur)) {
+	if (!pblk_line_init_metadata(pblk, new, cur)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return NULL;
+			return;
 
 		goto retry_setup;
 	}
@@ -1292,12 +1410,30 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 	if (!pblk_line_init_bb(pblk, new, 1)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return NULL;
+			return;
 
 		goto retry_setup;
 	}
 
-	return new;
+	/* Allocate next line for preparation */
+	spin_lock(&l_mg->free_lock);
+	l_mg->data_next = pblk_line_get(pblk);
+	if (!l_mg->data_next) {
+		/* If we cannot get a new line, we need to stop the pipeline.
+		 * Only allow as many writes in as we can store safely and then
+		 * fail gracefully
+		 */
+		pblk_stop_writes(pblk, new);
+		l_mg->data_next = NULL;
+	} else {
+		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+		l_mg->data_next->type = PBLK_LINETYPE_DATA;
+		is_next = 1;
+	}
+	spin_unlock(&l_mg->free_lock);
+
+	if (is_next)
+		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 }
 
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
 	if (line->invalid_bitmap)
 		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
 
+	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
 	line->map_bitmap = NULL;
 	line->invalid_bitmap = NULL;
 	line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
 	struct nvm_rq *rqd;
 	int err;
 
-	rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
-	memset(rqd, 0, pblk_r_rq_size);
+	rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	pblk_setup_e_rq(pblk, rqd, ppa);
 
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
 	return pblk->l_mg.data_line;
 }
 
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
 {
 	return pblk->l_mg.data_next;
 }
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
 	return (line->left_msecs == 0);
 }
 
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line *line, *tline;
+	LIST_HEAD(list);
+
+	spin_lock(&l_mg->close_lock);
+	if (list_empty(&l_mg->emeta_list)) {
+		spin_unlock(&l_mg->close_lock);
+		return;
+	}
+
+	list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+	spin_unlock(&l_mg->close_lock);
+
+	list_for_each_entry_safe(line, tline, &list, list) {
+		struct pblk_emeta *emeta = line->emeta;
+
+		while (emeta->mem < lm->emeta_len[0]) {
+			int ret;
+
+			ret = pblk_submit_meta_io(pblk, line);
+			if (ret) {
+				pr_err("pblk: sync meta line %d failed (%d)\n",
+							line->id, ret);
+				return;
+			}
+		}
+	}
+
+	pblk_wait_for_meta(pblk);
+	flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+	if (pblk_rl_is_limit(&pblk->rl))
+		pblk_line_close_meta_sync(pblk);
+}
+
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list;
 
-	line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
+#ifdef CONFIG_NVM_DEBUG
+	struct pblk_line_meta *lm = &pblk->lm;
 
-	if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
-		pr_err("pblk: line %d close I/O failed\n", line->id);
-
-	WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+	WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
 				"pblk: corrupt closed line %d\n", line->id);
+#endif
 
 	spin_lock(&l_mg->free_lock);
 	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 
 	spin_unlock(&line->lock);
 	spin_unlock(&l_mg->gc_lock);
+
+	pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
+
+	/* No need for exact vsc value; avoid a big line lock and take approx. */
+	memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+	memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+	emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+	emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+	spin_lock(&l_mg->close_lock);
+	spin_lock(&line->lock);
+	list_add_tail(&line->list, &l_mg->emeta_list);
+	spin_unlock(&line->lock);
+	spin_unlock(&l_mg->close_lock);
+
+	pblk_line_should_sync_meta(pblk);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
 }
 
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-		      void (*work)(struct work_struct *))
+		      void (*work)(struct work_struct *),
+		      struct workqueue_struct *wq)
 {
 	struct pblk_line_ws *line_ws;
 
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 	line_ws->priv = priv;
 
 	INIT_WORK(&line_ws->ws, work);
-	queue_work(pblk->kw_wq, &line_ws->ws);
+	queue_work(wq, &line_ws->ws);
 }
 
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_lun *rlun;
-	int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
 	int ret;
 
 	/*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	/* If the LUN has been locked for this same request, do not attempt to
 	 * lock it again
 	 */
-	if (test_and_set_bit(lun_id, lun_bitmap))
+	if (test_and_set_bit(pos, lun_bitmap))
 		return;
 
-	rlun = &pblk->luns[lun_id];
+	rlun = &pblk->luns[pos];
 	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
 	if (ret) {
 		switch (ret) {
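
The pblk-core changes above rework pblk_line_get() so that a corrupt line (reported as -EAGAIN by pblk_line_prepare()) is parked on the corrupt list and another free line is tried, while any other error gives up and returns NULL. A minimal userspace sketch of that error-code-driven retry, using simplified stand-in types rather than the kernel structures, could look like this:

/*
 * Sketch only: prepare_line() and struct line are hypothetical stand-ins
 * for pblk_line_prepare() and struct pblk_line.
 */
#include <errno.h>
#include <stdio.h>

struct line { int id; int corrupt; };

/* hypothetical prepare helper: -EAGAIN marks a corrupt line */
static int prepare_line(struct line *l)
{
	return l->corrupt ? -EAGAIN : 0;
}

static struct line *get_line(struct line *pool, int nr)
{
	for (int i = 0; i < nr; i++) {
		int ret = prepare_line(&pool[i]);

		if (!ret)
			return &pool[i];	/* usable line */
		if (ret == -EAGAIN)
			continue;		/* corrupt: park it and try the next */
		return NULL;			/* hard failure: give up */
	}
	return NULL;				/* no free lines left */
}

int main(void)
{
	struct line pool[] = { { 0, 1 }, { 1, 0 } };
	struct line *l = get_line(pool, 2);

	printf("got line %d\n", l ? l->id : -1);
	return 0;
}
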
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c..6090d28 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
 
 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
 {
-	kfree(gc_rq->data);
-	kfree(gc_rq->lba_list);
+	vfree(gc_rq->data);
 	kfree(gc_rq);
 }
 
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
 		return 1;
 	}
 
-	list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
-		list_move_tail(&gc_rq->list, &w_list);
-		gc->w_entries--;
-	}
+	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+	gc->w_entries = 0;
 	spin_unlock(&gc->w_lock);
 
 	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
 				gc_rq->nr_secs, gc_rq->secs_to_gc,
 				gc_rq->line, PBLK_IOTYPE_GC);
 
-		kref_put(&gc_rq->line->ref, pblk_line_put);
-
 		list_del(&gc_rq->list);
+		kref_put(&gc_rq->line->ref, pblk_line_put);
 		pblk_gc_free_gc_rq(gc_rq);
 	}
 
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
  * Responsible for managing all memory related to a gc request. Also in case of
  * failure
  */
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
-				   u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_gc_rq *gc_rq;
+	struct pblk_line *line = gc_rq->line;
 	void *data;
 	unsigned int secs_to_gc;
-	int ret = NVM_IO_OK;
+	int ret = 0;
 
-	data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+	data = vmalloc(gc_rq->nr_secs * geo->sec_size);
 	if (!data) {
-		ret = NVM_IO_ERR;
-		goto free_lba_list;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	/* Read from GC victim block */
-	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+	if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
 							&secs_to_gc, line)) {
-		ret = NVM_IO_ERR;
+		ret = -EFAULT;
 		goto free_data;
 	}
 
 	if (!secs_to_gc)
-		goto free_data;
+		goto free_rq;
 
-	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
-	if (!gc_rq) {
-		ret = NVM_IO_ERR;
-		goto free_data;
-	}
-
-	gc_rq->line = line;
 	gc_rq->data = data;
-	gc_rq->lba_list = lba_list;
-	gc_rq->nr_secs = nr_secs;
 	gc_rq->secs_to_gc = secs_to_gc;
 
-	kref_get(&line->ref);
-
 retry:
 	spin_lock(&gc->w_lock);
-	if (gc->w_entries > 256) {
+	if (gc->w_entries >= PBLK_GC_W_QD) {
 		spin_unlock(&gc->w_lock);
-		usleep_range(256, 1024);
+		pblk_gc_writer_kick(&pblk->gc);
+		usleep_range(128, 256);
 		goto retry;
 	}
 	gc->w_entries++;
@@ -120,13 +105,14 @@ static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
 
 	pblk_gc_writer_kick(&pblk->gc);
 
-	return NVM_IO_OK;
+	return 0;
 
+free_rq:
+	kfree(gc_rq);
 free_data:
-	kfree(data);
-free_lba_list:
-	kfree(lba_list);
-
+	vfree(data);
+out:
+	kref_put(&line->ref, pblk_line_put);
 	return ret;
 }
 
@@ -150,98 +136,48 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 
 static void pblk_gc_line_ws(struct work_struct *work)
 {
+	struct pblk_line_ws *line_rq_ws = container_of(work,
+						struct pblk_line_ws, ws);
+	struct pblk *pblk = line_rq_ws->pblk;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line = line_rq_ws->line;
+	struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+	up(&gc->gc_sem);
+
+	if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+		pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+						line->id, *line->vsc,
+						gc_rq->nr_secs);
+	}
+
+	mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
 	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
 									ws);
 	struct pblk *pblk = line_ws->pblk;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line = line_ws->line;
-	struct pblk_line_meta *lm = &pblk->lm;
-	__le64 *lba_list = line_ws->priv;
-	u64 *gc_list;
-	int sec_left;
-	int nr_ppas, bit;
-	int put_line = 1;
-
-	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
-
-	spin_lock(&line->lock);
-	sec_left = line->vsc;
-	if (!sec_left) {
-		/* Lines are erased before being used (l_mg->data_/log_next) */
-		spin_unlock(&line->lock);
-		goto out;
-	}
-	spin_unlock(&line->lock);
-
-	if (sec_left < 0) {
-		pr_err("pblk: corrupted GC line (%d)\n", line->id);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
-
-	bit = -1;
-next_rq:
-	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
-	if (!gc_list) {
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
-
-	nr_ppas = 0;
-	do {
-		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
-								bit + 1);
-		if (bit > line->emeta_ssec)
-			break;
-
-		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
-	} while (nr_ppas < pblk->max_write_pgs);
-
-	if (unlikely(!nr_ppas)) {
-		kfree(gc_list);
-		goto out;
-	}
-
-	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
-		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
-						line->id, line->vsc,
-						nr_ppas, nr_ppas);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
-
-	sec_left -= nr_ppas;
-	if (sec_left > 0)
-		goto next_rq;
-
-out:
-	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-	mempool_free(line_ws, pblk->line_ws_pool);
-	atomic_dec(&pblk->gc.inflight_gc);
-	if (put_line)
-		kref_put(&line->ref, pblk_line_put);
-}
-
-static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
-{
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line_ws *line_ws;
+	struct pblk_gc *gc = &pblk->gc;
+	struct line_emeta *emeta_buf;
+	struct pblk_line_ws *line_rq_ws;
+	struct pblk_gc_rq *gc_rq;
 	__le64 *lba_list;
+	int sec_left, nr_secs, bit;
 	int ret;
 
-	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
-	line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
+	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
 								GFP_KERNEL);
-	if (!line->emeta) {
+	if (!emeta_buf) {
 		pr_err("pblk: cannot use GC emeta\n");
-		goto fail_free_ws;
+		return;
 	}
 
-	ret = pblk_line_read_emeta(pblk, line);
+	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
 	if (ret) {
 		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
 		goto fail_free_emeta;
@@ -251,39 +187,155 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 	 * the line untouched. TODO: Implement a recovery routine that scans and
 	 * moves all sectors on the line.
 	 */
-	lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
+	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
 	if (!lba_list) {
 		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
 		goto fail_free_emeta;
 	}
 
-	line_ws->pblk = pblk;
-	line_ws->line = line;
-	line_ws->priv = lba_list;
+	sec_left = pblk_line_vsc(line);
+	if (sec_left < 0) {
+		pr_err("pblk: corrupted GC line (%d)\n", line->id);
+		goto fail_free_emeta;
+	}
 
-	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
-	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+	bit = -1;
+next_rq:
+	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+	if (!gc_rq)
+		goto fail_free_emeta;
 
-	return 0;
+	nr_secs = 0;
+	do {
+		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
+								bit + 1);
+		if (bit > line->emeta_ssec)
+			break;
 
-fail_free_emeta:
-	pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
+		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+	} while (nr_secs < pblk->max_write_pgs);
+
+	if (unlikely(!nr_secs)) {
+		kfree(gc_rq);
+		goto out;
+	}
+
+	gc_rq->nr_secs = nr_secs;
+	gc_rq->line = line;
+
+	line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_rq_ws)
+		goto fail_free_gc_rq;
+
+	line_rq_ws->pblk = pblk;
+	line_rq_ws->line = line;
+	line_rq_ws->priv = gc_rq;
+
+	down(&gc->gc_sem);
+	kref_get(&line->ref);
+
+	INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+	queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+	sec_left -= nr_secs;
+	if (sec_left > 0)
+		goto next_rq;
+
+out:
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
 	mempool_free(line_ws, pblk->line_ws_pool);
-	pblk_put_line_back(pblk, line);
 
-	return 1;
+	kref_put(&line->ref, pblk_line_put);
+	atomic_dec(&gc->inflight_gc);
+
+	return;
+
+fail_free_gc_rq:
+	kfree(gc_rq);
+fail_free_emeta:
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+	pblk_put_line_back(pblk, line);
+	kref_put(&line->ref, pblk_line_put);
+	mempool_free(line_ws, pblk->line_ws_pool);
+	atomic_dec(&gc->inflight_gc);
+
+	pr_err("pblk: Failed to GC line %d\n", line->id);
 }
 
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 {
-	struct pblk_line *line, *tline;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line_ws *line_ws;
 
-	list_for_each_entry_safe(line, tline, gc_list, list) {
-		if (pblk_gc_line(pblk, line))
-			pr_err("pblk: failed to GC line %d\n", line->id);
-		list_del(&line->list);
+	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_ws)
+		return -ENOMEM;
+
+	line_ws->pblk = pblk;
+	line_ws->line = line;
+
+	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+	queue_work(gc->gc_reader_wq, &line_ws->ws);
+
+	return 0;
+}
+
+static int pblk_gc_read(struct pblk *pblk)
+{
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line;
+
+	spin_lock(&gc->r_lock);
+	if (list_empty(&gc->r_list)) {
+		spin_unlock(&gc->r_lock);
+		return 1;
 	}
+
+	line = list_first_entry(&gc->r_list, struct pblk_line, list);
+	list_del(&line->list);
+	spin_unlock(&gc->r_lock);
+
+	pblk_gc_kick(pblk);
+
+	if (pblk_gc_line(pblk, line))
+		pr_err("pblk: failed to GC line %d\n", line->id);
+
+	return 0;
+}
+
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
+{
+	wake_up_process(gc->gc_reader_ts);
+}
+
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+						 struct list_head *group_list)
+{
+	struct pblk_line *line, *victim;
+	int line_vsc, victim_vsc;
+
+	victim = list_first_entry(group_list, struct pblk_line, list);
+	list_for_each_entry(line, group_list, list) {
+		line_vsc = le32_to_cpu(*line->vsc);
+		victim_vsc = le32_to_cpu(*victim->vsc);
+		if (line_vsc < victim_vsc)
+			victim = line;
+	}
+
+	return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+	unsigned int nr_blocks_free, nr_blocks_need;
+
+	nr_blocks_need = pblk_rl_high_thrs(rl);
+	nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+	/* This is not critical, no need to take lock here */
+	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
 }
 
 /*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_line *line, *tline;
-	unsigned int nr_blocks_free, nr_blocks_need;
+	struct pblk_line *line;
 	struct list_head *group_list;
-	int run_gc, gc_group = 0;
-	int prev_gc = 0;
-	int inflight_gc = atomic_read(&gc->inflight_gc);
-	LIST_HEAD(gc_list);
+	bool run_gc;
+	int inflight_gc, gc_group = 0, prev_group = 0;
 
-	spin_lock(&l_mg->gc_lock);
-	list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(&l_mg->gc_full_list)) {
+			spin_unlock(&l_mg->gc_lock);
+			break;
+		}
+
+		line = list_first_entry(&l_mg->gc_full_list,
+							struct pblk_line, list);
+
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
 		spin_unlock(&line->lock);
 
 		list_del(&line->list);
-		kref_put(&line->ref, pblk_line_put);
-	}
-	spin_unlock(&l_mg->gc_lock);
+		spin_unlock(&l_mg->gc_lock);
 
-	nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
-	nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
-	run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+		kref_put(&line->ref, pblk_line_put);
+	} while (1);
+
+	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+	if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+		return;
 
 next_gc_group:
 	group_list = l_mg->gc_lists[gc_group++];
-	spin_lock(&l_mg->gc_lock);
-	while (run_gc && !list_empty(group_list)) {
-		/* No need to queue up more GC lines than we can handle */
-		if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(group_list)) {
 			spin_unlock(&l_mg->gc_lock);
-			pblk_gc_lines(pblk, &gc_list);
-			return;
+			break;
 		}
 
-		line = list_first_entry(group_list, struct pblk_line, list);
-		nr_blocks_free += atomic_read(&line->blk_in_line);
+		line = pblk_gc_get_victim_line(pblk, group_list);
 
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
-		list_move_tail(&line->list, &gc_list);
-		atomic_inc(&gc->inflight_gc);
-		inflight_gc++;
 		spin_unlock(&line->lock);
 
-		prev_gc = 1;
-		run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
-	}
-	spin_unlock(&l_mg->gc_lock);
+		list_del(&line->list);
+		spin_unlock(&l_mg->gc_lock);
 
-	pblk_gc_lines(pblk, &gc_list);
+		spin_lock(&gc->r_lock);
+		list_add_tail(&line->list, &gc->r_list);
+		spin_unlock(&gc->r_lock);
 
-	if (!prev_gc && pblk->rl.rb_state > gc_group &&
-						gc_group < PBLK_NR_GC_LISTS)
+		inflight_gc = atomic_inc_return(&gc->inflight_gc);
+		pblk_gc_reader_kick(gc);
+
+		prev_group = 1;
+
+		/* No need to queue up more GC lines than we can handle */
+		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+		if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+			break;
+	} while (1);
+
+	if (!prev_group && pblk->rl.rb_state > gc_group &&
+						gc_group < PBLK_GC_NR_LISTS)
 		goto next_gc_group;
 }
 
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
 	wake_up_process(gc->gc_ts);
 	pblk_gc_writer_kick(gc);
+	pblk_gc_reader_kick(gc);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 }
 
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
 	return 0;
 }
 
+static int pblk_gc_reader_ts(void *data)
+{
+	struct pblk *pblk = data;
+
+	while (!kthread_should_stop()) {
+		if (!pblk_gc_read(pblk))
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
+}
+
 static void pblk_gc_start(struct pblk *pblk)
 {
 	pblk->gc.gc_active = 1;
-
 	pr_debug("pblk: gc start\n");
 }
 
-int pblk_gc_status(struct pblk *pblk)
-{
-	struct pblk_gc *gc = &pblk->gc;
-	int ret;
-
-	spin_lock(&gc->lock);
-	ret = gc->gc_active;
-	spin_unlock(&gc->lock);
-
-	return ret;
-}
-
-static void __pblk_gc_should_start(struct pblk *pblk)
-{
-	struct pblk_gc *gc = &pblk->gc;
-
-	lockdep_assert_held(&gc->lock);
-
-	if (gc->gc_enabled && !gc->gc_active)
-		pblk_gc_start(pblk);
-}
-
 void pblk_gc_should_start(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
-	spin_lock(&gc->lock);
-	__pblk_gc_should_start(pblk);
-	spin_unlock(&gc->lock);
+	if (gc->gc_enabled && !gc->gc_active)
+		pblk_gc_start(pblk);
+
+	pblk_gc_kick(pblk);
 }
 
 /*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
  */
 static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 {
-	spin_lock(&pblk->gc.lock);
 	pblk->gc.gc_active = 0;
-	spin_unlock(&pblk->gc.lock);
-
 	pr_debug("pblk: gc stop\n");
 }
 
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 	spin_unlock(&gc->lock);
 }
 
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 {
 	struct pblk_gc *gc = &pblk->gc;
-	int rsv = 0;
+
+	if (force < 0 || force > 1)
+		return -EINVAL;
 
 	spin_lock(&gc->lock);
-	if (force) {
-		gc->gc_enabled = 1;
-		rsv = 64;
-	}
-	pblk_rl_set_gc_rsc(&pblk->rl, rsv);
 	gc->gc_forced = force;
-	__pblk_gc_should_start(pblk);
+
+	if (force)
+		gc->gc_enabled = 1;
+	else
+		gc->gc_enabled = 0;
 	spin_unlock(&gc->lock);
+
+	pblk_gc_should_start(pblk);
+
+	return 0;
 }
 
 int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
 		goto fail_free_main_kthread;
 	}
 
+	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+							"pblk-gc-reader-ts");
+	if (IS_ERR(gc->gc_reader_ts)) {
+		pr_err("pblk: could not allocate GC reader kthread\n");
+		ret = PTR_ERR(gc->gc_reader_ts);
+		goto fail_free_writer_kthread;
+	}
+
 	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 
 	gc->gc_active = 0;
 	gc->gc_forced = 0;
 	gc->gc_enabled = 1;
-	gc->gc_jobs_active = 8;
 	gc->w_entries = 0;
 	atomic_set(&gc->inflight_gc, 0);
 
-	gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
-			WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+	/* Workqueue that reads valid sectors from a line and submits them to the
+	 * GC writer to be recycled.
+	 */
+	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+	if (!gc->gc_line_reader_wq) {
+		pr_err("pblk: could not allocate GC line reader workqueue\n");
+		ret = -ENOMEM;
+		goto fail_free_reader_kthread;
+	}
+
+	/* Workqueue that prepares lines for GC */
+	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!gc->gc_reader_wq) {
 		pr_err("pblk: could not allocate GC reader workqueue\n");
 		ret = -ENOMEM;
-		goto fail_free_writer_kthread;
+		goto fail_free_reader_line_wq;
 	}
 
 	spin_lock_init(&gc->lock);
 	spin_lock_init(&gc->w_lock);
+	spin_lock_init(&gc->r_lock);
+
+	sema_init(&gc->gc_sem, 128);
+
 	INIT_LIST_HEAD(&gc->w_list);
+	INIT_LIST_HEAD(&gc->r_list);
 
 	return 0;
 
+fail_free_reader_line_wq:
+	destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+	kthread_stop(gc->gc_reader_ts);
 fail_free_writer_kthread:
 	kthread_stop(gc->gc_writer_ts);
 fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
 	struct pblk_gc *gc = &pblk->gc;
 
 	flush_workqueue(gc->gc_reader_wq);
+	flush_workqueue(gc->gc_line_reader_wq);
 
 	del_timer(&gc->gc_timer);
 	pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
 	if (gc->gc_ts)
 		kthread_stop(gc->gc_ts);
 
-	if (pblk->gc.gc_reader_wq)
-		destroy_workqueue(pblk->gc.gc_reader_wq);
+	if (gc->gc_reader_wq)
+		destroy_workqueue(gc->gc_reader_wq);
+
+	if (gc->gc_line_reader_wq)
+		destroy_workqueue(gc->gc_line_reader_wq);
 
 	if (gc->gc_writer_ts)
 		kthread_stop(gc->gc_writer_ts);
+
+	if (gc->gc_reader_ts)
+		kthread_stop(gc->gc_reader_ts);
 }
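
The GC rework above replaces first-in-list line selection with a greedy pick in pblk_gc_get_victim_line(): among the closed lines in a group, the one with the fewest valid sectors is reclaimed first, since it frees a block at the lowest copy cost. A standalone sketch of that selection, with a simplified stand-in type instead of the kernel's pblk_line, might be:

#include <stdio.h>

struct gc_line { int id; unsigned int vsc; /* valid sector count */ };

static const struct gc_line *pick_victim(const struct gc_line *lines, int nr)
{
	const struct gc_line *victim = &lines[0];

	for (int i = 1; i < nr; i++)
		if (lines[i].vsc < victim->vsc)
			victim = &lines[i];

	return victim;
}

int main(void)
{
	struct gc_line lines[] = { { 0, 900 }, { 1, 120 }, { 2, 450 } };
	const struct gc_line *victim = pick_victim(lines, 3);

	printf("victim: line %d (vsc=%u)\n", victim->id, victim->vsc);
	return 0;
}
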
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d..1b0f612 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
 
 #include "pblk.h"
 
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
-					*pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+				*pblk_w_rq_cache, *pblk_line_meta_cache;
 static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
 
 static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 			  struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * constraint. Writes can be of arbitrary size.
 	 */
 	if (bio_data_dir(bio) == READ) {
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 		ret = pblk_submit_read(pblk, bio);
 		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
 			bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 	 * available for user I/O.
 	 */
 	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
-		blk_queue_split(q, &bio, q->bio_split);
+		blk_queue_split(q, &bio);
 
 	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
 }
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
 		return -ENOMEM;
 	}
 
-	pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
 				0, 0, NULL);
-	if (!pblk_r_rq_cache) {
+	if (!pblk_g_rq_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
 		up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
 	if (!pblk_w_rq_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
-		kmem_cache_destroy(pblk_r_rq_cache);
+		kmem_cache_destroy(pblk_g_rq_cache);
 		up_write(&pblk_lock);
 		return -ENOMEM;
 	}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
 	if (!pblk_line_meta_cache) {
 		kmem_cache_destroy(pblk_blk_ws_cache);
 		kmem_cache_destroy(pblk_rec_cache);
-		kmem_cache_destroy(pblk_r_rq_cache);
+		kmem_cache_destroy(pblk_g_rq_cache);
 		kmem_cache_destroy(pblk_w_rq_cache);
 		up_write(&pblk_lock);
 		return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	int max_write_ppas;
-	int mod;
 
-	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
-	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
-	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
-				max_write_ppas : nvm_max_phys_sects(dev);
 	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
 						geo->nr_planes * geo->nr_luns;
 
-	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
-		pr_err("pblk: cannot support device max_phys_sect\n");
-		return -EINVAL;
-	}
-
-	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
-	if (mod) {
-		pr_err("pblk: bad configuration of sectors/pages\n");
-		return -EINVAL;
-	}
-
 	if (pblk_init_global_caches(pblk))
 		return -ENOMEM;
 
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
 	if (!pblk->page_pool)
 		return -ENOMEM;
 
-	pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+	pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
 							pblk_blk_ws_cache);
 	if (!pblk->line_ws_pool)
 		goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
 	if (!pblk->rec_pool)
 		goto free_blk_ws_pool;
 
-	pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
-	if (!pblk->r_rq_pool)
+	pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+							pblk_g_rq_cache);
+	if (!pblk->g_rq_pool)
 		goto free_rec_pool;
 
-	pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+	pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+							pblk_w_rq_cache);
 	if (!pblk->w_rq_pool)
-		goto free_r_rq_pool;
+		goto free_g_rq_pool;
 
 	pblk->line_meta_pool =
-			mempool_create_slab_pool(16, pblk_line_meta_cache);
+			mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+							pblk_line_meta_cache);
 	if (!pblk->line_meta_pool)
 		goto free_w_rq_pool;
 
-	pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
-					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-	if (!pblk->kw_wq)
+	pblk->close_wq = alloc_workqueue("pblk-close-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+	if (!pblk->close_wq)
 		goto free_line_meta_pool;
 
+	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+	if (!pblk->bb_wq)
+		goto free_close_wq;
+
 	if (pblk_set_ppaf(pblk))
-		goto free_kw_wq;
+		goto free_bb_wq;
 
 	if (pblk_rwb_init(pblk))
-		goto free_kw_wq;
+		goto free_bb_wq;
 
 	INIT_LIST_HEAD(&pblk->compl_list);
 	return 0;
 
-free_kw_wq:
-	destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+	destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+	destroy_workqueue(pblk->close_wq);
 free_line_meta_pool:
 	mempool_destroy(pblk->line_meta_pool);
 free_w_rq_pool:
 	mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
-	mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+	mempool_destroy(pblk->g_rq_pool);
 free_rec_pool:
 	mempool_destroy(pblk->rec_pool);
 free_blk_ws_pool:
@@ -322,19 +316,22 @@ static int pblk_core_init(struct pblk *pblk)
 
 static void pblk_core_free(struct pblk *pblk)
 {
-	if (pblk->kw_wq)
-		destroy_workqueue(pblk->kw_wq);
+	if (pblk->close_wq)
+		destroy_workqueue(pblk->close_wq);
+
+	if (pblk->bb_wq)
+		destroy_workqueue(pblk->bb_wq);
 
 	mempool_destroy(pblk->page_pool);
 	mempool_destroy(pblk->line_ws_pool);
 	mempool_destroy(pblk->rec_pool);
-	mempool_destroy(pblk->r_rq_pool);
+	mempool_destroy(pblk->g_rq_pool);
 	mempool_destroy(pblk->w_rq_pool);
 	mempool_destroy(pblk->line_meta_pool);
 
 	kmem_cache_destroy(pblk_blk_ws_cache);
 	kmem_cache_destroy(pblk_rec_cache);
-	kmem_cache_destroy(pblk_r_rq_cache);
+	kmem_cache_destroy(pblk_g_rq_cache);
 	kmem_cache_destroy(pblk_w_rq_cache);
 	kmem_cache_destroy(pblk_line_meta_cache);
 }
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
 	kfree(pblk->luns);
 }
 
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+	kfree(line->blk_bitmap);
+	kfree(line->erase_bitmap);
+}
+
 static void pblk_lines_free(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
 		line = &pblk->lines[i];
 
 		pblk_line_free(pblk, line);
-		kfree(line->blk_bitmap);
-		kfree(line->erase_bitmap);
+		pblk_free_line_bitmaps(line);
 	}
 	spin_unlock(&l_mg->free_lock);
 }
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
 
 	kfree(l_mg->bb_template);
 	kfree(l_mg->bb_aux);
+	kfree(l_mg->vsc_list);
 
+	spin_lock(&l_mg->free_lock);
 	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+		kfree(l_mg->sline_meta[i]);
+		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+		kfree(l_mg->eline_meta[i]);
 	}
+	spin_unlock(&l_mg->free_lock);
 
 	kfree(pblk->lines);
 }
@@ -411,13 +417,31 @@ static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
 	return ret;
 }
 
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+			int blk_per_line)
 {
-	struct pblk_line_meta *lm = &pblk->lm;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
 	struct pblk_lun *rlun;
 	int bb_cnt = 0;
 	int i;
 
+	for (i = 0; i < blk_per_line; i++) {
+		rlun = &pblk->luns[i];
+		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+			continue;
+
+		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+		bb_cnt++;
+	}
+
+	return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+
 	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
 	if (!line->blk_bitmap)
 		return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < lm->blk_per_line; i++) {
-		rlun = &pblk->luns[i];
-		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
-			continue;
-
-		set_bit(i, line->blk_bitmap);
-		bb_cnt++;
-	}
-
-	return bb_cnt;
+	return 0;
 }
 
 static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
 }
 
 /* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
 {
-	return (sizeof(struct line_emeta) +
-			((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
-			(pblk->l_mg.nr_lines * sizeof(u32)) +
-			lm->blk_bitmap_len);
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	/* Round to sector size so that lba_list starts on its own sector */
+	lm->emeta_sec[1] = DIV_ROUND_UP(
+			sizeof(struct line_emeta) + lm->blk_bitmap_len,
+			geo->sec_size);
+	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+	/* Round to sector size so that vsc_list starts on its own sector */
+	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+			geo->sec_size);
+	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+			geo->sec_size);
+	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
 }
 
 static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
 }
 
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	int i;
+
+	/* smeta is always small enough to fit on a kmalloc memory allocation,
+	 * emeta depends on the number of LUNs allocated to the pblk instance
+	 */
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+		if (!l_mg->sline_meta[i])
+			goto fail_free_smeta;
+	}
+
+	/* emeta allocates three different buffers for managing metadata with
+	 * in-memory and in-media layouts
+	 */
+	for (i = 0; i < PBLK_DATA_LINES; i++) {
+		struct pblk_emeta *emeta;
+
+		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+		if (!emeta)
+			goto fail_free_emeta;
+
+		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+			emeta->buf = vmalloc(lm->emeta_len[0]);
+			if (!emeta->buf) {
+				kfree(emeta);
+				goto fail_free_emeta;
+			}
+
+			emeta->nr_entries = lm->emeta_sec[0];
+			l_mg->eline_meta[i] = emeta;
+		} else {
+			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+			if (!emeta->buf) {
+				kfree(emeta);
+				goto fail_free_emeta;
+			}
+
+			emeta->nr_entries = lm->emeta_sec[0];
+			l_mg->eline_meta[i] = emeta;
+		}
+	}
+
+	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+	if (!l_mg->vsc_list)
+		goto fail_free_emeta;
+
+	for (i = 0; i < l_mg->nr_lines; i++)
+		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+	return 0;
+
+fail_free_emeta:
+	while (--i >= 0) {
+		vfree(l_mg->eline_meta[i]->buf);
+		kfree(l_mg->eline_meta[i]);
+	}
+
+fail_free_smeta:
+	for (i = 0; i < PBLK_DATA_LINES; i++)
+		kfree(l_mg->sline_meta[i]);
+
+	return -ENOMEM;
+}
+
 static int pblk_lines_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line *line;
 	unsigned int smeta_len, emeta_len;
-	long nr_bad_blks, nr_meta_blks, nr_free_blks;
-	int bb_distance;
-	int i;
-	int ret;
+	long nr_bad_blks, nr_free_blks;
+	int bb_distance, max_write_ppas, mod;
+	int i, ret;
+
+	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+				max_write_ppas : nvm_max_phys_sects(dev);
+	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+		pr_err("pblk: cannot support device max_phys_sect\n");
+		return -EINVAL;
+	}
+
+	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+	if (mod) {
+		pr_err("pblk: bad configuration of sectors/pages\n");
+		return -EINVAL;
+	}
+
+	l_mg->nr_lines = geo->blks_per_lun;
+	l_mg->log_line = l_mg->data_line = NULL;
+	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+	l_mg->nr_free_lines = 0;
+	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 
 	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
 	lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
 	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
 	lm->high_thrs = lm->sec_per_line / 2;
 	lm->mid_thrs = lm->sec_per_line / 4;
+	lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
 
 	/* Calculate necessary pages for smeta. See comment over struct
 	 * line_smeta definition
 	 */
-	lm->smeta_len = sizeof(struct line_smeta) +
-				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
 	i = 1;
 add_smeta_page:
 	lm->smeta_sec = i * geo->sec_per_pl;
 	lm->smeta_len = lm->smeta_sec * geo->sec_size;
 
-	smeta_len = sizeof(struct line_smeta) +
-				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
 	if (smeta_len > lm->smeta_len) {
 		i++;
 		goto add_smeta_page;
@@ -578,66 +704,28 @@ static int pblk_lines_init(struct pblk *pblk)
 	 */
 	i = 1;
 add_emeta_page:
-	lm->emeta_sec = i * geo->sec_per_pl;
-	lm->emeta_len = lm->emeta_sec * geo->sec_size;
+	lm->emeta_sec[0] = i * geo->sec_per_pl;
+	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
 
-	emeta_len = calc_emeta_len(pblk, lm);
-	if (emeta_len > lm->emeta_len) {
+	emeta_len = calc_emeta_len(pblk);
+	if (emeta_len > lm->emeta_len[0]) {
 		i++;
 		goto add_emeta_page;
 	}
+
 	lm->emeta_bb = geo->nr_luns - i;
-
-	nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
-				(geo->sec_per_blk / 2)) / geo->sec_per_blk;
-	lm->min_blk_line = nr_meta_blks + 1;
-
-	l_mg->nr_lines = geo->blks_per_lun;
-	l_mg->log_line = l_mg->data_line = NULL;
-	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
-	l_mg->nr_free_lines = 0;
-	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-
-	/* smeta is always small enough to fit on a kmalloc memory allocation,
-	 * emeta depends on the number of LUNs allocated to the pblk instance
-	 */
-	l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
-	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
-		if (!l_mg->sline_meta[i].meta)
-			while (--i >= 0) {
-				kfree(l_mg->sline_meta[i].meta);
-				ret = -ENOMEM;
-				goto fail;
-			}
+	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+							geo->sec_per_blk);
+	if (lm->min_blk_line > lm->blk_per_line) {
+		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+							lm->blk_per_line);
+		ret = -EINVAL;
+		goto fail;
 	}
 
-	if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
-		l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
-		for (i = 0; i < PBLK_DATA_LINES; i++) {
-			l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
-			if (!l_mg->eline_meta[i].meta)
-				while (--i >= 0) {
-					vfree(l_mg->eline_meta[i].meta);
-					ret = -ENOMEM;
-					goto fail;
-				}
-		}
-	} else {
-		l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
-		for (i = 0; i < PBLK_DATA_LINES; i++) {
-			l_mg->eline_meta[i].meta =
-					kmalloc(lm->emeta_len, GFP_KERNEL);
-			if (!l_mg->eline_meta[i].meta)
-				while (--i >= 0) {
-					kfree(l_mg->eline_meta[i].meta);
-					ret = -ENOMEM;
-					goto fail;
-				}
-		}
-	}
+	ret = pblk_lines_alloc_metadata(pblk);
+	if (ret)
+		goto fail;
 
 	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
 	if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ static int pblk_lines_init(struct pblk *pblk)
 	INIT_LIST_HEAD(&l_mg->gc_low_list);
 	INIT_LIST_HEAD(&l_mg->gc_empty_list);
 
+	INIT_LIST_HEAD(&l_mg->emeta_list);
+
 	l_mg->gc_lists[0] = &l_mg->gc_high_list;
 	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
 	l_mg->gc_lists[2] = &l_mg->gc_low_list;
 
 	spin_lock_init(&l_mg->free_lock);
+	spin_lock_init(&l_mg->close_lock);
 	spin_lock_init(&l_mg->gc_lock);
 
 	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ static int pblk_lines_init(struct pblk *pblk)
 		line->type = PBLK_LINETYPE_FREE;
 		line->state = PBLK_LINESTATE_FREE;
 		line->gc_group = PBLK_LINEGC_NONE;
+		line->vsc = &l_mg->vsc_list[i];
 		spin_lock_init(&line->lock);
 
-		nr_bad_blks = pblk_bb_line(pblk, line);
+		ret = pblk_alloc_line_bitmaps(pblk, line);
+		if (ret)
+			goto fail_free_lines;
+
+		nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
 		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+			pblk_free_line_bitmaps(line);
 			ret = -EINVAL;
 			goto fail_free_lines;
 		}
@@ -713,24 +810,20 @@ static int pblk_lines_init(struct pblk *pblk)
 
 	pblk_set_provision(pblk, nr_free_blks);
 
-	sema_init(&pblk->erase_sem, 1);
-
 	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
 	for (i = 0; i < geo->nr_luns; i++)
 		kfree(pblk->luns[i].bb_list);
 
 	return 0;
 fail_free_lines:
-	kfree(pblk->lines);
+	while (--i >= 0)
+		pblk_free_line_bitmaps(&pblk->lines[i]);
 fail_free_bb_aux:
 	kfree(l_mg->bb_aux);
 fail_free_bb_template:
 	kfree(l_mg->bb_template);
 fail_free_meta:
-	for (i = 0; i < PBLK_DATA_LINES; i++) {
-		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
-	}
+	pblk_line_meta_free(pblk);
 fail:
 	for (i = 0; i < geo->nr_luns; i++)
 		kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
 
 static void pblk_writer_stop(struct pblk *pblk)
 {
+	/* The pipeline must be stopped and the write buffer emptied before the
+	 * write thread is stopped
+	 */
+	WARN(pblk_rb_read_count(&pblk->rwb),
+			"Stopping not fully persisted write buffer\n");
+
+	WARN(pblk_rb_sync_count(&pblk->rwb),
+			"Stopping not fully synced write buffer\n");
+
 	if (pblk->writer_ts)
 		kthread_stop(pblk->writer_ts);
 	del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
 
 static void pblk_tear_down(struct pblk *pblk)
 {
-	pblk_flush_writer(pblk);
+	pblk_pipeline_stop(pblk);
 	pblk_writer_stop(pblk);
 	pblk_rb_sync_l2p(&pblk->rwb);
-	pblk_recov_pad(pblk);
 	pblk_rwb_free(pblk);
 	pblk_rl_free(&pblk->rl);
 
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 
 	pblk->dev = dev;
 	pblk->disk = tdisk;
+	pblk->state = PBLK_STATE_RUNNING;
 
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	atomic_long_set(&pblk->req_writes, 0);
 	atomic_long_set(&pblk->sub_writes, 0);
 	atomic_long_set(&pblk->sync_writes, 0);
-	atomic_long_set(&pblk->compl_writes, 0);
 	atomic_long_set(&pblk->inflight_reads, 0);
+	atomic_long_set(&pblk->cache_reads, 0);
 	atomic_long_set(&pblk->sync_reads, 0);
 	atomic_long_set(&pblk->recov_writes, 0);
 	atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
 
 static int __init pblk_module_init(void)
 {
-	return nvm_register_tgt_type(&tt_pblk);
+	int ret;
+
+	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+	if (!pblk_bio_set)
+		return -ENOMEM;
+	ret = nvm_register_tgt_type(&tt_pblk);
+	if (ret)
+		bioset_free(pblk_bio_set);
+	return ret;
 }
 
 static void pblk_module_exit(void)
 {
+	bioset_free(pblk_bio_set);
 	nvm_unregister_tgt_type(&tt_pblk);
 }
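
calc_emeta_len() above sizes the end-of-line metadata by rounding each region (header plus bad-block bitmap, the lba_list, and the vsc_list) up to whole sectors, so that each region starts on its own sector boundary. A small userspace sketch of that rounding, with assumed sizes rather than device-derived geometry, could be:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int sec_size = 4096;		/* assumed sector size */
	unsigned int hdr_len = 512 + 128;	/* header + bad-block bitmap (assumed) */
	unsigned int lba_list_len = 3072 * 8;	/* dsec_per_line * sizeof(u64) (assumed) */
	unsigned int vsc_list_len = 1024 * 4;	/* nr_lines * sizeof(u32) (assumed) */

	unsigned int secs = DIV_ROUND_UP(hdr_len, sec_size) +
			    DIV_ROUND_UP(lba_list_len, sec_size) +
			    DIV_ROUND_UP(vsc_list_len, sec_size);

	printf("emeta spans %u sectors (%u bytes)\n", secs, secs * sec_size);
	return 0;
}
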
 
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c1695..fddb924 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			       unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
 	struct pblk_w_ctx *w_ctx;
-	__le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+	__le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
 	u64 paddr;
 	int nr_secs = pblk->min_write_pgs;
 	int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			w_ctx->ppa = ppa_list[i];
 			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
 			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
-			le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+			line->nr_valid_lbas++;
 		} else {
-			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-			lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
-			pblk_map_pad_invalidate(pblk, line, paddr);
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+			lba_list[paddr] = meta_list[i].lba = addr_empty;
+			__pblk_map_invalidate(pblk, line, paddr);
 		}
 	}
 
 	if (pblk_line_is_full(line)) {
-		line = pblk_line_replace_data(pblk);
-		if (!line)
-			return;
+		struct pblk_line *prev_line = line;
+
+		pblk_line_replace_data(pblk);
+		pblk_line_close_meta(pblk, prev_line);
 	}
 
 	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct pblk_line *e_line, *d_line;
 	unsigned int map_secs;
 	int min = pblk->min_write_pgs;
 	int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
 					lun_bitmap, &meta_list[i], map_secs);
 
-		erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
-							rqd->ppa_list[i].g.ch;
+		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
 
+		/* line can change after page map. We might also be writing the
+		 * last line.
+		 */
+		e_line = pblk_line_get_erase(pblk);
+		if (!e_line)
+			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+							valid_secs, i + min);
+
+		spin_lock(&e_line->lock);
 		if (!test_bit(erase_lun, e_line->erase_bitmap)) {
-			if (down_trylock(&pblk->erase_sem))
-				continue;
-
 			set_bit(erase_lun, e_line->erase_bitmap);
 			atomic_dec(&e_line->left_eblks);
+
 			*erase_ppa = rqd->ppa_list[i];
 			erase_ppa->g.blk = e_line->id;
 
+			spin_unlock(&e_line->lock);
+
 			/* Avoid evaluating e_line->left_eblks */
 			return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
 							valid_secs, i + min);
 		}
+		spin_unlock(&e_line->lock);
 	}
 
-	/* Erase blocks that are bad in this line but might not be in next */
-	if (unlikely(ppa_empty(*erase_ppa))) {
-		struct pblk_line_meta *lm = &pblk->lm;
+	d_line = pblk_line_get_data(pblk);
 
-		i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
-		if (i == lm->blk_per_line)
+	/* The line can change after the page is mapped. We might also be
+	 * writing the last line.
+	 */
+	e_line = pblk_line_get_erase(pblk);
+	if (!e_line)
+		return;
+
+	/* Erase blocks that are bad in this line but might not be in next */
+	if (unlikely(ppa_empty(*erase_ppa)) &&
+			bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+		int bit = -1;
+
+retry:
+		bit = find_next_bit(d_line->blk_bitmap,
+						lm->blk_per_line, bit + 1);
+		if (bit >= lm->blk_per_line)
 			return;
 
-		set_bit(i, e_line->erase_bitmap);
+		spin_lock(&e_line->lock);
+		if (test_bit(bit, e_line->erase_bitmap)) {
+			spin_unlock(&e_line->lock);
+			goto retry;
+		}
+		spin_unlock(&e_line->lock);
+
+		set_bit(bit, e_line->erase_bitmap);
 		atomic_dec(&e_line->left_eblks);
-		*erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+		*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
 		erase_ppa->g.blk = e_line->id;
 	}
 }
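
The rework above has pblk_map_erase_rq() claim a block for erasure by checking the line's erase bitmap under the line spinlock and retrying across the data line's bad-block bitmap, instead of serializing on the old global erase semaphore. The sketch below shows that claim pattern in isolation; it is only an illustration under assumed names (struct demo_line and demo_claim_erase_block() are invented, and the claim is done entirely under the lock here for clarity), not code from the patch.

/* Minimal sketch of the "test and claim under the line lock" pattern used
 * by pblk_map_erase_rq() above. All demo_* names are invented.
 */
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

struct demo_line {
	spinlock_t lock;
	unsigned long *erase_bitmap;	/* blocks already queued for erase */
	unsigned long *blk_bitmap;	/* bad blocks in the data line */
	atomic_t left_eblks;		/* blocks still waiting to be erased */
	int blk_per_line;
};

/* Return the block claimed for erase, or -1 if every candidate is taken. */
static int demo_claim_erase_block(struct demo_line *line)
{
	int bit = -1;

	for (;;) {
		bit = find_next_bit(line->blk_bitmap, line->blk_per_line,
				    bit + 1);
		if (bit >= line->blk_per_line)
			return -1;

		spin_lock(&line->lock);
		if (!test_bit(bit, line->erase_bitmap)) {
			set_bit(bit, line->erase_bitmap);
			atomic_dec(&line->left_eblks);
			spin_unlock(&line->lock);
			return bit;
		}
		spin_unlock(&line->lock);
	}
}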
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384d..5ecc154 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx)
 	/* Release flags on context. Protect from writes and reads */
 	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
 	pblk_ppa_set_empty(&w_ctx->ppa);
+	w_ctx->lba = ADDR_EMPTY;
 }
 
 #define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
 	return pblk_rb_ring_count(mem, subm, rb->nr_entries);
 }
 
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+	unsigned int mem = READ_ONCE(rb->mem);
+	unsigned int sync = READ_ONCE(rb->sync);
+
+	return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
 {
 	unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 	struct pblk_line *line;
 	struct pblk_rb_entry *entry;
 	struct pblk_w_ctx *w_ctx;
+	unsigned int user_io = 0, gc_io = 0;
 	unsigned int i;
+	int flags;
 
 	for (i = 0; i < to_update; i++) {
 		entry = &rb->entries[*l2p_upd];
 		w_ctx = &entry->w_ctx;
 
+		flags = READ_ONCE(entry->w_ctx.flags);
+		if (flags & PBLK_IOTYPE_USER)
+			user_io++;
+		else if (flags & PBLK_IOTYPE_GC)
+			gc_io++;
+		else
+			WARN(1, "pblk: unknown IO type\n");
+
 		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
 							entry->cacheline);
 
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 		*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
 	}
 
+	pblk_rl_out(&pblk->rl, user_io, gc_io);
+
 	return 0;
 }
 
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
 	/* Protect syncs */
 	smp_store_release(&rb->sync_point, sync_point);
 
+	if (!bio)
+		return 0;
+
 	spin_lock_irq(&rb->s_lock);
 	bio_list_add(&entry->w_ctx.bios, bio);
 	spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
 	return 1;
 }
 
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	unsigned int mem = READ_ONCE(rb->mem);
+
+	if (pblk_rb_sync_point_set(rb, NULL, mem))
+		return;
+
+	pblk_write_should_kick(pblk);
+}
+
 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
 				   unsigned int *pos, struct bio *bio,
 				   int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
 			   unsigned int nr_entries, unsigned int *pos)
 {
 	struct pblk *pblk = container_of(rb, struct pblk, rwb);
-	int flush_done;
+	int io_ret;
 
 	spin_lock(&rb->w_lock);
-	if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+	io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+	if (io_ret) {
 		spin_unlock(&rb->w_lock);
-		return NVM_IO_REQUEUE;
+		return io_ret;
 	}
 
-	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+	if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
 		spin_unlock(&rb->w_lock);
 		return NVM_IO_REQUEUE;
 	}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
 	pblk_rl_user_in(&pblk->rl, nr_entries);
 	spin_unlock(&rb->w_lock);
 
-	return flush_done;
+	return io_ret;
 }
 
 /*
@@ -521,20 +557,18 @@ unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
  * This function is used by the write thread to form the write bio that will
  * persist data on the write buffer to the media.
  */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-				 struct pblk_c_ctx *c_ctx,
-				 unsigned int pos,
-				 unsigned int nr_entries,
-				 unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+				 struct bio *bio, unsigned int pos,
+				 unsigned int nr_entries, unsigned int count)
 {
 	struct pblk *pblk = container_of(rb, struct pblk, rwb);
+	struct request_queue *q = pblk->dev->q;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_rb_entry *entry;
 	struct page *page;
-	unsigned int pad = 0, read = 0, to_read = nr_entries;
-	unsigned int user_io = 0, gc_io = 0;
+	unsigned int pad = 0, to_read = nr_entries;
 	unsigned int i;
 	int flags;
-	int ret;
 
 	if (count < nr_entries) {
 		pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
 		 */
 try:
 		flags = READ_ONCE(entry->w_ctx.flags);
-		if (!(flags & PBLK_WRITTEN_DATA))
+		if (!(flags & PBLK_WRITTEN_DATA)) {
+			io_schedule();
 			goto try;
-
-		if (flags & PBLK_IOTYPE_USER)
-			user_io++;
-		else if (flags & PBLK_IOTYPE_GC)
-			gc_io++;
-		else
-			WARN(1, "pblk: unknown IO type\n");
+		}
 
 		page = virt_to_page(entry->data);
 		if (!page) {
@@ -570,17 +599,17 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
 			flags |= PBLK_SUBMITTED_ENTRY;
 			/* Release flags on context. Protect from writes */
 			smp_store_release(&entry->w_ctx.flags, flags);
-			goto out;
+			return NVM_IO_ERR;
 		}
 
-		ret = bio_add_page(bio, page, rb->seg_size, 0);
-		if (ret != rb->seg_size) {
+		if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+								rb->seg_size) {
 			pr_err("pblk: could not add page to write bio\n");
 			flags &= ~PBLK_WRITTEN_DATA;
 			flags |= PBLK_SUBMITTED_ENTRY;
 			/* Release flags on context. Protect from writes */
 			smp_store_release(&entry->w_ctx.flags, flags);
-			goto out;
+			return NVM_IO_ERR;
 		}
 
 		if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
 		pos = (pos + 1) & (rb->nr_entries - 1);
 	}
 
-	read = to_read;
-	pblk_rl_out(&pblk->rl, user_io, gc_io);
+	if (pad) {
+		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+			pr_err("pblk: could not pad page in write bio\n");
+			return NVM_IO_ERR;
+		}
+	}
+
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(pad, &((struct pblk *)
 			(container_of(rb, struct pblk, rwb)))->padded_writes);
 #endif
-out:
-	return read;
+
+	return NVM_IO_OK;
 }
 
 /*
@@ -623,15 +657,17 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
  * be directed to disk.
  */
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-			u64 pos, int bio_iter)
+			struct ppa_addr ppa, int bio_iter)
 {
+	struct pblk *pblk = container_of(rb, struct pblk, rwb);
 	struct pblk_rb_entry *entry;
 	struct pblk_w_ctx *w_ctx;
+	struct ppa_addr l2p_ppa;
+	u64 pos = pblk_addr_to_cacheline(ppa);
 	void *data;
 	int flags;
 	int ret = 1;
 
-	spin_lock(&rb->w_lock);
 
 #ifdef CONFIG_NVM_DEBUG
 	/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
 	w_ctx = &entry->w_ctx;
 	flags = READ_ONCE(w_ctx->flags);
 
+	spin_lock(&rb->w_lock);
+	spin_lock(&pblk->trans_lock);
+	l2p_ppa = pblk_trans_map_get(pblk, lba);
+	spin_unlock(&pblk->trans_lock);
+
 	/* Check if the entry has been overwritten or is scheduled to be */
-	if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+	if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+						flags & PBLK_WRITABLE_ENTRY) {
 		ret = 0;
 		goto out;
 	}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14..4e5c48f 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
 	BUG_ON(!pblk_addr_in_cache(ppa));
 #endif
 
-	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
-					pblk_addr_to_cacheline(ppa), bio_iter);
+	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
 }
 
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			}
 			WARN_ON(test_and_set_bit(i, read_bitmap));
 			advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+			atomic_long_inc(&pblk->cache_reads);
+#endif
 		} else {
 			/* Read from media non-cached sectors */
 			rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
 	}
 
+	if (pblk_io_aligned(pblk, nr_secs))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(nr_secs, &pblk->inflight_reads);
 #endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	int err;
 
-	rqd->flags = pblk_set_read_mode(pblk);
-
 	err = pblk_submit_io(pblk, rqd);
 	if (err)
 		return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct bio *bio = rqd->bio;
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
 #endif
 
-	if (rqd->nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
 	bio_put(bio);
-	if (r_ctx->orig_bio) {
+	if (r_ctx->private) {
+		struct bio *orig_bio = r_ctx->private;
+
 #ifdef CONFIG_NVM_DEBUG
-		WARN_ONCE(r_ctx->orig_bio->bi_error,
-						"pblk: corrupted read bio\n");
+		WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
 #endif
-		bio_endio(r_ctx->orig_bio);
-		bio_put(r_ctx->orig_bio);
+		bio_endio(orig_bio);
+		bio_put(orig_bio);
 	}
 
 #ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 #endif
 
 	pblk_free_rqd(pblk, rqd, READ);
+	atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	rqd->bio = new_bio;
 	rqd->nr_ppas = nr_holes;
+	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 	rqd->end_io = NULL;
 
 	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			goto retry;
 		}
 		WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+			atomic_long_inc(&pblk->cache_reads);
+#endif
 	} else {
 		rqd->ppa_addr = ppa;
 	}
+
+	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 }
 
 int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	 */
 	bio_init_idx = pblk_get_bi_idx(bio);
 
+	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd->dma_meta_list);
+	if (!rqd->meta_list) {
+		pr_err("pblk: not able to allocate metadata list\n");
+		goto fail_rqd_free;
+	}
+
 	if (nr_secs > 1) {
-		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-						&rqd->dma_ppa_list);
-		if (!rqd->ppa_list) {
-			pr_err("pblk: not able to allocate ppa list\n");
-			goto fail_rqd_free;
-		}
+		rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+		rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
 		pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
 	} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	bio_get(bio);
 	if (bitmap_full(&read_bitmap, nr_secs)) {
 		bio_endio(bio);
+		atomic_inc(&pblk->inflight_io);
 		pblk_end_io_read(rqd);
 		return NVM_IO_OK;
 	}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	/* All sectors are to be read from the device */
 	if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
 		struct bio *int_bio = NULL;
-		struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+		struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 
 		/* Clone read bio to deal with read errors internally */
-		int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+		int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
 		if (!int_bio) {
 			pr_err("pblk: could not clone read bio\n");
 			return NVM_IO_ERR;
 		}
 
 		rqd->bio = int_bio;
-		r_ctx->orig_bio = bio;
+		r_ctx->private = bio;
 
 		ret = pblk_submit_read_io(pblk, rqd);
 		if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct request_queue *q = dev->q;
 	struct bio *bio;
 	struct nvm_rq rqd;
 	int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
+	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+							&rqd.dma_meta_list);
+	if (!rqd.meta_list)
+		return NVM_IO_ERR;
+
 	if (nr_secs > 1) {
-		rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-							&rqd.dma_ppa_list);
-		if (!rqd.ppa_list)
-			return NVM_IO_ERR;
+		rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+		rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
 
 		*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
 								nr_secs);
-		if (*secs_to_gc == 1) {
-			struct ppa_addr ppa;
-
-			ppa = rqd.ppa_list[0];
-			nvm_dev_dma_free(dev->parent, rqd.ppa_list,
-							rqd.dma_ppa_list);
-			rqd.ppa_addr = ppa;
-		}
+		if (*secs_to_gc == 1)
+			rqd.ppa_addr = rqd.ppa_list[0];
 	} else {
 		*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
 	}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 		goto out;
 
 	data_len = (*secs_to_gc) * geo->sec_size;
-	bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+						PBLK_KMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
 		goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 	rqd.end_io = pblk_end_io_sync;
 	rqd.private = &wait;
 	rqd.nr_ppas = *secs_to_gc;
+	rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 	rqd.bio = bio;
 
 	ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: GC read I/O timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
 		atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 #endif
 
 out:
-	if (rqd.nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return NVM_IO_OK;
 
 err_free_dma:
-	if (rqd.nr_ppas > 1)
-		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 	return NVM_IO_ERR;
 }
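
Both read paths above now make a single nvm_dev_dma_alloc() and derive the PPA list from the metadata buffer at a fixed offset (pblk_dma_meta_size), so exactly one buffer has to be freed in the completion path regardless of how many sectors are in flight. The sketch below restates that layout; it is a hedged illustration only, using plain dma_alloc_coherent() as a stand-in for nvm_dev_dma_alloc(), with DEMO_META_SIZE and the demo_* names invented for the example.

/* Illustrative only: one DMA-coherent allocation carrying the per-sector
 * metadata first and the PPA list right behind it, mirroring how the read
 * path above lays out rqd->meta_list and rqd->ppa_list.
 */
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/gfp.h>

#define DEMO_META_SIZE	2048		/* assumed size of the metadata area */

struct demo_rq {
	void *meta_list;		/* start of the shared buffer */
	void *ppa_list;			/* DEMO_META_SIZE bytes further in */
	dma_addr_t dma_meta_list;
	dma_addr_t dma_ppa_list;
};

static int demo_rq_alloc(struct device *dev, struct demo_rq *rqd,
			 size_t ppa_list_len)
{
	rqd->meta_list = dma_alloc_coherent(dev, DEMO_META_SIZE + ppa_list_len,
					    &rqd->dma_meta_list, GFP_KERNEL);
	if (!rqd->meta_list)
		return -ENOMEM;

	/* The PPA list is not a second allocation, only an offset. */
	rqd->ppa_list = rqd->meta_list + DEMO_META_SIZE;
	rqd->dma_ppa_list = rqd->dma_meta_list + DEMO_META_SIZE;
	return 0;
}

static void demo_rq_free(struct device *dev, struct demo_rq *rqd,
			 size_t ppa_list_len)
{
	/* One free covers both the metadata and the PPA list. */
	dma_free_coherent(dev, DEMO_META_SIZE + ppa_list_len,
			  rqd->meta_list, rqd->dma_meta_list);
}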
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f8508..0e48d3e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 	return 0;
 }
 
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
 	u32 crc;
 
-	crc = pblk_calc_emeta_crc(pblk, emeta);
-	if (le32_to_cpu(emeta->crc) != crc)
+	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+	if (le32_to_cpu(emeta_buf->crc) != crc)
 		return NULL;
 
-	if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
 		return NULL;
 
-	return pblk_line_emeta_to_lbas(emeta);
+	return emeta_to_lbas(pblk, emeta_buf);
 }
 
 static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct line_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta = line->emeta;
+	struct line_emeta *emeta_buf = emeta->buf;
 	__le64 *lba_list;
 	int data_start;
 	int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
 	int i;
 
-	lba_list = pblk_recov_get_lba_list(pblk, emeta);
+	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
 	if (!lba_list)
 		return 1;
 
 	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
-	nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
-	nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+	nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
 
 	for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
 		struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 			if (test_and_set_bit(i, line->invalid_bitmap))
 				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
 			else
-				line->vsc--;
+				le32_add_cpu(line->vsc, -1);
 			spin_unlock(&line->lock);
 
 			continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 
 	if (nr_valid_lbas != nr_lbas)
 		pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
-				line->id, line->emeta->nr_valid_lbas, nr_lbas);
+				line->id, emeta_buf->nr_valid_lbas, nr_lbas);
 
 	line->left_msecs = 0;
 
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 
-	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
 				nr_bb * geo->sec_per_blk;
 }
 
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 	r_ptr_int = r_ptr;
 
 next_read_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -256,7 +257,6 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -295,7 +300,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 		pr_err("pblk: L2P recovery read timed out\n");
 		return -EINTR;
 	}
-
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 	return 0;
 }
 
+static void pblk_recov_complete(struct kref *ref)
+{
+	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+	complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+	struct pblk_pad_rq *pad_rq = rqd->private;
+	struct pblk *pblk = pad_rq->pblk;
+	struct nvm_tgt_dev *dev = pblk->dev;
+
+	kref_put(&pad_rq->ref, pblk_recov_complete);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+	pblk_free_rqd(pblk, rqd, WRITE);
+}
+
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
-			      struct pblk_recov_alloc p, int left_ppas)
+			      int left_ppas)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr *ppa_list;
 	struct pblk_sec_meta *meta_list;
+	struct pblk_pad_rq *pad_rq;
 	struct nvm_rq *rqd;
 	struct bio *bio;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
-	__le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	u64 w_ptr = line->cur_sec;
-	int left_line_ppas = line->left_msecs;
-	int rq_ppas, rq_len;
+	int left_line_ppas, rq_ppas, rq_len;
 	int i, j;
 	int ret = 0;
-	DECLARE_COMPLETION_ONSTACK(wait);
 
-	ppa_list = p.ppa_list;
-	meta_list = p.meta_list;
-	rqd = p.rqd;
-	data = p.data;
-	dma_ppa_list = p.dma_ppa_list;
-	dma_meta_list = p.dma_meta_list;
+	spin_lock(&line->lock);
+	left_line_ppas = line->left_msecs;
+	spin_unlock(&line->lock);
+
+	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+	if (!pad_rq)
+		return -ENOMEM;
+
+	data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+	if (!data) {
+		ret = -ENOMEM;
+		goto free_rq;
+	}
+
+	pad_rq->pblk = pblk;
+	init_completion(&pad_rq->wait);
+	kref_init(&pad_rq->ref);
 
 next_pad_rq:
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
-	if (!rq_ppas)
-		rq_ppas = pblk->min_write_pgs;
+	if (rq_ppas < pblk->min_write_pgs) {
+		pr_err("pblk: corrupted pad line %d\n", line->id);
+		goto free_rq;
+	}
+
 	rq_len = rq_ppas * geo->sec_size;
 
+	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+	if (!meta_list) {
+		ret = -ENOMEM;
+		goto free_data;
+	}
+
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+	rqd = pblk_alloc_rqd(pblk, WRITE);
+	if (IS_ERR(rqd)) {
+		ret = PTR_ERR(rqd);
+		goto fail_free_meta;
+	}
+	memset(rqd, 0, pblk_w_rq_size);
+
 	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto fail_free_rqd;
+	}
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
-	memset(rqd, 0, pblk_r_rq_size);
-
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PWRITE;
 	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	rqd->ppa_list = ppa_list;
 	rqd->dma_ppa_list = dma_ppa_list;
 	rqd->dma_meta_list = dma_meta_list;
-	rqd->end_io = pblk_end_io_sync;
-	rqd->private = &wait;
+	rqd->end_io = pblk_end_io_recov;
+	rqd->private = pad_rq;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
@@ -390,34 +442,51 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 
 		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
 			struct ppa_addr dev_ppa;
+			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 
 			pblk_map_invalidate(pblk, dev_ppa);
-			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-			lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
 			rqd->ppa_list[i] = dev_ppa;
 		}
 	}
 
+	kref_get(&pad_rq->ref);
+
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: I/O submission failed: %d\n", ret);
-		return ret;
+		goto free_data;
 	}
 
-	if (!wait_for_completion_io_timeout(&wait,
-				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-		pr_err("pblk: L2P recovery write timed out\n");
-	}
-	reinit_completion(&wait);
+	atomic_dec(&pblk->inflight_io);
 
 	left_line_ppas -= rq_ppas;
 	left_ppas -= rq_ppas;
-	if (left_ppas > 0 && left_line_ppas)
+	if (left_ppas && left_line_ppas)
 		goto next_pad_rq;
 
-	return 0;
+	kref_put(&pad_rq->ref, pblk_recov_complete);
+
+	if (!wait_for_completion_io_timeout(&pad_rq->wait,
+				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+		pr_err("pblk: pad write timed out\n");
+		ret = -ETIME;
+	}
+
+free_rq:
+	kfree(pad_rq);
+free_data:
+	vfree(data);
+	return ret;
+
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+	kfree(pad_rq);
+	return ret;
 }
 
 /* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 	rec_round = 0;
 
 next_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -472,7 +541,6 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -510,6 +583,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: L2P recovery read timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 		if (pad_secs > line->left_msecs)
 			pad_secs = line->left_msecs;
 
-		ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
 		if (ret)
 			pr_err("pblk: OOB padding failed (err:%d)\n", ret);
 
@@ -552,7 +626,6 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 		if (ret)
 			pr_err("pblk: OOB read failed (err:%d)\n", ret);
 
-		line->left_ssecs = line->left_msecs;
 		left_ppas = 0;
 	}
 
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	*done = 1;
 
 next_rq:
-	memset(rqd, 0, pblk_r_rq_size);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (!rq_ppas)
@@ -607,7 +680,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->flags = pblk_set_read_mode(pblk);
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
+	if (pblk_io_aligned(pblk, rq_ppas))
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+	else
+		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -646,6 +723,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 		pr_err("pblk: L2P recovery read timed out\n");
 	}
+	atomic_dec(&pblk->inflight_io);
 	reinit_completion(&wait);
 
 	/* Reached the end of the written line */
@@ -658,7 +736,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 		/* Roll back failed sectors */
 		line->cur_sec -= nr_error_bits;
 		line->left_msecs += nr_error_bits;
-		line->left_ssecs = line->left_msecs;
 		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
 
 		left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line, *tline, *data_line = NULL;
-	struct line_smeta *smeta;
-	struct line_emeta *emeta;
+	struct pblk_smeta *smeta;
+	struct pblk_emeta *emeta;
+	struct line_smeta *smeta_buf;
 	int found_lines = 0, recovered_lines = 0, open_lines = 0;
 	int is_next = 0;
 	int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 	spin_lock(&l_mg->free_lock);
 	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 	set_bit(meta_line, &l_mg->meta_bitmap);
-	smeta = l_mg->sline_meta[meta_line].meta;
-	emeta = l_mg->eline_meta[meta_line].meta;
+	smeta = l_mg->sline_meta[meta_line];
+	emeta = l_mg->eline_meta[meta_line];
+	smeta_buf = (struct line_smeta *)smeta;
 	spin_unlock(&l_mg->free_lock);
 
 	/* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		memset(smeta, 0, lm->smeta_len);
 		line->smeta = smeta;
-		line->lun_bitmap = ((void *)(smeta)) +
+		line->lun_bitmap = ((void *)(smeta_buf)) +
 						sizeof(struct line_smeta);
 
 		/* Lines that cannot be read are assumed as not written here */
 		if (pblk_line_read_smeta(pblk, line))
 			continue;
 
-		crc = pblk_calc_smeta_crc(pblk, smeta);
-		if (le32_to_cpu(smeta->crc) != crc)
+		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+		if (le32_to_cpu(smeta_buf->crc) != crc)
 			continue;
 
-		if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
 			continue;
 
-		if (le16_to_cpu(smeta->header.version) != 1) {
+		if (le16_to_cpu(smeta_buf->header.version) != 1) {
 			pr_err("pblk: found incompatible line version %u\n",
-					smeta->header.version);
+					smeta_buf->header.version);
 			return ERR_PTR(-EINVAL);
 		}
 
 		/* The first valid instance uuid is used for initialization */
 		if (!valid_uuid) {
-			memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
 			valid_uuid = 1;
 		}
 
-		if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
 			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
 					i);
 			continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		/* Update line metadata */
 		spin_lock(&line->lock);
-		line->id = le32_to_cpu(line->smeta->header.id);
-		line->type = le16_to_cpu(line->smeta->header.type);
-		line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+		line->id = le32_to_cpu(smeta_buf->header.id);
+		line->type = le16_to_cpu(smeta_buf->header.type);
+		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
 		spin_unlock(&line->lock);
 
 		/* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 		pblk_recov_line_add_ordered(&recov_list, line);
 		found_lines++;
 		pr_debug("pblk: recovering data line %d, seq:%llu\n",
-						line->id, smeta->seq_nr);
+						line->id, smeta_buf->seq_nr);
 	}
 
 	if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
 		recovered_lines++;
 		/* Calculate where emeta starts based on the line bb */
-		off = lm->sec_per_line - lm->emeta_sec;
+		off = lm->sec_per_line - lm->emeta_sec[0];
 		nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 		off -= nr_bb * geo->sec_per_pl;
 
-		memset(emeta, 0, lm->emeta_len);
-		line->emeta = emeta;
 		line->emeta_ssec = off;
+		line->emeta = emeta;
+		memset(line->emeta->buf, 0, lm->emeta_len[0]);
 
-		if (pblk_line_read_emeta(pblk, line)) {
+		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
 			pblk_recov_l2p_from_oob(pblk, line);
 			goto next;
 		}
@@ -941,58 +1020,26 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 }
 
 /*
- * Pad until smeta can be read on current data line
+ * Pad current line
  */
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line *line;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct nvm_rq *rqd;
-	struct pblk_recov_alloc p;
-	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
+	int left_msecs;
+	int ret = 0;
 
 	spin_lock(&l_mg->free_lock);
 	line = l_mg->data_line;
+	left_msecs = line->left_msecs;
 	spin_unlock(&l_mg->free_lock);
 
-	rqd = pblk_alloc_rqd(pblk, READ);
-	if (IS_ERR(rqd))
-		return;
-
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-	if (!meta_list)
-		goto free_rqd;
-
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
-	data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
-	if (!data)
-		goto free_meta_list;
-
-	p.ppa_list = ppa_list;
-	p.meta_list = meta_list;
-	p.rqd = rqd;
-	p.data = data;
-	p.dma_ppa_list = dma_ppa_list;
-	p.dma_meta_list = dma_meta_list;
-
-	if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
-		pr_err("pblk: Tear down padding failed\n");
-		goto free_data;
+	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+	if (ret) {
+		pr_err("pblk: Tear down padding failed (%d)\n", ret);
+		return ret;
 	}
 
-	pblk_line_close(pblk, line);
-
-free_data:
-	kfree(data);
-free_meta_list:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
-	pblk_free_rqd(pblk, rqd, READ);
+	pblk_line_close_meta(pblk, line);
+	return ret;
 }
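
The padding path above no longer waits synchronously on each write: pblk_recov_pad_oob() holds a kref in struct pblk_pad_rq, takes one reference per submitted request, and pblk_end_io_recov() drops it, so the completion fires only after the last pad write finishes. Below is a minimal sketch of that reference-counted completion pattern; the demo_* names are invented and the submission step is elided, so this is an illustration of the technique rather than the driver code itself.

/* Illustrative sketch: count asynchronous writes with a kref and let the
 * final put signal a completion, as struct pblk_pad_rq is used above.
 */
#include <linux/kref.h>
#include <linux/completion.h>

struct demo_pad_rq {
	struct kref ref;
	struct completion wait;
};

static void demo_pad_release(struct kref *ref)
{
	struct demo_pad_rq *pad = container_of(ref, struct demo_pad_rq, ref);

	complete(&pad->wait);
}

/* Called from each write's end_io path. */
static void demo_pad_end_io(struct demo_pad_rq *pad)
{
	kref_put(&pad->ref, demo_pad_release);
}

static void demo_pad(struct demo_pad_rq *pad, int nr_writes)
{
	int i;

	kref_init(&pad->ref);		/* submitter's own reference */
	init_completion(&pad->wait);

	for (i = 0; i < nr_writes; i++) {
		kref_get(&pad->ref);	/* one reference per in-flight write */
		/* ... submit the asynchronous write here ... */
	}

	/* Drop our reference; the last end_io to finish fires the completion. */
	kref_put(&pad->ref, demo_pad_release);
	wait_for_completion_io(&pad->wait);
}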
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb1..2e6a536 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
 	mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
 }
 
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+	int rb_space;
+
+	rb_space = atomic_read(&rl->rb_space);
+
+	return (rb_space == 0);
+}
+
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
 {
 	int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+	int rb_space = atomic_read(&rl->rb_space);
 
-	return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+	if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+		return NVM_IO_ERR;
+
+	if (rb_user_cnt >= rl->rb_user_max)
+		return NVM_IO_REQUEUE;
+
+	return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+	int rb_space = atomic_read(&rl->rb_space);
+
+	if (unlikely(rb_space >= 0))
+		atomic_sub(nr_entries, &rl->rb_space);
 }
 
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
 
 	/* If there is no user I/O let GC take over space on the write buffer */
 	rb_user_active = READ_ONCE(rl->rb_user_active);
-	return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+	return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
 }
 
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
 	unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
 
 	if (free_blocks >= rl->high) {
-		rl->rb_user_max = max - rl->rb_gc_rsv;
-		rl->rb_gc_max = rl->rb_gc_rsv;
+		rl->rb_user_max = max;
+		rl->rb_gc_max = 0;
 		rl->rb_state = PBLK_RL_HIGH;
 	} else if (free_blocks < rl->high) {
 		int shift = rl->high_pw - rl->rb_windows_pw;
 		int user_windows = free_blocks >> shift;
 		int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
-		int gc_max;
 
 		rl->rb_user_max = user_max;
-		gc_max = max - rl->rb_user_max;
-		rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
+		rl->rb_gc_max = max - user_max;
 
-		if (free_blocks > rl->low)
-			rl->rb_state = PBLK_RL_MID;
-		else
-			rl->rb_state = PBLK_RL_LOW;
+		if (free_blocks <= rl->rsv_blocks) {
+			rl->rb_user_max = 0;
+			rl->rb_gc_max = max;
+		}
+
+		/* In the worst case, we will need to GC lines in the low list
+		 * (high valid sector count). If there are lines to GC on the
+		 * high or mid lists, these will be prioritized.
+		 */
+		rl->rb_state = PBLK_RL_LOW;
 	}
 
 	return rl->rb_state;
 }
 
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
-	rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 {
-	struct pblk *pblk = container_of(rl, struct pblk, rl);
 	int blk_in_line = atomic_read(&line->blk_in_line);
-	int ret;
 
 	atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+	struct pblk_rl *rl = &pblk->rl;
+	int ret;
 
 	/* Rates will not change that often - no need to lock update */
 	ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 		pblk_gc_should_stop(pblk);
 }
 
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
 {
 	return rl->high;
 }
 
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+	return rl->low;
+}
+
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
 {
 	return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
 
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
+	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	struct pblk_line_meta *lm = &pblk->lm;
+	int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
 	unsigned int rb_windows;
 
 	rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
-	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
 	rl->high_pw = get_count_order(rl->high);
 
+	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+	if (rl->low < min_blocks)
+		rl->low = min_blocks;
+
+	rl->rsv_blocks = min_blocks;
+
 	/* This will always be a power-of-2 */
 	rb_windows = budget / PBLK_MAX_REQ_ADDRS;
-	rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+	rl->rb_windows_pw = get_count_order(rb_windows);
 
 	/* To start with, all buffer is available to user I/O writers */
 	rl->rb_budget = budget;
 	rl->rb_user_max = budget;
-	atomic_set(&rl->rb_user_cnt, 0);
 	rl->rb_gc_max = 0;
 	rl->rb_state = PBLK_RL_HIGH;
+
+	atomic_set(&rl->rb_user_cnt, 0);
 	atomic_set(&rl->rb_gc_cnt, 0);
+	atomic_set(&rl->rb_space, -1);
 
 	setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
 	rl->rb_user_active = 0;
+	rl->rb_gc_active = 0;
 }
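
With this change the user-side admission check is no longer a boolean: pblk_rl_user_may_insert() distinguishes a buffer that is being drained (rb_space holds the remaining capacity and anything that does not fit is failed) from a caller that is merely over its budget (requeued) and from the normal case (admitted). The sketch below restates that three-way decision; the demo_* names and verdict codes are invented stand-ins for the NVM_IO_ERR, NVM_IO_REQUEUE and NVM_IO_OK values used above.

/* Illustrative sketch of the three-way admission decision implemented by
 * pblk_rl_user_may_insert(). All demo_* names are invented.
 */
#include <linux/atomic.h>

enum demo_io_verdict {
	DEMO_IO_OK,		/* admit the entries */
	DEMO_IO_REQUEUE,	/* over the user budget, try again later */
	DEMO_IO_ERR,		/* buffer is being drained, fail the I/O */
};

struct demo_rl {
	atomic_t rb_user_cnt;	/* user entries currently buffered */
	atomic_t rb_space;	/* >= 0 only while draining for shutdown */
	int rb_user_max;	/* current user budget */
};

static enum demo_io_verdict demo_user_may_insert(struct demo_rl *rl,
						 int nr_entries)
{
	int user_cnt = atomic_read(&rl->rb_user_cnt);
	int space = atomic_read(&rl->rb_space);

	/* rb_space stays at -1 in normal operation; once it is set to the
	 * remaining capacity, anything that does not fit is rejected.
	 */
	if (space >= 0 && space - nr_entries < 0)
		return DEMO_IO_ERR;

	if (user_cnt >= rl->rb_user_max)
		return DEMO_IO_REQUEUE;

	return DEMO_IO_OK;
}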
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1..95fb434 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	int free_blocks, total_blocks;
 	int rb_user_max, rb_user_cnt;
-	int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+	int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
 
 	free_blocks = atomic_read(&pblk->rl.free_blocks);
 	rb_user_max = pblk->rl.rb_user_max;
 	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
 	rb_gc_max = pblk->rl.rb_gc_max;
-	rb_gc_rsv = pblk->rl.rb_gc_rsv;
 	rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
 	rb_budget = pblk->rl.rb_budget;
 	rb_state = pblk->rl.rb_state;
 
-	total_blocks = geo->blks_per_lun * geo->nr_luns;
+	total_blocks = pblk->rl.total_blocks;
 
 	return snprintf(page, PAGE_SIZE,
-		"u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+		"u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
 				rb_user_cnt,
 				rb_user_max,
 				rb_gc_cnt,
 				rb_gc_max,
-				rb_gc_rsv,
 				rb_state,
 				rb_budget,
 				pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	ssize_t sz = 0;
 	int nr_free_lines;
 	int cur_data, cur_log;
-	int free_line_cnt = 0, closed_line_cnt = 0;
+	int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
 	int d_line_cnt = 0, l_line_cnt = 0;
 	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
-	int free = 0, bad = 0, cor = 0;
-	int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+	int bad = 0, cor = 0;
+	int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
 	int map_weight = 0, meta_weight = 0;
 
 	spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 		free_line_cnt++;
 	spin_unlock(&l_mg->free_lock);
 
+	spin_lock(&l_mg->close_lock);
+	list_for_each_entry(line, &l_mg->emeta_list, list)
+		emeta_line_cnt++;
+	spin_unlock(&l_mg->close_lock);
+
 	spin_lock(&l_mg->gc_lock);
 	list_for_each_entry(line, &l_mg->gc_full_list, list) {
 		if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 		gc_empty++;
 	}
 
-	list_for_each_entry(line, &l_mg->free_list, list)
-		free++;
 	list_for_each_entry(line, &l_mg->bad_list, list)
 		bad++;
 	list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	if (l_mg->data_line) {
 		cur_sec = l_mg->data_line->cur_sec;
 		msecs = l_mg->data_line->left_msecs;
-		ssecs = l_mg->data_line->left_ssecs;
-		vsc = l_mg->data_line->vsc;
+		vsc = le32_to_cpu(*l_mg->data_line->vsc);
 		sec_in_line = l_mg->data_line->sec_in_line;
 		meta_weight = bitmap_weight(&l_mg->meta_bitmap,
 							PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	spin_unlock(&l_mg->free_lock);
 
 	if (nr_free_lines != free_line_cnt)
-		pr_err("pblk: corrupted free line list\n");
+		pr_err("pblk: corrupted free line list:%d/%d\n",
+						nr_free_lines, free_line_cnt);
 
 	sz = snprintf(page, PAGE_SIZE - sz,
 		"line: nluns:%d, nblks:%d, nsecs:%d\n",
 		geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+		"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
 					cur_data, cur_log,
-					free, nr_free_lines, bad, cor,
+					nr_free_lines,
+					emeta_line_cnt, meta_weight,
 					closed_line_cnt,
+					bad, cor,
 					d_line_cnt, l_line_cnt,
 					l_mg->nr_lines);
 
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 			atomic_read(&pblk->gc.inflight_gc));
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
-			cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
-			map_weight, lm->sec_per_line, meta_weight);
+		"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+			cur_data, cur_sec, msecs, vsc, sec_in_line,
+			map_weight, lm->sec_per_line,
+			atomic_read(&pblk->inflight_io));
 
 	return sz;
 }
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
 					lm->smeta_len, lm->smeta_sec);
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
 				"emeta - len:%d, sec:%d, bb_start:%d\n",
-					lm->emeta_len, lm->emeta_sec,
+					lm->emeta_len[0], lm->emeta_sec[0],
 					lm->emeta_bb);
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
 				"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
 	return sz;
 }
 
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
 #ifdef CONFIG_NVM_DEBUG
 static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 {
@@ -303,35 +310,14 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 			atomic_long_read(&pblk->padded_wb),
 			atomic_long_read(&pblk->sub_writes),
 			atomic_long_read(&pblk->sync_writes),
-			atomic_long_read(&pblk->compl_writes),
 			atomic_long_read(&pblk->recov_writes),
 			atomic_long_read(&pblk->recov_gc_writes),
 			atomic_long_read(&pblk->recov_gc_reads),
+			atomic_long_read(&pblk->cache_reads),
 			atomic_long_read(&pblk->sync_reads));
 }
 #endif
 
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
-				     size_t len)
-{
-	struct pblk_gc *gc = &pblk->gc;
-	size_t c_len;
-	int value;
-
-	c_len = strcspn(page, "\n");
-	if (c_len >= len)
-		return -EINVAL;
-
-	if (kstrtouint(page, 0, &value))
-		return -EINVAL;
-
-	spin_lock(&gc->lock);
-	pblk_rl_set_gc_rsc(&pblk->rl, value);
-	spin_unlock(&gc->lock);
-
-	return len;
-}
-
 static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
 				   size_t len)
 {
@@ -345,10 +331,30 @@ static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
 	if (kstrtouint(page, 0, &force))
 		return -EINVAL;
 
-	if (force < 0 || force > 1)
+	pblk_gc_sysfs_force(pblk, force);
+
+	return len;
+}
+
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+					     const char *page, size_t len)
+{
+	size_t c_len;
+	int sec_per_write;
+
+	c_len = strcspn(page, "\n");
+	if (c_len >= len)
 		return -EINVAL;
 
-	pblk_gc_sysfs_force(pblk, force);
+	if (kstrtouint(page, 0, &sec_per_write))
+		return -EINVAL;
+
+	if (sec_per_write < pblk->min_write_pgs
+				|| sec_per_write > pblk->max_write_pgs
+				|| sec_per_write % pblk->min_write_pgs != 0)
+		return -EINVAL;
+
+	pblk_set_sec_per_write(pblk, sec_per_write);
 
 	return len;
 }
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
 	.mode = 0200,
 };
 
-static struct attribute sys_gc_rl_max = {
-	.name = "gc_rl_max",
-	.mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+	.name = "max_sec_per_write",
+	.mode = 0644,
 };
 
 #ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
 	&sys_errors_attr,
 	&sys_gc_state,
 	&sys_gc_force,
-	&sys_gc_rl_max,
+	&sys_max_sec_per_write,
 	&sys_rb_attr,
 	&sys_stats_ppaf_attr,
 	&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
 		return pblk_sysfs_lines(pblk, buf);
 	else if (strcmp(attr->name, "lines_info") == 0)
 		return pblk_sysfs_lines_info(pblk, buf);
+	else if (strcmp(attr->name, "max_sec_per_write") == 0)
+		return pblk_sysfs_get_sec_per_write(pblk, buf);
 #ifdef CONFIG_NVM_DEBUG
 	else if (strcmp(attr->name, "stats") == 0)
 		return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
 
-	if (strcmp(attr->name, "gc_rl_max") == 0)
-		return pblk_sysfs_rate_store(pblk, buf, len);
-	else if (strcmp(attr->name, "gc_force") == 0)
+	if (strcmp(attr->name, "gc_force") == 0)
 		return pblk_sysfs_gc_force(pblk, buf, len);
+	else if (strcmp(attr->name, "max_sec_per_write") == 0)
+		return pblk_sysfs_set_sec_per_write(pblk, buf, len);
 
 	return 0;
 }
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7..d62a8f4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
 
 #include "pblk.h"
 
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
-	atomic_long_inc(&pblk->sync_writes);
-#endif
-
-	/* Counter protected by rb sync lock */
-	line->left_ssecs--;
-	if (!line->left_ssecs)
-		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
 static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 				    struct pblk_c_ctx *c_ctx)
 {
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	for (i = 0; i < c_ctx->nr_valid; i++) {
 		struct pblk_w_ctx *w_ctx;
-		struct ppa_addr p;
-		struct pblk_line *line;
 
 		w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
-		p = rqd->ppa_list[i];
-		line = &pblk->lines[pblk_dev_ppa_to_line(p)];
-		pblk_sync_line(pblk, line);
-
 		while ((original_bio = bio_list_pop(&w_ctx->bios)))
 			bio_endio(original_bio);
 	}
 
 #ifdef CONFIG_NVM_DEBUG
-	atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+	atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
 #endif
 
 	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
 	}
 
 	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
-	queue_work(pblk->kw_wq, &recovery->ws_rec);
+	queue_work(pblk->close_wq, &recovery->ws_rec);
 
 out:
 	pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
 	}
 #ifdef CONFIG_NVM_DEBUG
 	else
-		WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
 #endif
 
 	pblk_complete_write(pblk, rqd, c_ctx);
+	atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+	struct pblk_line *line = m_ctx->private;
+	struct pblk_emeta *emeta = line->emeta;
+	int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+	struct pblk_lun *rlun = &pblk->luns[pos];
+	int sync;
+
+	up(&rlun->wr_sem);
+
+	if (rqd->error) {
+		pblk_log_write_err(pblk, rqd);
+		pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+	}
+#ifdef CONFIG_NVM_DEBUG
+	else
+		WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+	sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+	if (sync == emeta->nr_entries)
+		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+								pblk->close_wq);
+
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, READ);
+
+	atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   unsigned int nr_secs)
+			   unsigned int nr_secs,
+			   nvm_end_io_fn(*end_io))
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	rqd->nr_ppas = nr_secs;
 	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
 	rqd->private = pblk;
-	rqd->end_io = pblk_end_io_write;
+	rqd->end_io = end_io;
 
 	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
 							&rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   struct pblk_c_ctx *c_ctx)
+			   struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
-	struct ppa_addr erase_ppa;
+	struct pblk_line *e_line = pblk_line_get_erase(pblk);
 	unsigned int valid = c_ctx->nr_valid;
 	unsigned int padded = c_ctx->nr_padded;
 	unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	int ret = 0;
 
 	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-	if (!lun_bitmap) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!lun_bitmap)
+		return -ENOMEM;
 	c_ctx->lun_bitmap = lun_bitmap;
 
-	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
 	if (ret) {
 		kfree(lun_bitmap);
-		goto out;
+		return ret;
 	}
 
-	ppa_set_empty(&erase_ppa);
 	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
 		pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
 	else
 		pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
-							valid, &erase_ppa);
+							valid, erase_ppa);
 
-out:
-	if (unlikely(e_line && !ppa_empty(erase_ppa))) {
-		if (pblk_blk_erase_async(pblk, erase_ppa)) {
-			struct nvm_tgt_dev *dev = pblk->dev;
-			struct nvm_geo *geo = &dev->geo;
-			int bit;
-
-			atomic_inc(&e_line->left_eblks);
-			bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
-			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
-			up(&pblk->erase_sem);
-		}
-	}
-
-	return ret;
+	return 0;
 }
 
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
 	c_ctx->lun_bitmap = lun_bitmap;
 
-	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
 	if (ret)
 		return ret;
 
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 	return secs_to_sync;
 }
 
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+				      struct pblk_line *meta_line,
+				      struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *data_line;
+	struct ppa_addr ppa, ppa_opt;
+	u64 paddr;
+	int i;
+
+	data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+	paddr = pblk_lookup_page(pblk, meta_line);
+	ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+	if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+		return 1;
+
+	/* Schedule a metadata I/O that is half the distance from the data I/O
+	 * with respect to the number of LUNs forming the pblk instance. This
+	 * balances LUN conflicts across every I/O.
+	 *
+	 * When the LUN configuration changes (e.g., due to GC), this distance
+	 * can align, which would result in a LUN deadlock. In this case, modify
+	 * the distance so it is no longer optimal, but still allows metadata
+	 * I/Os to succeed.
+	 */
+	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+	if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+		data_line->meta_distance--;
+		return 0;
+	}
+
+	for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+		if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+					ppa_list[i].g.lun == ppa_opt.g.lun)
+			return 1;
+
+	if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+		for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+			if (ppa_list[i].g.ch == ppa.g.ch &&
+						ppa_list[i].g.lun == ppa.g.lun)
+				return 0;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_emeta *emeta = meta_line->emeta;
+	struct pblk_g_ctx *m_ctx;
+	struct pblk_lun *rlun;
+	struct bio *bio;
+	struct nvm_rq *rqd;
+	void *data;
+	u64 paddr;
+	int rq_ppas = pblk->min_write_pgs;
+	int id = meta_line->id;
+	int rq_len;
+	int i, j;
+	int ret;
+
+	rqd = pblk_alloc_rqd(pblk, READ);
+	if (IS_ERR(rqd)) {
+		pr_err("pblk: cannot allocate write req.\n");
+		return PTR_ERR(rqd);
+	}
+	m_ctx = nvm_rq_to_pdu(rqd);
+	m_ctx->private = meta_line;
+
+	rq_len = rq_ppas * geo->sec_size;
+	data = ((void *)emeta->buf) + emeta->mem;
+
+	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+					l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (IS_ERR(bio)) {
+		ret = PTR_ERR(bio);
+		goto fail_free_rqd;
+	}
+	bio->bi_iter.bi_sector = 0; /* internal bio */
+	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+	rqd->bio = bio;
+
+	ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+	if (ret)
+		goto fail_free_bio;
+
+	for (i = 0; i < rqd->nr_ppas; ) {
+		spin_lock(&meta_line->lock);
+		paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+		spin_unlock(&meta_line->lock);
+		for (j = 0; j < rq_ppas; j++, i++, paddr++)
+			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+	}
+
+	rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	if (ret) {
+		pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+		goto fail_free_bio;
+	}
+
+	emeta->mem += rq_len;
+	if (emeta->mem >= lm->emeta_len[0]) {
+		spin_lock(&l_mg->close_lock);
+		list_del(&meta_line->list);
+		WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+				"pblk: corrupt meta line %d\n", meta_line->id);
+		spin_unlock(&l_mg->close_lock);
+	}
+
+	ret = pblk_submit_io(pblk, rqd);
+	if (ret) {
+		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+		goto fail_rollback;
+	}
+
+	return NVM_IO_OK;
+
+fail_rollback:
+	spin_lock(&l_mg->close_lock);
+	pblk_dealloc_page(pblk, meta_line, rq_ppas);
+	list_add(&meta_line->list, &l_mg->emeta_list);
+	spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+	if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+		bio_put(bio);
+fail_free_rqd:
+	pblk_free_rqd(pblk, rqd, READ);
+	return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+			       int prev_n)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line *meta_line;
+
+	spin_lock(&l_mg->close_lock);
+retry:
+	if (list_empty(&l_mg->emeta_list)) {
+		spin_unlock(&l_mg->close_lock);
+		return 0;
+	}
+	meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+	if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+		goto retry;
+	spin_unlock(&l_mg->close_lock);
+
+	if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+		return 0;
+
+	return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct ppa_addr erase_ppa;
+	int err;
+
+	ppa_set_empty(&erase_ppa);
+
+	/* Assign lbas to ppas and populate request structure */
+	err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+	if (err) {
+		pr_err("pblk: could not setup write request: %d\n", err);
+		return NVM_IO_ERR;
+	}
+
+	if (likely(ppa_empty(erase_ppa))) {
+		/* Submit metadata write for previous data line */
+		err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+		if (err) {
+			pr_err("pblk: metadata I/O submission failed: %d\n", err);
+			return NVM_IO_ERR;
+		}
+
+		/* Submit data write for current data line */
+		err = pblk_submit_io(pblk, rqd);
+		if (err) {
+			pr_err("pblk: data I/O submission failed: %d\n", err);
+			return NVM_IO_ERR;
+		}
+	} else {
+		/* Submit data write for current data line */
+		err = pblk_submit_io(pblk, rqd);
+		if (err) {
+			pr_err("pblk: data I/O submission failed: %d\n", err);
+			return NVM_IO_ERR;
+		}
+
+		/* Submit available erase for next data line */
+		if (pblk_blk_erase_async(pblk, erase_ppa)) {
+			struct pblk_line *e_line = pblk_line_get_erase(pblk);
+			struct nvm_tgt_dev *dev = pblk->dev;
+			struct nvm_geo *geo = &dev->geo;
+			int bit;
+
+			atomic_inc(&e_line->left_eblks);
+			bit = pblk_ppa_to_pos(geo, erase_ppa);
+			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+		}
+	}
+
+	return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct bio *bio = rqd->bio;
+
+	if (c_ctx->nr_padded)
+		pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
 static int pblk_submit_write(struct pblk *pblk)
 {
 	struct bio *bio;
 	struct nvm_rq *rqd;
-	struct pblk_c_ctx *c_ctx;
-	unsigned int pgs_read;
 	unsigned int secs_avail, secs_to_sync, secs_to_com;
 	unsigned int secs_to_flush;
 	unsigned long pos;
-	int err;
 
 	/* If there are no sectors in the cache, flushes (bios without data)
 	 * will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
 		pr_err("pblk: cannot allocate write req.\n");
 		return 1;
 	}
-	c_ctx = nvm_rq_to_pdu(rqd);
 
 	bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
 	if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
 	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
 	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 
-	pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
-						secs_to_sync, secs_avail);
-	if (!pgs_read) {
+	if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+								secs_avail)) {
 		pr_err("pblk: corrupted write bio\n");
 		goto fail_put_bio;
 	}
 
-	if (c_ctx->nr_padded)
-		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
-			goto fail_put_bio;
-
-	/* Assign lbas to ppas and populate request structure */
-	err = pblk_setup_w_rq(pblk, rqd, c_ctx);
-	if (err) {
-		pr_err("pblk: could not setup write request\n");
+	if (pblk_submit_io_set(pblk, rqd))
 		goto fail_free_bio;
-	}
-
-	err = pblk_submit_io(pblk, rqd);
-	if (err) {
-		pr_err("pblk: I/O submission failed: %d\n", err);
-		goto fail_free_bio;
-	}
 
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
 	return 0;
 
 fail_free_bio:
-	if (c_ctx->nr_padded)
-		pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+	pblk_free_write_rqd(pblk, rqd);
 fail_put_bio:
 	bio_put(bio);
 fail_free_rqd:
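
For readers skimming the pblk-write.c hunks above, here is a minimal userspace sketch of the metadata-distance heuristic that pblk_valid_meta_ppa() applies: place the metadata I/O roughly half the LUN count away from the data I/O, and shrink the distance when the two would land on the same LUN. The names (pick_meta_lun, nr_luns, meta_distance) are invented for this toy model; only the idea mirrors the patch, not the kernel code paths.

/* Toy model of the metadata-distance heuristic: schedule the metadata
 * I/O half the LUN count away from the data I/O, and degrade the
 * distance when the two would collide. Userspace illustration only.
 */
#include <stdio.h>

static int pick_meta_lun(int data_lun, int nr_luns, int *meta_distance)
{
	int meta_lun = (data_lun + *meta_distance) % nr_luns;

	if (meta_lun == data_lun) {
		/* distances aligned: fall back to a non-optimal distance */
		*meta_distance = (*meta_distance > 1) ? *meta_distance - 1 : 1;
		meta_lun = (data_lun + *meta_distance) % nr_luns;
	}
	return meta_lun;
}

int main(void)
{
	int nr_luns = 8;
	int meta_distance = nr_luns / 2;
	int data_lun;

	for (data_lun = 0; data_lun < nr_luns; data_lun++)
		printf("data LUN %d -> meta LUN %d\n", data_lun,
		       pick_meta_lun(data_lun, nr_luns, &meta_distance));
	return 0;
}
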
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186..1593138 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
 #define PBLK_MAX_REQ_ADDRS (64)
 #define PBLK_MAX_REQ_ADDRS_PW (6)
 
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
 #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
 
 #define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
 	PBLK_BLK_ST_CLOSED =	0x2,
 };
 
+struct pblk_sec_meta {
+	u64 reserved;
+	__le64 lba;
+};
+
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
 
 enum {
 	PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
 	PBLK_RL_LOW = 3,
 };
 
-struct pblk_sec_meta {
-	u64 reserved;
-	__le64 lba;
-};
-
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
 
-/* write completion context */
+/* write buffer completion context */
 struct pblk_c_ctx {
 	struct list_head list;		/* Head for out-of-order completion */
 
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
 	unsigned int nr_padded;
 };
 
-/* Read context */
-struct pblk_r_ctx {
-	struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+	void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+	struct pblk *pblk;
+	struct completion wait;
+	struct kref ref;
 };
 
 /* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
 struct pblk_gc_rq {
 	struct pblk_line *line;
 	void *data;
-	u64 *lba_list;
+	u64 lba_list[PBLK_MAX_REQ_ADDRS];
 	int nr_secs;
 	int secs_to_gc;
 	struct list_head list;
 };
 
 struct pblk_gc {
+	/* These states are not protected by a lock since (i) they are in the
+	 * fast path, and (ii) they are not critical.
+	 */
 	int gc_active;
 	int gc_enabled;
 	int gc_forced;
-	int gc_jobs_active;
-	atomic_t inflight_gc;
 
 	struct task_struct *gc_ts;
 	struct task_struct *gc_writer_ts;
+	struct task_struct *gc_reader_ts;
+
+	struct workqueue_struct *gc_line_reader_wq;
 	struct workqueue_struct *gc_reader_wq;
+
 	struct timer_list gc_timer;
 
+	struct semaphore gc_sem;
+	atomic_t inflight_gc;
 	int w_entries;
+
 	struct list_head w_list;
+	struct list_head r_list;
 
 	spinlock_t lock;
 	spinlock_t w_lock;
+	spinlock_t r_lock;
 };
 
 struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
 				 */
 	unsigned int high_pw;	/* High rounded up as a power of 2 */
 
-#define PBLK_USER_HIGH_THRS 2	/* Begin write limit at 50 percent
-				 * available blks
-				 */
-#define PBLK_USER_LOW_THRS 20	/* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8	/* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10	/* Aggressive GC at 10% available blocks */
 
 	int rb_windows_pw;	/* Number of rate windows in the write buffer
 				 * given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
 				 */
 	int rb_budget;		/* Total number of entries available for I/O */
 	int rb_user_max;	/* Max buffer entries available for user I/O */
-	atomic_t rb_user_cnt;	/* User I/O buffer counter */
 	int rb_gc_max;		/* Max buffer entries available for GC I/O */
 	int rb_gc_rsv;		/* Reserved buffer entries for GC I/O */
 	int rb_state;		/* Rate-limiter current state */
+
+	atomic_t rb_user_cnt;	/* User I/O buffer counter */
 	atomic_t rb_gc_cnt;	/* GC I/O buffer counter */
+	atomic_t rb_space;	/* Space limit in case of reaching capacity */
+
+	int rsv_blocks;		/* Reserved blocks for GC */
 
 	int rb_user_active;
+	int rb_gc_active;
+
 	struct timer_list u_timer;
 
 	unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
 	atomic_t free_blocks;
 };
 
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
 #define PBLK_LINE_EMPTY (~0U)
 
 enum {
@@ -310,16 +334,19 @@ struct line_smeta {
 	__le32 window_wr_lun;	/* Number of parallel LUNs to write */
 
 	__le32 rsvd[2];
+
+	__le64 lun_bitmap[];
 };
 
 /*
- * Metadata Layout:
- *	1. struct pblk_emeta
- *	2. nr_lbas u64 forming lba list
- *	3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- *	4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- *	3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ *	First sector:
+ *		1. struct line_emeta
+ *		2. bad block bitmap (u64 * window_wr_lun)
+ *	Mid sectors (start at lbas_sector):
+ *		3. nr_lbas (u64) forming lba list
+ *	Last sectors (start at vsc_sector):
+ *		4. u32 valid sector count (vsc) for all lines (~0U: free line)
  */
 struct line_emeta {
 	struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
 	__le32 next_id;		/* Line id for next line */
 	__le64 nr_lbas;		/* Number of lbas mapped in line */
 	__le64 nr_valid_lbas;	/* Number of valid lbas mapped in line */
+	__le64 bb_bitmap[];	/* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+	struct line_emeta *buf;		/* emeta buffer in media format */
+	int mem;			/* Write offset - points to next
+					 * writable entry in memory
+					 */
+	atomic_t sync;			/* Synced - backpointer that signals the
+					 * last entry that has been successfully
+					 * persisted to media
+					 */
+	unsigned int nr_entries;	/* Number of emeta entries */
+};
+
+struct pblk_smeta {
+	struct line_smeta *buf;		/* smeta buffer in persistent format */
 };
 
 struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
 
 	unsigned long *lun_bitmap;	/* Bitmap for LUNs mapped in line */
 
-	struct line_smeta *smeta;	/* Start metadata */
-	struct line_emeta *emeta;	/* End metadata */
+	struct pblk_smeta *smeta;	/* Start metadata */
+	struct pblk_emeta *emeta;	/* End metadata */
+
 	int meta_line;			/* Metadata line id */
+	int meta_distance;		/* Distance between data and metadata */
+
 	u64 smeta_ssec;			/* Sector where smeta starts */
 	u64 emeta_ssec;			/* Sector where emeta starts */
 
@@ -374,9 +421,10 @@ struct pblk_line {
 	atomic_t left_seblks;		/* Blocks left for sync erasing */
 
 	int left_msecs;			/* Sectors left for mapping */
-	int left_ssecs;			/* Sectors left to sync */
 	unsigned int cur_sec;		/* Sector map pointer */
-	unsigned int vsc;		/* Valid sector count in line */
+	unsigned int nr_valid_lbas;	/* Number of valid lbas in line */
+
+	__le32 *vsc;			/* Valid sector count in line */
 
 	struct kref ref;		/* Write buffer L2P references */
 
@@ -385,13 +433,15 @@ struct pblk_line {
 
 #define PBLK_DATA_LINES 4
 
-enum{
+enum {
 	PBLK_KMALLOC_META = 1,
 	PBLK_VMALLOC_META = 2,
 };
 
-struct pblk_line_metadata {
-	void *meta;
+enum {
+	PBLK_EMETA_TYPE_HEADER = 1,	/* struct line_emeta first sector */
+	PBLK_EMETA_TYPE_LLBA = 2,	/* lba list - type: __le64 */
+	PBLK_EMETA_TYPE_VSC = 3,	/* vsc list - type: __le32 */
 };
 
 struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
 	struct list_head bad_list;	/* Full lines bad */
 
 	/* GC lists - use gc_lock */
-	struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+	struct list_head *gc_lists[PBLK_GC_NR_LISTS];
 	struct list_head gc_high_list;	/* Full lines ready to GC, high isc */
 	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
 	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
 	struct pblk_line *log_next;	/* Next FTL log line */
 	struct pblk_line *data_next;	/* Next data line */
 
+	struct list_head emeta_list;	/* Lines queued to schedule emeta */
+
+	__le32 *vsc_list;		/* Valid sector counts for all lines */
+
 	/* Metadata allocation type: VMALLOC | KMALLOC */
-	int smeta_alloc_type;
 	int emeta_alloc_type;
 
 	/* Pre-allocated metadata for data lines */
-	struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
-	struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+	struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+	struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
 	unsigned long meta_bitmap;
 
 	/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
 	unsigned long l_seq_nr;		/* Log line unique sequence number */
 
 	spinlock_t free_lock;
+	spinlock_t close_lock;
 	spinlock_t gc_lock;
 };
 
 struct pblk_line_meta {
 	unsigned int smeta_len;		/* Total length for smeta */
-	unsigned int smeta_sec;		/* Sectors needed for smeta*/
-	unsigned int emeta_len;		/* Total length for emeta */
-	unsigned int emeta_sec;		/* Sectors needed for emeta*/
+	unsigned int smeta_sec;		/* Sectors needed for smeta */
+
+	unsigned int emeta_len[4];	/* Lengths for emeta:
+					 *  [0]: Total length
+					 *  [1]: struct line_emeta length
+					 *  [2]: L2P portion length
+					 *  [3]: vsc list length
+					 */
+	unsigned int emeta_sec[4];	/* Sectors needed for emeta. Same layout
+					 * as emeta_len
+					 */
+
 	unsigned int emeta_bb;		/* Boundary for bb that affects emeta */
+
+	unsigned int vsc_list_len;	/* Length for vsc list */
 	unsigned int sec_bitmap_len;	/* Length for sector bitmap in line */
 	unsigned int blk_bitmap_len;	/* Length for block bitmap in line */
 	unsigned int lun_bitmap_len;	/* Length for lun bitmap in line */
 
 	unsigned int blk_per_line;	/* Number of blocks in a full line */
 	unsigned int sec_per_line;	/* Number of sectors in a line */
+	unsigned int dsec_per_line;	/* Number of data sectors in a line */
 	unsigned int min_blk_line;	/* Min. number of good blocks in line */
 
 	unsigned int mid_thrs;		/* Threshold for GC mid list */
 	unsigned int high_thrs;		/* Threshold for GC high list */
+
+	unsigned int meta_distance;	/* Distance between data and metadata */
 };
 
 struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
 	u8	sec_offset;
 };
 
+enum {
+	PBLK_STATE_RUNNING = 0,
+	PBLK_STATE_STOPPING = 1,
+	PBLK_STATE_RECOVERING = 2,
+	PBLK_STATE_STOPPED = 3,
+};
+
 struct pblk {
 	struct nvm_tgt_dev *dev;
 	struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
 
 	struct pblk_rb rwb;
 
+	int state;			/* pblk line state */
+
 	int min_write_pgs; /* Minimum amount of pages required by controller */
 	int max_write_pgs; /* Maximum amount of pages supported by controller */
 	int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
 	/* pblk provisioning values. Used by rate limiter */
 	struct pblk_rl rl;
 
-	struct semaphore erase_sem;
+	int sec_per_write;
 
 	unsigned char instance_uuid[16];
 #ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
 	atomic_long_t req_writes;	/* Sectors stored on write buffer */
 	atomic_long_t sub_writes;	/* Sectors submitted from buffer */
 	atomic_long_t sync_writes;	/* Sectors synced to media */
-	atomic_long_t compl_writes;	/* Sectors completed in write bio */
 	atomic_long_t inflight_reads;	/* Inflight sector read requests */
+	atomic_long_t cache_reads;	/* Read requests that hit the cache */
 	atomic_long_t sync_reads;	/* Completed sector read requests */
 	atomic_long_t recov_writes;	/* Sectors submitted from recovery */
 	atomic_long_t recov_gc_writes;	/* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
 	atomic_long_t write_failed;
 	atomic_long_t erase_failed;
 
+	atomic_t inflight_io;		/* General inflight I/O counter */
+
 	struct task_struct *writer_ts;
 
 	/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
 	mempool_t *page_pool;
 	mempool_t *line_ws_pool;
 	mempool_t *rec_pool;
-	mempool_t *r_rq_pool;
+	mempool_t *g_rq_pool;
 	mempool_t *w_rq_pool;
 	mempool_t *line_meta_pool;
 
-	struct workqueue_struct *kw_wq;
+	struct workqueue_struct *close_wq;
+	struct workqueue_struct *bb_wq;
+
 	struct timer_list wtimer;
 
 	struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
 	struct work_struct ws;
 };
 
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
 #define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
 
 /*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
 			    struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
 			    unsigned int pos);
 struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
 
 void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-				 struct pblk_c_ctx *c_ctx,
-				 unsigned int pos,
-				 unsigned int nr_entries,
-				 unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+				 struct bio *bio, unsigned int pos,
+				 unsigned int nr_entries, unsigned int count);
 unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
 				      struct list_head *list,
 				      unsigned int max);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-			u64 pos, int bio_iter);
+			struct ppa_addr ppa, int bio_iter);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
 
 unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
 unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
 
 unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
 unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
 
 int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
  * pblk core
  */
 struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			struct pblk_c_ctx *c_ctx);
 void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
 struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
 void pblk_discard(struct pblk *pblk, struct bio *bio);
 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      unsigned int nr_secs, unsigned int len,
-			      gfp_t gfp_mask);
+			      int alloc_type, gfp_t gfp_mask);
 struct pblk_line *pblk_line_get(struct pblk *pblk);
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
 struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
 void pblk_line_mark_bb(struct work_struct *work);
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-		      void (*work)(struct work_struct *));
+		      void (*work)(struct work_struct *),
+		      struct workqueue_struct *wq);
 u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+			 void *emeta_buf);
 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
 void pblk_line_put(struct kref *ref);
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
 void pblk_end_io_sync(struct nvm_rq *rqd);
 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
 		       int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-			     u64 paddr);
 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
 			 int nr_pages);
 void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+			   u64 paddr);
 void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
 void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
 			   struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
 /*
  * pblk read path
  */
+extern struct bio_set *pblk_bio_set;
 int pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 			unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
  */
 void pblk_submit_rec(struct work_struct *work);
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
 __le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
 int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 			struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 /*
  * pblk gc
  */
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8	/* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128	/* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4		/* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
 void pblk_gc_exit(struct pblk *pblk);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 			      int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
 
 /*
  * pblk rate limiter
  */
 void pblk_rl_init(struct pblk_rl *rl, int budget);
 void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
 
 /*
  * pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
 	return c_ctx - sizeof(struct nvm_rq);
 }
 
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
 {
-	return (emeta) + 1;
+	return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+	return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
+{
+	return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+	int vsc;
+
+	spin_lock(&line->lock);
+	vsc = le32_to_cpu(*line->vsc);
+	spin_unlock(&line->lock);
+
+	return vsc;
 }
 
 #define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
 	ppa_addr->ppa = ADDR_EMPTY;
 }
 
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+	if (lppa.ppa == rppa.ppa)
+		return true;
+
+	return false;
+}
+
 static inline int pblk_addr_in_cache(struct ppa_addr ppa)
 {
 	return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
 }
 
 static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
-					    struct line_smeta *smeta)
+					    struct line_header *header)
 {
 	u32 crc = ~(u32)0;
 
-	crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+	crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
 				sizeof(struct line_header) - sizeof(crc));
 
 	return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
 
 	crc = crc32_le(crc, (unsigned char *)emeta +
 				sizeof(struct line_header) + sizeof(crc),
-				lm->emeta_len -
+				lm->emeta_len[0] -
 				sizeof(struct line_header) - sizeof(crc));
 
 	return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
 	return flags;
 }
 
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+	PBLK_READ_RANDOM	= 0,
+	PBLK_READ_SEQUENTIAL	= 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
 {
-	return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	int flags;
+
+	flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+	if (type == PBLK_READ_SEQUENTIAL)
+		flags |= geo->plane_mode >> 1;
+
+	return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
+{
+	return !(nr_secs % pblk->min_write_pgs);
 }
 
 #ifdef CONFIG_NVM_DEBUG
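
The split of emeta_len[]/emeta_sec[] into header, lba-list and vsc-list portions, together with the emeta_to_lbas()/emeta_to_vsc() helpers, is easier to follow as plain offset arithmetic. The sketch below is a userspace toy with made-up sizes (struct toy_line_meta, a 64-byte header); only the offset scheme ([1] header, [2] lba list, [3] vsc list, [0] total) mirrors the patch, not the on-media layout.

/* Userspace sketch of the emeta buffer layout:
 *   emeta_len[1] : struct line_emeta header length
 *   emeta_len[2] : lba list (L2P portion) length
 *   emeta_len[3] : vsc list length
 *   emeta_len[0] : total = [1] + [2] + [3]
 * The helpers mirror emeta_to_lbas()/emeta_to_vsc() as plain offsets.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct toy_line_meta {
	unsigned int emeta_len[4];
};

static void *emeta_to_lbas(struct toy_line_meta *lm, void *emeta)
{
	return (char *)emeta + lm->emeta_len[1];
}

static void *emeta_to_vsc(struct toy_line_meta *lm, void *emeta)
{
	return (char *)emeta_to_lbas(lm, emeta) + lm->emeta_len[2];
}

int main(void)
{
	struct toy_line_meta lm;
	unsigned int nr_lbas = 16, nr_lines = 4;
	void *buf;

	lm.emeta_len[1] = 64;				/* header (illustrative) */
	lm.emeta_len[2] = nr_lbas * sizeof(uint64_t);	/* lba list */
	lm.emeta_len[3] = nr_lines * sizeof(uint32_t);	/* vsc list */
	lm.emeta_len[0] = lm.emeta_len[1] + lm.emeta_len[2] + lm.emeta_len[3];

	buf = malloc(lm.emeta_len[0]);
	if (!buf)
		return 1;

	printf("total %u, lbas at +%td, vsc at +%td\n", lm.emeta_len[0],
	       (char *)emeta_to_lbas(&lm, buf) - (char *)buf,
	       (char *)emeta_to_vsc(&lm, buf) - (char *)buf);
	free(buf);
	return 0;
}
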
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a..267f01a 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
 {
 	struct completion *waiting = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
 
 	complete(waiting);
 }
@@ -359,7 +359,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 			goto finished;
 		}
 		wait_for_completion_io(&wait);
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			rrpc_inflight_laddr_release(rrpc, rqd);
 			goto finished;
 		}
@@ -385,7 +385,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
 		wait_for_completion_io(&wait);
 
 		rrpc_inflight_laddr_release(rrpc, rqd);
-		if (bio->bi_error)
+		if (bio->bi_status)
 			goto finished;
 
 		bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
 	struct nvm_rq *rqd;
 	int err;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (bio_op(bio) == REQ_OP_DISCARD) {
 		rrpc_discard(rrpc, bio);
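
The rrpc.c hunk above and the bcache/dm hunks that follow switch from bio->bi_error (a negative errno) to bio->bi_status (a blk_status_t), translating at the boundaries with errno_to_blk_status()/blk_status_to_errno(). The toy below models that translation in userspace with a reduced status set; the toy_* names are invented, and the real tables in the block layer cover more codes.

/* Simplified userspace model of the errno <-> blk_status_t translation
 * that the bi_error -> bi_status conversions rely on. Subset chosen for
 * illustration only.
 */
#include <stdio.h>
#include <errno.h>

enum toy_blk_status {
	TOY_BLK_STS_OK = 0,
	TOY_BLK_STS_IOERR,
	TOY_BLK_STS_RESOURCE,
	TOY_BLK_STS_PROTECTION,
};

static enum toy_blk_status toy_errno_to_blk_status(int err)
{
	switch (err) {
	case 0:		return TOY_BLK_STS_OK;
	case -ENOMEM:	return TOY_BLK_STS_RESOURCE;
	case -EILSEQ:	return TOY_BLK_STS_PROTECTION;
	default:	return TOY_BLK_STS_IOERR;
	}
}

static int toy_blk_status_to_errno(enum toy_blk_status status)
{
	switch (status) {
	case TOY_BLK_STS_OK:		return 0;
	case TOY_BLK_STS_RESOURCE:	return -ENOMEM;
	case TOY_BLK_STS_PROTECTION:	return -EILSEQ;
	default:			return -EIO;
	}
}

int main(void)
{
	int errs[] = { 0, -ENOMEM, -EILSEQ, -EIO };
	unsigned int i;

	for (i = 0; i < sizeof(errs) / sizeof(errs[0]); i++)
		printf("errno %d -> status %d -> errno %d\n", errs[i],
		       toy_errno_to_blk_status(errs[i]),
		       toy_blk_status_to_errno(toy_errno_to_blk_status(errs[i])));
	return 0;
}
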
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c..dee542f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
-			      int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+			      blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+		const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e8..866dcf7 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
 	bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct btree *b = container_of(cl, struct btree, io);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		set_btree_node_io_error(b);
 
-	bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+	bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
 	closure_put(cl);
 }
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 9b80417..73da1f5 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -207,7 +207,7 @@ void bkey_put(struct cache_set *c, struct bkey *k);
 
 struct btree_op {
 	/* for waiting on btree reserve in btree_split() */
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 
 	/* Btree level at which we start taking write locks */
 	short			lock;
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f5505..35a5a72 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 	struct bio_vec bv, cbv;
 	struct bvec_iter iter, citer = { 0 };
 
-	check = bio_clone(bio, GFP_NOIO);
+	check = bio_clone_kmalloc(bio, GFP_NOIO);
 	if (!check)
 		return;
 	check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88..6a9b850 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 
 /* IO errors */
 
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
 {
 	/*
 	 * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
 }
 
 void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
-			      int error, const char *m)
+			      blk_status_t error, const char *m)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
 	struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 }
 
 void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-		    int error, const char *m)
+		    blk_status_t error, const char *m)
 {
 	struct closure *cl = bio->bi_private;
 
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53..0352d05 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
 {
 	struct journal_write *w = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, w->c, "journal io error");
+	cache_set_err_on(bio->bi_status, w->c, "journal io error");
 	closure_put(&w->c->journal.io);
 }
 
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a90..f633b30 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
 	struct moving_io *io = container_of(bio->bi_private,
 					    struct moving_io, cl);
 
-	if (bio->bi_error)
-		io->op.error = bio->bi_error;
+	if (bio->bi_status)
+		io->op.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(io->op.c, &b->key, 0)) {
-		io->op.error = -EINTR;
+		io->op.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+	bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
 }
 
 static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct data_insert_op *op = &io->op;
 
-	if (!op->error) {
+	if (!op->status) {
 		moving_init(io);
 
 		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc..019b3df 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
 	if (ret == -ESRCH) {
 		op->replace_collision = true;
 	} else if (ret) {
-		op->error		= -ENOMEM;
+		op->status		= BLK_STS_RESOURCE;
 		op->insert_data_done	= true;
 	}
 
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* TODO: We could try to recover from this. */
 		if (op->writeback)
-			op->error = bio->bi_error;
+			op->status = bio->bi_status;
 		else if (!op->replace)
 			set_closure_fn(cl, bch_data_insert_error, op->wq);
 		else
 			set_closure_fn(cl, NULL, NULL);
 	}
 
-	bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
 static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
 	 * from the backing device.
 	 */
 
-	if (bio->bi_error)
-		s->iop.error = bio->bi_error;
+	if (bio->bi_status)
+		s->iop.status = bio->bi_status;
 	else if (!KEY_DIRTY(&b->key) &&
 		 ptr_stale(s->iop.c, &b->key, 0)) {
 		atomic_long_inc(&s->iop.c->cache_read_races);
-		s->iop.error = -EINTR;
+		s->iop.status = BLK_STS_IOERR;
 	}
 
-	bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+	bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
 }
 
 /*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct search *s = container_of(cl, struct search, cl);
-		s->iop.error = bio->bi_error;
+		s->iop.status = bio->bi_status;
 		/* Only cache read errors are recoverable */
 		s->recoverable = false;
 	}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
 				    &s->d->disk->part0, s->start_time);
 
 		trace_bcache_request_end(s->d, s->orig_bio);
-		s->orig_bio->bi_error = s->iop.error;
+		s->orig_bio->bi_status = s->iop.status;
 		bio_endio(s->orig_bio);
 		s->orig_bio = NULL;
 	}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->iop.inode		= d->id;
 	s->iop.write_point	= hash_long((unsigned long) current, 16);
 	s->iop.write_prio	= 0;
-	s->iop.error		= 0;
+	s->iop.status		= 0;
 	s->iop.flags		= 0;
 	s->iop.flush_journal	= op_is_flush(bio->bi_opf);
 	s->iop.wq		= bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
-		s->iop.error = 0;
+		s->iop.status = 0;
 		do_bio_hook(s, s->orig_bio);
 
 		/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
 				  !s->cache_miss, s->iop.bypass);
 	trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
 
-	if (s->iop.error)
+	if (s->iop.status)
 		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
 	else if (s->iop.bio || verify(dc, &s->bio.bio))
 		continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff3687..7689176 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
 	unsigned		inode;
 	uint16_t		write_point;
 	uint16_t		write_prio;
-	short			error;
+	blk_status_t		status;
 
 	union {
 		uint16_t	flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e..8352fad 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+	bch_count_io_errors(ca, bio->bi_status, "writing superblock");
 	closure_put(&ca->set->sb_write);
 }
 
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
 	struct closure *cl = bio->bi_private;
 	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
 
-	cache_set_err_on(bio->bi_error, c, "accessing uuids");
+	cache_set_err_on(bio->bi_status, c, "accessing uuids");
 	bch_bbio_free(bio, c);
 	closure_put(cl);
 }
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
 {
 	struct cache *ca = bio->bi_private;
 
-	cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+	cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
 	bch_bbio_free(bio, ca->set);
 	closure_put(&ca->prio);
 }
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	minor *= BCACHE_MINORS;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_minor, minor);
 		return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+					   BIOSET_NEED_BVECS |
+					   BIOSET_NEED_RESCUER)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48..42c66e7 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
 	struct keybuf_key *w = bio->bi_private;
 	struct dirty_io *io = w->private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		SET_KEY_DIRTY(&w->key, false);
 
 	closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
 	struct dirty_io *io = w->private;
 
 	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-			    bio->bi_error, "reading dirty data from cache");
+			    bio->bi_status, "reading dirty data from cache");
 
 	dirty_endio(bio);
 }
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index bf7419a..f4eace5 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -485,10 +485,10 @@ void bitmap_print_sb(struct bitmap *bitmap)
 	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
 	pr_debug("       version: %d\n", le32_to_cpu(sb->version));
 	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
-		 *(__u32 *)(sb->uuid+0),
-		 *(__u32 *)(sb->uuid+4),
-		 *(__u32 *)(sb->uuid+8),
-		 *(__u32 *)(sb->uuid+12));
+		 le32_to_cpu(*(__u32 *)(sb->uuid+0)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+4)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+8)),
+		 le32_to_cpu(*(__u32 *)(sb->uuid+12)));
 	pr_debug("        events: %llu\n",
 		 (unsigned long long) le64_to_cpu(sb->events));
 	pr_debug("events cleared: %llu\n",
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c..82d2738 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error)
+		   struct dm_bio_prison_cell *cell, blk_status_t error)
 {
 	struct bio_list bios;
 	struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
 	dm_cell_release(prison, cell, &bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac..cec52ac 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 			       struct dm_bio_prison_cell *cell,
 			       struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison *prison,
-		   struct dm_bio_prison_cell *cell, int error);
+		   struct dm_bio_prison_cell *cell, blk_status_t error);
 
 /*
  * Visits the cell and then releases.  Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index cd81395..850ff6c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
 	enum data_mode data_mode;
 	unsigned char list_mode;		/* LIST_* */
 	unsigned hold_count;
-	int read_error;
-	int write_error;
+	blk_status_t read_error;
+	blk_status_t write_error;
 	unsigned long state;
 	unsigned long last_accessed;
 	struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
 {
 	struct dm_buffer *b = context;
 
-	b->bio.bi_error = error ? -EIO : 0;
+	b->bio.bi_status = error ? BLK_STS_IOERR : 0;
 	b->bio.bi_end_io(&b->bio);
 }
 
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
 	r = dm_io(&io_req, 1, &region, NULL);
 	if (r) {
-		b->bio.bi_error = r;
+		b->bio.bi_status = errno_to_blk_status(r);
 		end_io(&b->bio);
 	}
 }
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 static void inline_endio(struct bio *bio)
 {
 	bio_end_io_t *end_fn = bio->bi_private;
-	int error = bio->bi_error;
+	blk_status_t status = bio->bi_status;
 
 	/*
 	 * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
 	 */
 	bio_reset(bio);
 
-	bio->bi_error = error;
+	bio->bi_status = status;
 	end_fn(bio);
 }
 
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->write_error = bio->bi_error;
-	if (unlikely(bio->bi_error)) {
+	b->write_error = bio->bi_status;
+	if (unlikely(bio->bi_status)) {
 		struct dm_bufio_client *c = b->c;
-		int error = bio->bi_error;
-		(void)cmpxchg(&c->async_write_error, 0, error);
+
+		(void)cmpxchg(&c->async_write_error, 0,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
 {
 	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-	b->read_error = bio->bi_error;
+	b->read_error = bio->bi_status;
 
 	BUG_ON(!test_bit(B_READING, &b->state));
 
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
 	if (b->read_error) {
-		int error = b->read_error;
+		int error = blk_status_to_errno(b->read_error);
 
 		dm_bufio_release(b);
 
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
  */
 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 {
-	int a, f;
+	blk_status_t a;
+	int f;
 	unsigned long buffers_processed = 0;
 	struct dm_buffer *b, *tmp;
 
@@ -1334,7 +1336,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = REQ_PREFLUSH,
+		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = c->dm_io,
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a05..c5ea03f 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct continuation {
 	struct work_struct ws;
-	int input;
+	blk_status_t input;
 };
 
 static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
 	/*
 	 * The operation that everyone is waiting for.
 	 */
-	int (*commit_op)(void *context);
+	blk_status_t (*commit_op)(void *context);
 	void *commit_context;
 
 	/*
@@ -171,8 +171,7 @@ struct batcher {
 static void __commit(struct work_struct *_ws)
 {
 	struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
-	int r;
+	blk_status_t r;
 	unsigned long flags;
 	struct list_head work_items;
 	struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
 
 	while ((bio = bio_list_pop(&bios))) {
 		if (r) {
-			bio->bi_error = r;
+			bio->bi_status = r;
 			bio_endio(bio);
 		} else
 			b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
 }
 
 static void batcher_init(struct batcher *b,
-			 int (*commit_op)(void *),
+			 blk_status_t (*commit_op)(void *),
 			 void *commit_context,
 			 void (*issue_op)(struct bio *bio, void *),
 			 void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bio_endio(bio);
 		return;
 	}
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
 
 	if (read_err || write_err)
-		mg->k.input = -EIO;
+		mg->k.input = BLK_STS_IOERR;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
 
 	dm_unhook_bio(&pb->hook_info, bio);
 
-	if (bio->bi_error)
-		mg->k.input = bio->bi_error;
+	if (bio->bi_status)
+		mg->k.input = bio->bi_status;
 
 	queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
 		if (mg->overwrite_bio) {
 			if (success)
 				force_set_dirty(cache, cblock);
+			else if (mg->k.input)
+				mg->overwrite_bio->bi_status = mg->k.input;
 			else
-				mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
 			bio_endio(mg->overwrite_bio);
 		} else {
 			if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
 		r = copy(mg, is_policy_promote);
 		if (r) {
 			DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-			mg->k.input = -EIO;
+			mg->k.input = BLK_STS_IOERR;
 			mg_complete(mg, false);
 		}
 	}
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
 /*
  * Used by the batcher.
  */
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
 {
 	struct cache *cache = context;
 
 	if (dm_cache_changed_this_transaction(cache->cmd))
-		return commit(cache, false);
+		return errno_to_blk_status(commit(cache, false));
 
 	return 0;
 }
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
 	bio_list_init(&cache->deferred_bios);
 
 	while ((bio = bio_list_pop(&bios))) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 	}
 }
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct cache *cache = ti->private;
 	unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 	bio_drop_shared_lock(cache, bio);
 	accounted_complete(cache, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static int write_dirty_bitset(struct cache *cache)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72..9e1b72e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
 	struct convert_context ctx;
 
 	atomic_t io_pending;
-	int error;
+	blk_status_t error;
 	sector_t sector;
 
 	struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 /*
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
 	unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
 		 */
 		case -EBADMSG:
 			atomic_dec(&ctx->cc_pending);
-			return -EILSEQ;
+			return BLK_STS_PROTECTION;
 		/*
 		 * There was an error while processing the request.
 		 */
 		default:
 			atomic_dec(&ctx->cc_pending);
-			return -EIO;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	int error = io->error;
+	blk_status_t error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
 		return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	else
 		kfree(io->integrity_metadata);
 
-	base_bio->bi_error = error;
+	base_bio->bi_status = error;
 	bio_endio(base_bio);
 }
 
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
 	struct dm_crypt_io *io = clone->bi_private;
 	struct crypt_config *cc = io->cc;
 	unsigned rw = bio_data_dir(clone);
-	int error;
+	blk_status_t error;
 
 	/*
 	 * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
 	if (rw == WRITE)
 		crypt_free_buffer_pages(cc, clone);
 
-	error = clone->bi_error;
+	error = clone->bi_status;
 	bio_put(clone);
 
 	if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
 
 	crypt_inc_pending(io);
 	if (kcryptd_io_read(io, GFP_NOIO))
-		io->error = -ENOMEM;
+		io->error = BLK_STS_RESOURCE;
 	crypt_dec_pending(io);
 }
 
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 	sector_t sector;
 	struct rb_node **rbp, *parent;
 
-	if (unlikely(io->error < 0)) {
+	if (unlikely(io->error)) {
 		crypt_free_buffer_pages(cc, clone);
 		bio_put(clone);
 		crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	struct bio *clone;
 	int crypt_finished;
 	sector_t sector = io->sector;
-	int r;
+	blk_status_t r;
 
 	/*
 	 * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
 	if (unlikely(!clone)) {
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 		goto dec;
 	}
 
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
-	int r = 0;
+	blk_status_t r;
 
 	crypt_inc_pending(io);
 
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 			   io->sector);
 
 	r = crypt_convert(cc, &io->ctx);
-	if (r < 0)
+	if (r)
 		io->error = r;
 
 	if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 	if (error == -EBADMSG) {
 		DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
 			    (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
-		io->error = -EILSEQ;
+		io->error = BLK_STS_PROTECTION;
 	} else if (error < 0)
-		io->error = -EIO;
+		io->error = BLK_STS_IOERR;
 
 	crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0);
+	cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+					    BIOSET_NEED_RESCUER));
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	 * and is aligned to this size as defined in IO hints.
 	 */
 	if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	io = dm_per_bio_data(bio, cc->per_bio_data_size);
 	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
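
The new DM_MAPIO_KILL returns in crypt_map() above reject bios whose start sector or size is not aligned to the configured crypto sector size. A hedged userspace sketch of that power-of-two alignment test follows; io_is_aligned is an invented name and the 4096-byte sector size is just an example.

/* Model of the alignment checks: with a power-of-two crypto sector
 * size, both the starting 512-byte sector and the bio size must be
 * multiples of that sector size. SECTOR_SHIFT is 9 (512 bytes).
 */
#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

static int io_is_aligned(uint64_t start_sector, unsigned int bio_bytes,
			 unsigned int crypt_sector_size)
{
	if (start_sector & ((crypt_sector_size >> SECTOR_SHIFT) - 1))
		return 0;
	if (bio_bytes & (crypt_sector_size - 1))
		return 0;
	return 1;
}

int main(void)
{
	/* 4096-byte crypto sectors: start must be a multiple of eight
	 * 512-byte sectors and size a multiple of 4096 bytes.
	 */
	printf("%d\n", io_is_aligned(8, 8192, 4096));	/* aligned -> 1 */
	printf("%d\n", io_is_aligned(9, 8192, 4096));	/* misaligned start -> 0 */
	printf("%d\n", io_is_aligned(8, 6144, 4096));	/* misaligned size -> 0 */
	return 0;
}
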
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a1..3d04d5c 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 		if (bio_data_dir(bio) == READ) {
 			if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
 			    !test_bit(ERROR_WRITES, &fc->flags))
-				return -EIO;
+				return DM_MAPIO_KILL;
 			goto map_bio;
 		}
 
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 		/*
 		 * By default, error all I/O.
 		 */
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 map_bio:
@@ -358,12 +358,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-	if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+	if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
 		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
 		    all_corrupt_bio_flags_match(bio, fc)) {
 			/*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 			 * Error read during the down_interval if drop_writes
 			 * and error_writes were not configured.
 			 */
-			return -EIO;
+			*error = BLK_STS_IOERR;
 		}
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static void flakey_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index c7f7c8d..1b224aa 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
 	unsigned metadata_offset;
 
 	atomic_t in_flight;
-	int bi_error;
+	blk_status_t bi_status;
 
 	struct completion *completion;
 
@@ -783,7 +783,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi
 			for (i = 0; i < commit_sections; i++)
 				rw_section_mac(ic, commit_start + i, true);
 		}
-		rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp);
+		rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start,
+			   commit_sections, &io_comp);
 	} else {
 		unsigned to_end;
 		io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
@@ -1104,18 +1105,21 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
 {
 	struct bio *bio;
-	spin_lock_irq(&ic->endio_wait.lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ic->endio_wait.lock, flags);
 	bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 	bio_list_add(&ic->flush_bio_list, bio);
-	spin_unlock_irq(&ic->endio_wait.lock);
+	spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
 	queue_work(ic->commit_wq, &ic->commit_work);
 }
 
 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
 {
 	int r = dm_integrity_failed(ic);
-	if (unlikely(r) && !bio->bi_error)
-		bio->bi_error = r;
+	if (unlikely(r) && !bio->bi_status)
+		bio->bi_status = errno_to_blk_status(r);
 	bio_endio(bio);
 }
 
@@ -1123,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
 {
 	struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-	if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+	if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
 		submit_flush_bio(ic, dio);
 	else
 		do_endio(ic, bio);
@@ -1142,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
 
 		bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-		if (unlikely(dio->bi_error) && !bio->bi_error)
-			bio->bi_error = dio->bi_error;
-		if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+		if (unlikely(dio->bi_status) && !bio->bi_status)
+			bio->bi_status = dio->bi_status;
+		if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
 			dio->range.logical_sector += dio->range.n_sectors;
 			bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
 			INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1318,7 +1322,7 @@ static void integrity_metadata(struct work_struct *w)
 	dec_in_flight(dio);
 	return;
 error:
-	dio->bi_error = r;
+	dio->bi_status = errno_to_blk_status(r);
 	dec_in_flight(dio);
 }
 
@@ -1331,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 	sector_t area, offset;
 
 	dio->ic = ic;
-	dio->bi_error = 0;
+	dio->bi_status = 0;
 
 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
 		submit_flush_bio(ic, dio);
@@ -1352,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 		DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
 		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
 		      (unsigned long long)ic->provided_data_sectors);
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 	if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
 		DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
 		      ic->sectors_per_block,
 		      (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (ic->sectors_per_block > 1) {
@@ -1368,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 			if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
 				DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
 					bv.bv_offset, bv.bv_len, ic->sectors_per_block);
-				return -EIO;
+				return DM_MAPIO_KILL;
 			}
 		}
 	}
@@ -1383,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
 				wanted_tag_size *= ic->tag_size;
 			if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
 				DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
-				return -EIO;
+				return DM_MAPIO_KILL;
 			}
 		}
 	} else {
 		if (unlikely(bip != NULL)) {
 			DMERR("Unexpected integrity data when using internal hash");
-			return -EIO;
+			return DM_MAPIO_KILL;
 		}
 	}
 
 	if (unlikely(ic->mode == 'R') && unlikely(dio->write))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
 	dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
@@ -2374,21 +2378,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
 	blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
 }
 
-/* FIXME: use new kvmalloc */
-static void *dm_integrity_kvmalloc(size_t size, gfp_t gfp)
-{
-	void *ptr = NULL;
-
-	if (size <= PAGE_SIZE)
-		ptr = kmalloc(size, GFP_KERNEL | gfp);
-	if (!ptr && size <= KMALLOC_MAX_SIZE)
-		ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | gfp);
-	if (!ptr)
-		ptr = __vmalloc(size, GFP_KERNEL | gfp, PAGE_KERNEL);
-
-	return ptr;
-}
-
 static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
 {
 	unsigned i;
@@ -2407,7 +2396,7 @@ static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
 	struct page_list *pl;
 	unsigned i;
 
-	pl = dm_integrity_kvmalloc(page_list_desc_size, __GFP_ZERO);
+	pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
 	if (!pl)
 		return NULL;
 
@@ -2437,7 +2426,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
 	struct scatterlist **sl;
 	unsigned i;
 
-	sl = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), __GFP_ZERO);
+	sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO);
 	if (!sl)
 		return NULL;
 
@@ -2453,7 +2442,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int
 
 		n_pages = (end_index - start_index + 1);
 
-		s = dm_integrity_kvmalloc(n_pages * sizeof(struct scatterlist), 0);
+		s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL);
 		if (!s) {
 			dm_integrity_free_journal_scatterlist(ic, sl);
 			return NULL;
@@ -2617,7 +2606,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 				goto bad;
 			}
 
-			sg = dm_integrity_kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), 0);
+			sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL);
 			if (!sg) {
 				*error = "Unable to allocate sg list";
 				r = -ENOMEM;
@@ -2673,7 +2662,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 				r = -ENOMEM;
 				goto bad;
 			}
-			ic->sk_requests = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), __GFP_ZERO);
+			ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO);
 			if (!ic->sk_requests) {
 				*error = "Unable to allocate sk requests";
 				r = -ENOMEM;
@@ -2740,7 +2729,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 		r = -ENOMEM;
 		goto bad;
 	}
-	ic->journal_tree = dm_integrity_kvmalloc(journal_tree_size, 0);
+	ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
 	if (!ic->journal_tree) {
 		*error = "Could not allocate memory for journal tree";
 		r = -ENOMEM;
@@ -3054,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		ti->error = "The device is too small";
 		goto bad;
 	}
+	if (ti->len > ic->provided_data_sectors) {
+		r = -EINVAL;
+		ti->error = "Not enough provided sectors for requested mapping size";
+		goto bad;
+	}
 
 	if (!buffer_sectors)
 		buffer_sectors = 1;
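
With dm_integrity_kvmalloc() gone, allocations go through the generic kvmalloc()/kvfree() pair, which already implements the kmalloc-then-vmalloc fallback the local helper used to hand-roll. The replacement pattern, as a small sketch:

static void *example_alloc(size_t size)
{
	/* kvmalloc() tries kmalloc() first and falls back to vmalloc()
	 * when the allocation is too large or memory is fragmented. */
	return kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
}

static void example_free(void *ptr)
{
	kvfree(ptr);	/* correct for both kmalloc()- and vmalloc()-backed memory */
}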
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3702e50..2503960 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0);
+	client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+						  BIOSET_NEED_RESCUER));
 	if (!client->bios)
 		goto bad;
 
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
 	fn(error_bits, context);
 }
 
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
 {
 	if (error)
 		set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
 {
 	struct io *io;
 	unsigned region;
-	int error;
+	blk_status_t error;
 
-	if (bio->bi_error && bio_data_dir(bio) == READ)
+	if (bio->bi_status && bio_data_dir(bio) == READ)
 		zero_fill_bio(bio);
 
 	/*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
 	 */
 	retrieve_io_and_region_from_bio(bio, &io, &region);
 
-	error = bio->bi_error;
+	error = bio->bi_status;
 	bio_put(bio);
 
 	dec_count(io, region, error);
@@ -317,9 +318,9 @@ static void do_region(int op, int op_flags, unsigned region,
 	else if (op == REQ_OP_WRITE_SAME)
 		special_cmd_max_sectors = q->limits.max_write_same_sectors;
 	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
-	     op == REQ_OP_WRITE_SAME)  &&
-	    special_cmd_max_sectors == 0) {
-		dec_count(io, region, -EOPNOTSUPP);
+	     op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+		atomic_inc(&io->count);
+		dec_count(io, region, BLK_STS_NOTSUPP);
 		return;
 	}
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0555b44..41852ae 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1710,12 +1710,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
 	}
 
 	/*
-	 * Try to avoid low memory issues when a device is suspended.
+	 * Use __GFP_HIGH to avoid low memory issues when a device is
+	 * suspended and the ioctl is needed to resume it.
 	 * Use kmalloc() rather than vmalloc() when we can.
 	 */
 	dmi = NULL;
 	noio_flag = memalloc_noio_save();
-	dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL);
+	dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH);
 	memalloc_noio_restore(noio_flag);
 
 	if (!dmi) {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe386..a1da0eb 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
 {
 	struct log_writes_c *lc = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		unsigned long flags;
 
-		DMERR("Error writing log block, error=%d", bio->bi_error);
+		DMERR("Error writing log block, error=%d", bio->bi_status);
 		spin_lock_irqsave(&lc->blocks_lock, flags);
 		lc->logging_enabled = false;
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 		spin_lock_irq(&lc->blocks_lock);
 		lc->logging_enabled = false;
 		spin_unlock_irq(&lc->blocks_lock);
-		return -ENOMEM;
+		return DM_MAPIO_KILL;
 	}
 	INIT_LIST_HEAD(&block->list);
 	pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 			spin_lock_irq(&lc->blocks_lock);
 			lc->logging_enabled = false;
 			spin_unlock_irq(&lc->blocks_lock);
-			return -ENOMEM;
+			return DM_MAPIO_KILL;
 		}
 
 		src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct log_writes_c *lc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
 		spin_unlock_irqrestore(&lc->blocks_lock, flags);
 	}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 /*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3df056b..0e8ab5b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			return DM_MAPIO_REQUEUE;
 		dm_report_EIO(m);
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	mpio->pgpath = pgpath;
 	mpio->nr_bytes = nr_bytes;
 
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	bio->bi_bdev = pgpath->path.dev->bdev;
 	bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
 	blk_start_plug(&plug);
 	while ((bio = bio_list_pop(&bios))) {
 		r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
-		if (r < 0 || r == DM_MAPIO_REQUEUE) {
-			bio->bi_error = r;
+		switch (r) {
+		case DM_MAPIO_KILL:
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
-		} else if (r == DM_MAPIO_REMAPPED)
+			break;
+		case DM_MAPIO_REQUEUE:
+			bio->bi_status = BLK_STS_DM_REQUEUE;
+			bio_endio(bio);
+			break;
+		case DM_MAPIO_REMAPPED:
 			generic_make_request(bio);
+			break;
+		}
 	}
 	blk_finish_plug(&plug);
 }
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
 	activate_or_offline_path(pgpath);
 }
 
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
 {
 	switch (error) {
-	case -EBADE:
-		/*
-		 * EBADE signals an reservation conflict.
-		 * We shouldn't fail the path here as we can communicate with
-		 * the target.  We should failover to the next path, but in
-		 * doing so we might be causing a ping-pong between paths.
-		 * So just return the reservation conflict error.
-		 */
-	case -EOPNOTSUPP:
-	case -EREMOTEIO:
-	case -EILSEQ:
-	case -ENODATA:
-	case -ENOSPC:
+	case BLK_STS_NOTSUPP:
+	case BLK_STS_NOSPC:
+	case BLK_STS_TARGET:
+	case BLK_STS_NEXUS:
+	case BLK_STS_MEDIUM:
+	case BLK_STS_RESOURCE:
 		return 1;
 	}
 
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
 }
 
 static int multipath_end_io(struct dm_target *ti, struct request *clone,
-			    int error, union map_info *map_context)
+			    blk_status_t error, union map_info *map_context)
 {
 	struct dm_mpath_io *mpio = get_mpio(map_context);
 	struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 
 		if (atomic_read(&m->nr_valid_paths) == 0 &&
 		    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-			if (error == -EIO)
+			if (error == BLK_STS_IOERR)
 				dm_report_EIO(m);
 			/* complete with the original error */
 			r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
-			 int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+		blk_status_t *error)
 {
+	struct multipath *m = ti->private;
+	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+	struct pgpath *pgpath = mpio->pgpath;
 	unsigned long flags;
+	int r = DM_ENDIO_DONE;
 
-	if (!error)
-		return 0;	/* I/O complete */
+	if (!*error || noretry_error(*error))
+		goto done;
 
-	if (noretry_error(error))
-		return error;
-
-	if (mpio->pgpath)
-		fail_path(mpio->pgpath);
+	if (pgpath)
+		fail_path(pgpath);
 
 	if (atomic_read(&m->nr_valid_paths) == 0 &&
 	    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 		dm_report_EIO(m);
-		return -EIO;
+		*error = BLK_STS_IOERR;
+		goto done;
 	}
 
 	/* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
 	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
 		queue_work(kmultipathd, &m->process_queued_bios);
 
-	return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
-	struct multipath *m = ti->private;
-	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
-	struct pgpath *pgpath;
-	struct path_selector *ps;
-	int r;
-
-	BUG_ON(!mpio);
-
-	r = do_end_io_bio(m, clone, error, mpio);
-	pgpath = mpio->pgpath;
+	r = DM_ENDIO_INCOMPLETE;
+done:
 	if (pgpath) {
-		ps = &pgpath->pg->ps;
+		struct path_selector *ps = &pgpath->pg->ps;
+
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7d89322..b4b75da 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
 	/********************************************************************
 	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
 	 *
-	 * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
 	 */
 
 	__le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->layout = cpu_to_le32(mddev->layout);
 	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
 
+	/********************************************************************
+	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+	 *
+	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+	 */
 	sb->new_level = cpu_to_le32(mddev->new_level);
 	sb->new_layout = cpu_to_le32(mddev->new_layout);
 	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
 	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-		/* Retrieve device size stored in superblock to be prepared for shrink */
-		rdev->sectors = le64_to_cpu(sb->sectors);
+		/*
+		 * Retrieve rdev size stored in superblock to be prepared for shrink.
+		 * Check that extended superblock members are present, otherwise
+		 * the size will not be set!
+		 */
+		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+			rdev->sectors = le64_to_cpu(sb->sectors);
+
 		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
 		if (rdev->recovery_offset == MaxSector)
 			set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index a95cbb8..a4fbd91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
 	struct mirror *m;
+	/* if details->bi_bdev == NULL, details were not saved */
 	struct dm_bio_details details;
 	region_t write_region;
 };
@@ -260,7 +261,7 @@ static int mirror_flush(struct dm_target *ti)
 	struct mirror *m;
 	struct dm_io_request io_req = {
 		.bi_op = REQ_OP_WRITE,
-		.bi_op_flags = REQ_PREFLUSH,
+		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
 		.mem.type = DM_IO_KMEM,
 		.mem.ptr.addr = NULL,
 		.client = ms->io_client,
@@ -490,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
 		 * If device is suspended, complete the bio.
 		 */
 		if (dm_noflush_suspending(ms->ti))
-			bio->bi_error = DM_ENDIO_REQUEUE;
+			bio->bi_status = BLK_STS_DM_REQUEUE;
 		else
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 		bio_endio(bio);
 		return;
@@ -626,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
 	 * degrade the array.
 	 */
 	if (bio_op(bio) == REQ_OP_DISCARD) {
-		bio->bi_error = -EOPNOTSUPP;
+		bio->bi_status = BLK_STS_NOTSUPP;
 		bio_endio(bio);
 		return;
 	}
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	struct dm_raid1_bio_record *bio_record =
 	  dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
+	bio_record->details.bi_bdev = NULL;
+
 	if (rw == WRITE) {
 		/* Save region for mirror_end_io() handler */
 		bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1207,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 
 	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
 	if (r < 0 && r != -EWOULDBLOCK)
-		return r;
+		return DM_MAPIO_KILL;
 
 	/*
 	 * If region is not in-sync queue the bio.
 	 */
 	if (!r || (r == -EWOULDBLOCK)) {
 		if (bio->bi_opf & REQ_RAHEAD)
-			return -EWOULDBLOCK;
+			return DM_MAPIO_KILL;
 
 		queue_bio(ms, bio, rw);
 		return DM_MAPIO_SUBMITTED;
@@ -1226,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	 */
 	m = choose_mirror(ms, bio->bi_iter.bi_sector);
 	if (unlikely(!m))
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	dm_bio_record(&bio_record->details, bio);
 	bio_record->m = m;
@@ -1236,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	int rw = bio_data_dir(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1252,16 +1256,26 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		if (!(bio->bi_opf & REQ_PREFLUSH) &&
 		    bio_op(bio) != REQ_OP_DISCARD)
 			dm_rh_dec(ms->rh, bio_record->write_region);
-		return error;
+		return DM_ENDIO_DONE;
 	}
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == BLK_STS_NOTSUPP)
+		goto out;
 
-	if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-		return error;
+	if (bio->bi_opf & REQ_RAHEAD)
+		goto out;
 
-	if (unlikely(error)) {
+	if (unlikely(*error)) {
+		if (!bio_record->details.bi_bdev) {
+			/*
+			 * There wasn't enough memory to record necessary
+			 * information for a retry or there was no other
+			 * mirror in-sync.
+			 */
+			DMERR_LIMIT("Mirror read failed.");
+			return DM_ENDIO_DONE;
+		}
+
 		m = bio_record->m;
 
 		DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,7 +1291,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 			bd = &bio_record->details;
 
 			dm_bio_restore(bd, bio);
-			bio->bi_error = 0;
+			bio_record->details.bi_bdev = NULL;
+			bio->bi_status = 0;
 
 			queue_bio(ms, bio, rw);
 			return DM_ENDIO_INCOMPLETE;
@@ -1285,7 +1300,10 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 		DMERR("All replicated volumes dead, failing I/O");
 	}
 
-	return error;
+out:
+	bio_record->details.bi_bdev = NULL;
+
+	return DM_ENDIO_DONE;
 }
 
 static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7..c6ebc5b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
 
 static void dm_mq_start_queue(struct request_queue *q)
 {
-	blk_mq_start_stopped_hw_queues(q, true);
+	blk_mq_unquiesce_queue(q);
 	blk_mq_kick_requeue_list(q);
 }
 
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
 	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-	int error = clone->bi_error;
+	blk_status_t error = clone->bi_status;
 
 	bio_put(clone);
 
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
 	 * Do not use blk_end_request() here, because it may complete
 	 * the original request before the clone, and break the ordering.
 	 */
-	blk_update_request(tio->orig, 0, nr_bytes);
+	blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
 }
 
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  * Must be called without clone's queue lock held,
  * see end_clone_request() for more details.
  */
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
 {
 	int rw = rq_data_dir(clone);
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	rq_completed(md, rw, false);
 }
 
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
 {
 	int r = DM_ENDIO_DONE;
 	struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 			r = rq_end_io(tio->ti, clone, error, &tio->info);
 	}
 
-	if (unlikely(error == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (req_op(clone) == REQ_OP_WRITE_SAME &&
 		    !clone->q->limits.max_write_same_sectors)
 			disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
  * Complete the clone and the original request with the error status
  * through softirq context.
  */
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Target's rq_end_io() function isn't called.
  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
 {
 	rq->rq_flags |= RQF_FAILED;
 	dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 /*
  * Called with the clone's queue lock held (in the case of .request_fn)
  */
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
 
 static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
-	int r;
+	blk_status_t r;
 
 	if (blk_queue_io_stat(clone->q))
 		clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
 		break;
 	case DM_MAPIO_KILL:
 		/* The target wants to complete the I/O */
-		dm_kill_unmapped_request(rq, -EIO);
+		dm_kill_unmapped_request(rq, BLK_STS_IOERR);
 		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	return __dm_rq_init_rq(set->driver_data, rq);
 }
 
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *bd)
 {
 	struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	}
 
 	if (ti->type->busy && ti->type->busy(ti))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	dm_start_request(md, rq);
 
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
 		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops dm_mq_ops = {
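
dm_mq_queue_rq() now reports busy/ok conditions as blk_status_t rather than the old BLK_MQ_RQ_QUEUE_* codes. For a generic blk-mq driver the same convention looks roughly like this sketch (example_can_queue() and example_issue() are hypothetical helpers):

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	if (!example_can_queue(hctx->queue))
		return BLK_STS_RESOURCE;	/* block layer re-runs the queue later */

	blk_mq_start_request(rq);
	example_issue(rq);			/* hand the request to the hardware */
	return BLK_STS_OK;
}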
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d2..9813922 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
 	struct dm_target *ti;
 	struct request *orig, *clone;
 	struct kthread_work work;
-	int error;
+	blk_status_t error;
 	union map_info info;
 	struct dm_stats_aux stats_aux;
 	unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index b93476c..c5534d2 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -741,7 +741,8 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 	/*
 	 * Commit exceptions to disk.
 	 */
-	if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA))
+	if (ps->valid && area_io(ps, REQ_OP_WRITE,
+				 REQ_PREFLUSH | REQ_FUA | REQ_SYNC))
 		ps->valid = 0;
 
 	/*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d98..1ba4104 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
 {
 	void *callback_data = bio->bi_private;
 
-	dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+	dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
 }
 
 static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	/* Full snapshots are not usable */
 	/* To get here the table must be live so s->active is always set. */
 	if (!s->valid)
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	/* FIXME: should only take write lock if we need
 	 * to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
 	    bio_data_dir(bio) == WRITE)) {
-		r = -EIO;
+		r = DM_MAPIO_KILL;
 		goto out_unlock;
 	}
 
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
 			if (!s->valid || s->snapshot_overflowed) {
 				free_pending_exception(pe);
-				r = -EIO;
+				r = DM_MAPIO_KILL;
 				goto out_unlock;
 			}
 
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 					DMERR("Snapshot overflowed: Unable to allocate exception.");
 				} else
 					__invalidate_snapshot(s, -ENOMEM);
-				r = -EIO;
+				r = DM_MAPIO_KILL;
 				goto out_unlock;
 			}
 		}
@@ -1851,14 +1851,15 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	struct dm_snapshot *s = ti->private;
 
 	if (is_bio_tracked(bio))
 		stop_tracking_chunk(s, bio);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 7515248..11621a0 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
 	}
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+		blk_status_t *error)
 {
 	unsigned i;
 	char major_minor[16];
 	struct stripe_c *sc = ti->private;
 
-	if (!error)
-		return 0; /* I/O complete */
+	if (!*error)
+		return DM_ENDIO_DONE; /* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-		return error;
+	if (bio->bi_opf & REQ_RAHEAD)
+		return DM_ENDIO_DONE;
 
-	if (error == -EOPNOTSUPP)
-		return error;
+	if (*error == BLK_STS_NOTSUPP)
+		return DM_ENDIO_DONE;
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 				schedule_work(&sc->trigger_event);
 		}
 
-	return error;
+	return DM_ENDIO_DONE;
 }
 
 static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b75..c0d7e60 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
 
 static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
-	return -EIO;
+	return DM_MAPIO_KILL;
 }
 
 static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 17ad50d..9dec2f8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
 	 * Even if r is set, there could be sub discards in flight that we
 	 * need to wait for.
 	 */
-	if (r && !op->parent_bio->bi_error)
-		op->parent_bio->bi_error = r;
+	if (r && !op->parent_bio->bi_status)
+		op->parent_bio->bi_status = errno_to_blk_status(r);
 	bio_endio(op->parent_bio);
 }
 
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
 }
 
 static void cell_error_with_code(struct pool *pool,
-				 struct dm_bio_prison_cell *cell, int error_code)
+		struct dm_bio_prison_cell *cell, blk_status_t error_code)
 {
 	dm_cell_error(pool->prison, cell, error_code);
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
 {
-	return pool->out_of_data_space ? -ENOSPC : -EIO;
+	return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
 }
 
 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	int error = get_pool_io_error_code(pool);
-
-	cell_error_with_code(pool, cell, error);
+	cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
 }
 
 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
 
 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-	cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+	cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
 }
 
 /*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
 	bio_list_init(master);
 }
 
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
 {
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(bios))) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	}
 }
 
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+		blk_status_t error)
 {
 	struct bio_list bios;
 	unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
 	__merge_bio_list(&bios, &tc->retry_on_resume_list);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
-	error_bio_list(&bios, DM_ENDIO_REQUEUE);
+	error_bio_list(&bios, BLK_STS_DM_REQUEUE);
 	requeue_deferred_cells(tc);
 }
 
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
 {
 	struct thin_c *tc;
 
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
 
 static void error_retry_list(struct pool *pool)
 {
-	int error = get_pool_io_error_code(pool);
-
-	error_retry_list_with_code(pool, error);
+	error_retry_list_with_code(pool, get_pool_io_error_code(pool));
 }
 
 /*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
 	 */
 	atomic_t prepare_actions;
 
-	int err;
+	blk_status_t status;
 	struct thin_c *tc;
 	dm_block_t virt_begin, virt_end;
 	dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_thin_new_mapping *m = context;
 
-	m->err = read_err || write_err ? -EIO : 0;
+	m->status = read_err || write_err ? BLK_STS_IOERR : 0;
 	complete_mapping_preparation(m);
 }
 
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
 
 	bio->bi_end_io = m->saved_bi_end_io;
 
-	m->err = bio->bi_error;
+	m->status = bio->bi_status;
 	complete_mapping_preparation(m);
 }
 
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	struct bio *bio = m->bio;
 	int r;
 
-	if (m->err) {
+	if (m->status) {
 		cell_error(pool, m->cell);
 		goto out;
 	}
@@ -1094,6 +1091,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 		return;
 	}
 
+	/*
+	 * Increment the unmapped blocks.  This prevents a race between the
+	 * passdown io and reallocation of freed blocks.
+	 */
+	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+	if (r) {
+		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+		bio_io_error(m->bio);
+		cell_defer_no_holder(tc, m->cell);
+		mempool_free(m, pool->mapping_pool);
+		return;
+	}
+
 	discard_parent = bio_alloc(GFP_NOIO, 1);
 	if (!discard_parent) {
 		DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1124,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 			end_discard(&op, r);
 		}
 	}
-
-	/*
-	 * Increment the unmapped blocks.  This prevents a race between the
-	 * passdown io and reallocation of freed blocks.
-	 */
-	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-	if (r) {
-		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-		bio_io_error(m->bio);
-		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
-		return;
-	}
 }
 
 static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
 	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
 {
 	enum pool_mode m = get_pool_mode(pool);
 
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
 	case PM_WRITE:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	case PM_OUT_OF_DATA_SPACE:
-		return pool->pf.error_if_no_space ? -ENOSPC : 0;
+		return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
 
 	case PM_READ_ONLY:
 	case PM_FAIL:
-		return -EIO;
+		return BLK_STS_IOERR;
 	default:
 		/* Shouldn't get here */
 		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
 {
-	int error = should_error_unserviceable_bio(pool);
+	blk_status_t error = should_error_unserviceable_bio(pool);
 
 	if (error) {
-		bio->bi_error = error;
+		bio->bi_status = error;
 		bio_endio(bio);
 	} else
 		retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 {
 	struct bio *bio;
 	struct bio_list bios;
-	int error;
+	blk_status_t error;
 
 	error = should_error_unserviceable_bio(pool);
 	if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	unsigned count = 0;
 
 	if (tc->requeue_mode) {
-		error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+		error_thin_bio_list(tc, &tc->deferred_bio_list,
+				BLK_STS_DM_REQUEUE);
 		return;
 	}
 
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
 		notify_of_pool_mode_change_to_oods(pool);
-		error_retry_list_with_code(pool, -ENOSPC);
+		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
 
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 	thin_hook_bio(tc, bio);
 
 	if (tc->requeue_mode) {
-		bio->bi_error = DM_ENDIO_REQUEUE;
+		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
 	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+		blk_status_t *err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 	if (h->cell)
 		cell_defer_no_holder(h->tc, h->cell);
 
-	return 0;
+	return DM_ENDIO_DONE;
 }
 
 static void thin_presuspend(struct dm_target *ti)
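
dm-thin keeps its internal metadata errors as plain errnos and converts only at the bio boundary via errno_to_blk_status(), as end_discard() above does. The boundary pattern, as a small sketch:

static void example_complete_from_errno(struct bio *bio, int r)
{
	/* Translate a kernel errno into the bio's blk_status_t before ending it. */
	if (r)
		bio->bi_status = errno_to_blk_status(r);
	bio_endio(bio);
}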
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 97de961..b46705e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -166,7 +166,7 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
 		return r;
 	}
 
-	if (likely(v->version >= 1))
+	if (likely(v->salt_size && (v->version >= 1)))
 		r = verity_hash_update(v, req, v->salt, v->salt_size, res);
 
 	return r;
@@ -177,7 +177,7 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
 {
 	int r;
 
-	if (unlikely(!v->version)) {
+	if (unlikely(v->salt_size && (!v->version))) {
 		r = verity_hash_update(v, req, v->salt, v->salt_size, res);
 
 		if (r < 0) {
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
 /*
  * End one "io" structure with a given error.
  */
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
 {
 	struct dm_verity *v = io->v;
 	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
 	bio->bi_end_io = io->orig_bi_end_io;
-	bio->bi_error = error;
+	bio->bi_status = status;
 
 	verity_fec_finish_io(io);
 
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
 {
 	struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
 
-	verity_finish_io(io, verity_verify_io(io));
+	verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
 static void verity_end_io(struct bio *bio)
 {
 	struct dm_verity_io *io = bio->bi_private;
 
-	if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
-		verity_finish_io(io, bio->bi_error);
+	if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+		verity_finish_io(io, bio->bi_status);
 		return;
 	}
 
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
 	    ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
 		DMERR_LIMIT("unaligned io");
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (bio_end_sector(bio) >>
 	    (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
 		DMERR_LIMIT("io out of range");
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	if (bio_data_dir(bio) == WRITE)
-		return -EIO;
+		return DM_MAPIO_KILL;
 
 	io = dm_per_bio_data(bio, ti->per_io_data_size);
 	io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11..b65ca8d 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
 	case REQ_OP_READ:
 		if (bio->bi_opf & REQ_RAHEAD) {
 			/* readahead of null bytes only wastes buffer cache */
-			return -EIO;
+			return DM_MAPIO_KILL;
 		}
 		zero_fill_bio(bio);
 		break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
 		/* writes get silently dropped */
 		break;
 	default:
-		return -EIO;
+		return DM_MAPIO_KILL;
 	}
 
 	bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6ef9500..4029460 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
  */
 struct dm_io {
 	struct mapped_device *md;
-	int error;
+	blk_status_t status;
 	atomic_t io_count;
 	struct bio *bio;
 	unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necessary.
  */
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
 {
 	unsigned long flags;
-	int io_error;
+	blk_status_t io_error;
 	struct bio *bio;
 	struct mapped_device *md = io->md;
 
 	/* Push-back supersedes any I/O errors */
 	if (unlikely(error)) {
 		spin_lock_irqsave(&io->endio_lock, flags);
-		if (!(io->error > 0 && __noflush_suspending(md)))
-			io->error = error;
+		if (!(io->status == BLK_STS_DM_REQUEUE &&
+				__noflush_suspending(md)))
+			io->status = error;
 		spin_unlock_irqrestore(&io->endio_lock, flags);
 	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
-		if (io->error == DM_ENDIO_REQUEUE) {
+		if (io->status == BLK_STS_DM_REQUEUE) {
 			/*
 			 * Target requested pushing back the I/O.
 			 */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
 				bio_list_add_head(&md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
-				io->error = -EIO;
+				io->status = BLK_STS_IOERR;
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
-		io_error = io->error;
+		io_error = io->status;
 		bio = io->bio;
 		end_io_acct(io);
 		free_io(md, io);
 
-		if (io_error == DM_ENDIO_REQUEUE)
+		if (io_error == BLK_STS_DM_REQUEUE)
 			return;
 
 		if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			bio->bi_error = io_error;
+			bio->bi_status = io_error;
 			bio_endio(bio);
 		}
 	}
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
 
 static void clone_endio(struct bio *bio)
 {
-	int error = bio->bi_error;
-	int r = error;
+	blk_status_t error = bio->bi_status;
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
 
-	if (endio) {
-		r = endio(tio->ti, bio, error);
-		if (r < 0 || r == DM_ENDIO_REQUEUE)
-			/*
-			 * error and requeue request are handled
-			 * in dec_pending().
-			 */
-			error = r;
-		else if (r == DM_ENDIO_INCOMPLETE)
-			/* The target will handle the io */
-			return;
-		else if (r) {
-			DMWARN("unimplemented target endio return value: %d", r);
-			BUG();
-		}
-	}
-
-	if (unlikely(r == -EREMOTEIO)) {
+	if (unlikely(error == BLK_STS_TARGET)) {
 		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
 		    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
 			disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
 			disable_write_zeroes(md);
 	}
 
+	if (endio) {
+		int r = endio(tio->ti, bio, &error);
+		switch (r) {
+		case DM_ENDIO_REQUEUE:
+			error = BLK_STS_DM_REQUEUE;
+			/*FALLTHRU*/
+		case DM_ENDIO_DONE:
+			break;
+		case DM_ENDIO_INCOMPLETE:
+			/* The target will handle the io */
+			return;
+		default:
+			DMWARN("unimplemented target endio return value: %d", r);
+			BUG();
+		}
+	}
+
 	free_tio(tio);
 	dec_pending(io, error);
 }
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 
 		while ((bio = bio_list_pop(&list))) {
 			struct bio_set *bs = bio->bi_pool;
-			if (unlikely(!bs) || bs == fs_bio_set) {
+			if (unlikely(!bs) || bs == fs_bio_set ||
+			    !bs->rescue_workqueue) {
 				bio_list_add(&current->bio_list[i], bio);
 				continue;
 			}
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
 	r = ti->type->map(ti, clone);
 	dm_offload_end(&o);
 
-	if (r == DM_MAPIO_REMAPPED) {
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		break;
+	case DM_MAPIO_REMAPPED:
 		/* the bio has been remapped so dispatch it */
-
 		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
 				      tio->io->bio->bi_bdev->bd_dev, sector);
-
 		generic_make_request(clone);
-	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
-		/* error the io and bail out, or requeue it if needed */
-		dec_pending(tio->io, r);
+		break;
+	case DM_MAPIO_KILL:
+		dec_pending(tio->io, BLK_STS_IOERR);
 		free_tio(tio);
-	} else if (r != DM_MAPIO_SUBMITTED) {
+		break;
+	case DM_MAPIO_REQUEUE:
+		dec_pending(tio->io, BLK_STS_DM_REQUEUE);
+		free_tio(tio);
+		break;
+	default:
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
 	}
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
 	ci.map = map;
 	ci.md = md;
 	ci.io = alloc_io(md);
-	ci.io->error = 0;
+	ci.io->status = 0;
 	atomic_set(&ci.io->io_count, 1);
 	ci.io->bio = bio;
 	ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
 	 * Initialize aspects of queue that aren't relevant for blk-mq
 	 */
 	md->queue->backing_dev_info->congested_fn = dm_any_congested;
-	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 }
 
 static void cleanup_mapped_device(struct mapped_device *md)
@@ -1657,7 +1663,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	bio_init(&md->flush_bio, NULL, 0);
 	md->flush_bio.bi_bdev = md->bdev;
-	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
 
 	dm_stats_init(&md->stats);
 
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
 		BUG();
 	}
 
-	pools->bs = bioset_create_nobvec(pool_size, front_pad);
+	pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
 	if (!pools->bs)
 		goto out;
 
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 7299ce2..03082e1 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1311,8 +1311,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
 	cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
 	lock_comm(cinfo, 1);
 	ret = __sendmsg(cinfo, &cmsg);
-	if (ret)
+	if (ret) {
+		unlock_comm(cinfo);
 		return ret;
+	}
 	cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
 	ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
 	cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 10367ff..31bcbfb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
 static bool create_on_open = true;
 
 /* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
  */
 
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	unsigned int sectors;
 	int cpu;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if (mddev == NULL || mddev->pers == NULL) {
 		bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	}
 	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
 		if (bio_sectors(bio) != 0)
-			bio->bi_error = -EROFS;
+			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
 		return BLK_QC_T_NONE;
 	}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
 	struct md_rdev *rdev = bio->bi_private;
 	struct mddev *mddev = rdev->mddev;
 
-	if (bio->bi_error) {
-		pr_err("md: super_written gets error=%d\n", bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("md: super_written gets error=%d\n", bio->bi_status);
 		md_error(mddev, rdev);
 		if (!test_bit(Faulty, &rdev->flags)
 		    && (bio->bi_opf & MD_FAILFAST)) {
@@ -765,7 +765,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	    test_bit(FailFast, &rdev->flags) &&
 	    !test_bit(LastDev, &rdev->flags))
 		ff = MD_FAILFAST;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
 
 	atomic_inc(&mddev->pending_writes);
 	submit_bio(bio);
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 
 	submit_bio_wait(bio);
 
-	ret = !bio->bi_error;
+	ret = !bio->bi_status;
 	bio_put(bio);
 	return ret;
 }
@@ -825,7 +825,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 	return -EINVAL;
 }
 
-static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
 	return	sb1->set_uuid0 == sb2->set_uuid0 &&
 		sb1->set_uuid1 == sb2->set_uuid1 &&
@@ -833,7 +833,7 @@ static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 		sb1->set_uuid3 == sb2->set_uuid3;
 }
 
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
 	int ret;
 	mdp_super_t *tmp1, *tmp2;
@@ -1025,12 +1025,12 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
 	} else {
 		__u64 ev1, ev2;
 		mdp_super_t *refsb = page_address(refdev->sb_page);
-		if (!uuid_equal(refsb, sb)) {
+		if (!md_uuid_equal(refsb, sb)) {
 			pr_warn("md: %s has different UUID to %s\n",
 				b, bdevname(refdev->bdev,b2));
 			goto abort;
 		}
-		if (!sb_equal(refsb, sb)) {
+		if (!md_sb_equal(refsb, sb)) {
 			pr_warn("md: %s has same UUID but different superblock to %s\n",
 				b, bdevname(refdev->bdev, b2));
 			goto abort;
@@ -5174,6 +5174,18 @@ static void mddev_delayed_delete(struct work_struct *ws)
 
 static void no_op(struct percpu_ref *r) {}
 
+int mddev_init_writes_pending(struct mddev *mddev)
+{
+	if (mddev->writes_pending.percpu_count_ptr)
+		return 0;
+	if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
+		return -ENOMEM;
+	/* We want to start with the refcount at zero */
+	percpu_ref_put(&mddev->writes_pending);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
+
 static int md_alloc(dev_t dev, char *name)
 {
 	/*
@@ -5239,10 +5251,6 @@ static int md_alloc(dev_t dev, char *name)
 	blk_queue_make_request(mddev->queue, md_make_request);
 	blk_set_stacking_limits(&mddev->queue->limits);
 
-	if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
-		goto abort;
-	/* We want to start with the refcount at zero */
-	percpu_ref_put(&mddev->writes_pending);
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
 		blk_cleanup_queue(mddev->queue);
@@ -5420,7 +5428,7 @@ int md_run(struct mddev *mddev)
 	}
 
 	if (mddev->bio_set == NULL) {
-		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (!mddev->bio_set)
 			return -ENOMEM;
 	}
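
Since percpu_ref_init() no longer runs in md_alloc(), code that needs mddev->writes_pending is expected to set it up lazily through the new mddev_init_writes_pending() helper; a personality would typically do so from its ->run() method, roughly as in this illustrative sketch (example_run() is not a real function):

static int example_run(struct mddev *mddev)
{
	/* Lazily create the writes_pending percpu refcount; calling this
	 * again on an already-initialised mddev is a no-op. */
	if (mddev_init_writes_pending(mddev) < 0)
		return -ENOMEM;

	/* normal personality setup would follow here */
	return 0;
}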
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 11f1514..0fa1de4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -648,6 +648,7 @@ extern void md_unregister_thread(struct md_thread **threadp);
 extern void md_wakeup_thread(struct md_thread *thread);
 extern void md_check_recovery(struct mddev *mddev);
 extern void md_reap_sync_thread(struct mddev *mddev);
+extern int mddev_init_writes_pending(struct mddev *mddev);
 extern void md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521..68d036e 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 {
 	struct bio *bio = mp_bh->master_bio;
 	struct mpconf *conf = mp_bh->mddev->private;
 
-	bio->bi_error = err;
+	bio->bi_status = status;
 	bio_endio(bio);
 	mempool_free(mp_bh, conf->pool);
 }
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
 	struct mpconf *conf = mp_bh->mddev->private;
 	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		multipath_end_bh_io(mp_bh, 0);
 	else if (!(bio->bi_opf & REQ_RAHEAD)) {
 		/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
 			(unsigned long long)bio->bi_iter.bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	} else
-		multipath_end_bh_io(mp_bh, bio->bi_error);
+		multipath_end_bh_io(mp_bh, bio->bi_status);
 	rdev_dec_pending(rdev, conf->mddev);
 }
 
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
 			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
 			       bdevname(bio->bi_bdev,b),
 			       (unsigned long long)bio->bi_iter.bi_sector);
-			multipath_end_bh_io(mp_bh, -EIO);
+			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
 		} else {
 			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
 			       bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index af5056d..98ca2c1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	struct r1conf *conf = r1_bio->mddev->private;
 
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
 
 static void raid1_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = bio->bi_private;
 	struct r1conf *conf = r1_bio->mddev->private;
 	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
 	struct md_rdev *rdev = conf->mirrors[mirror].rdev;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		set_bit(WriteErrorSeen,	&rdev->flags);
 		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
 	 * or re-read if the read failed.
 	 * We don't do much here, just schedule handling by raid1d
 	 */
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
 	if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
 
 static void end_sync_write(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r1bio *r1_bio = get_resync_r1bio(bio);
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 		idx ++;
 	}
 	set_bit(R1BIO_Uptodate, &r1_bio->state);
-	bio->bi_error = 0;
+	bio->bi_status = 0;
 	return 1;
 }
 
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 		int j;
 		int size;
-		int error;
+		blk_status_t status;
 		struct bio_vec *bi;
 		struct bio *b = r1_bio->bios[i];
 		struct resync_pages *rp = get_resync_pages(b);
 		if (b->bi_end_io != end_sync_read)
 			continue;
 		/* fixup the bio for reuse, but preserve errno */
-		error = b->bi_error;
+		status = b->bi_status;
 		bio_reset(b);
-		b->bi_error = error;
+		b->bi_status = status;
 		b->bi_vcnt = vcnt;
 		b->bi_iter.bi_size = r1_bio->sectors << 9;
 		b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
 	}
 	for (primary = 0; primary < conf->raid_disks * 2; primary++)
 		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-		    !r1_bio->bios[primary]->bi_error) {
+		    !r1_bio->bios[primary]->bi_status) {
 			r1_bio->bios[primary]->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
 			break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
 		int j;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
-		int error = sbio->bi_error;
+		blk_status_t status = sbio->bi_status;
 		struct page **ppages = get_resync_pages(pbio)->pages;
 		struct page **spages = get_resync_pages(sbio)->pages;
 		struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
-		sbio->bi_error = 0;
+		sbio->bi_status = 0;
 
 		bio_for_each_segment_all(bi, sbio, j)
 			page_len[j] = bi->bv_len;
 
-		if (!error) {
+		if (!status) {
 			for (j = vcnt; j-- ; ) {
 				if (memcmp(page_address(ppages[j]),
 					   page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
 		if (j >= 0)
 			atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
 		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-			      && !error)) {
+			      && !status)) {
 			/* No need to write to this device. */
 			sbio->bi_end_io = NULL;
 			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 		struct bio *bio = r1_bio->bios[m];
 		if (bio->bi_end_io == NULL)
 			continue;
-		if (!bio->bi_error &&
+		if (!bio->bi_status &&
 		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
 			rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
 		}
-		if (bio->bi_error &&
+		if (bio->bi_status &&
 		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
 			if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
 				md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf->r1bio_pool)
 		goto abort;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 
@@ -3063,6 +3063,8 @@ static int raid1_run(struct mddev *mddev)
 			mdname(mddev));
 		return -EIO;
 	}
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
 	/*
 	 * copy the already verified devices into our private RAID1
 	 * bookkeeping area. [whatever we allocate in run(),
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 4343d7f..57a250f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	struct r10conf *conf = r10_bio->mddev->private;
 
 	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
 	/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 
 static void raid10_end_read_request(struct bio *bio)
 {
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct r10bio *r10_bio = bio->bi_private;
 	int slot, dev;
 	struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
 	struct bio *to_put = NULL;
 	bool discard_error;
 
-	discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+	discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
 	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (bio->bi_error && !discard_error) {
+	if (bio->bi_status && !discard_error) {
 		if (repl)
 			/* Never record new bad blocks to replacement,
 			 * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
 			bio->bi_next = NULL;
 			bio->bi_bdev = rdev->bdev;
 			if (test_bit(Faulty, &rdev->flags)) {
-				bio->bi_error = -EIO;
+				bio->bi_status = BLK_STS_IOERR;
 				bio_endio(bio);
 			} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 					    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 		bio->bi_next = NULL;
 		bio->bi_bdev = rdev->bdev;
 		if (test_bit(Faulty, &rdev->flags)) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
 				    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
 {
 	struct r10conf *conf = r10_bio->mddev->private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 	else
 		/* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
 	else
 		rdev = conf->mirrors[d].rdev;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (repl)
 			md_error(mddev, rdev);
 		else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 
 	/* find the first device with a block */
 	for (i=0; i<conf->copies; i++)
-		if (!r10_bio->devs[i].bio->bi_error)
+		if (!r10_bio->devs[i].bio->bi_status)
 			break;
 
 	if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		tpages = get_resync_pages(tbio)->pages;
 		d = r10_bio->devs[i].devnum;
 		rdev = conf->mirrors[d].rdev;
-		if (!r10_bio->devs[i].bio->bi_error) {
+		if (!r10_bio->devs[i].bio->bi_status) {
 			/* We know that the bi_io_vec layout is the same for
 			 * both 'first' and 'i', so we just compare them.
 			 * All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			rdev = conf->mirrors[dev].rdev;
 			if (r10_bio->devs[m].bio == NULL)
 				continue;
-			if (!r10_bio->devs[m].bio->bi_error) {
+			if (!r10_bio->devs[m].bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			if (r10_bio->devs[m].repl_bio == NULL)
 				continue;
 
-			if (!r10_bio->devs[m].repl_bio->bi_error) {
+			if (!r10_bio->devs[m].repl_bio->bi_status) {
 				rdev_clear_badblocks(
 					rdev,
 					r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 					r10_bio->devs[m].addr,
 					r10_bio->sectors, 0);
 				rdev_dec_pending(rdev, conf->mddev);
-			} else if (bio != NULL && bio->bi_error) {
+			} else if (bio != NULL && bio->bi_status) {
 				fail = true;
 				if (!narrow_write_error(r10_bio, m)) {
 					md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
 			bio = r10_bio->devs[i].bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 			/* Need to set up for writing to the replacement */
 			bio = r10_bio->devs[i].repl_bio;
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 
 			sector = r10_bio->devs[i].addr;
 			bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
 		if (bio->bi_end_io == end_sync_read) {
 			md_sync_acct(bio->bi_bdev, nr_sectors);
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 			generic_make_request(bio);
 		}
 	}
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	if (!conf->r10bio_pool)
 		goto out;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto out;
 
@@ -3611,6 +3611,9 @@ static int raid10_run(struct mddev *mddev)
 	int first = 1;
 	bool discard_supported = false;
 
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
+
 	if (mddev->private == NULL) {
 		conf = setup_conf(mddev);
 		if (IS_ERR(conf))
@@ -4394,7 +4397,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	read_bio->bi_end_io = end_reshape_read;
 	bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
 	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-	read_bio->bi_error = 0;
+	read_bio->bi_status = 0;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
@@ -4638,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
 		rdev = conf->mirrors[d].rdev;
 	}
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		/* FIXME should record badblock */
 		md_error(mddev, rdev);
 	}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 4c00bc2..bfa1e90 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
 	struct r5l_log *log = io->log;
 	unsigned long flags;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
 	unsigned long flags;
 	struct r5l_io_unit *io;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(log->rdev->mddev, log->rdev);
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
@@ -1782,7 +1782,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
 	mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 					     mb, PAGE_SIZE));
 	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
-			  REQ_FUA, false)) {
+			  REQ_SYNC | REQ_FUA, false)) {
 		__free_page(page);
 		return -EIO;
 	}
@@ -2388,7 +2388,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
 						     mb, PAGE_SIZE));
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
-			     REQ_OP_WRITE, REQ_FUA, false);
+			     REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
 		sh->log_start = ctx->pos;
 		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
 		atomic_inc(&log->stripe_in_journal_count);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	if (!log->io_pool)
 		goto io_pool;
 
-	log->bs = bioset_create(R5L_POOL_SIZE, 0);
+	log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!log->bs)
 		goto io_bs;
 
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 5d25beb..77cce35 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
 
 	pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		md_error(ppl_conf->mddev, log->rdev);
 
 	list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -907,8 +907,8 @@ static int ppl_write_empty_header(struct ppl_log *log)
 	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
 
 	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
-			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_FUA, 0,
-			  false)) {
+			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
+			  REQ_FUA, 0, false)) {
 		md_error(rdev->mddev, rdev);
 		ret = -EIO;
 	}
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
 		goto err;
 	}
 
-	ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+	ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
 	if (!ppl_conf->bs) {
 		ret = -ENOMEM;
 		goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9c4f765..62c965b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
 
 	pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
 		s = sh->sector + rdev->new_data_offset;
 	else
 		s = sh->sector + rdev->data_offset;
-	if (!bi->bi_error) {
+	if (!bi->bi_status) {
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			/* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
-		bi->bi_error);
+		bi->bi_status);
 	if (i == disks) {
 		bio_reset(bi);
 		BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 
 	if (replacement) {
-		if (bi->bi_error)
+		if (bi->bi_status)
 			md_error(conf->mddev, rdev);
 		else if (is_badblock(rdev, sh->sector,
 				     STRIPE_SECTORS,
 				     &first_bad, &bad_sectors))
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
-		if (bi->bi_error) {
+		if (bi->bi_status) {
 			set_bit(STRIPE_DEGRADED, &sh->state);
 			set_bit(WriteErrorSeen, &rdev->flags);
 			set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
 	}
 	rdev_dec_pending(rdev, conf->mddev);
 
-	if (sh->batch_head && bi->bi_error && !replacement)
+	if (sh->batch_head && bi->bi_status && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
 	bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			md_write_end(conf->mddev);
 			bio_endio(bi);
 			bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 
-				bi->bi_error = -EIO;
+				bi->bi_status = BLK_STS_IOERR;
 				bio_endio(bi);
 				bi = nextbi;
 			}
@@ -4085,10 +4085,15 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
 			set_bit(STRIPE_INSYNC, &sh->state);
 		else {
 			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
-			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
-			else {
+				pr_warn_ratelimited("%s: mismatch sector in range "
+						    "%llu-%llu\n", mdname(conf->mddev),
+						    (unsigned long long) sh->sector,
+						    (unsigned long long) sh->sector +
+						    STRIPE_SECTORS);
+			} else {
 				sh->check_state = check_state_compute_run;
 				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
 				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
@@ -4237,10 +4242,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
 			}
 		} else {
 			atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
-			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
 				/* don't try to repair!! */
 				set_bit(STRIPE_INSYNC, &sh->state);
-			else {
+				pr_warn_ratelimited("%s: mismatch sector in range "
+						    "%llu-%llu\n", mdname(conf->mddev),
+						    (unsigned long long) sh->sector,
+						    (unsigned long long) sh->sector +
+						    STRIPE_SECTORS);
+			} else {
 				int *target = &sh->ops.target;
 
 				sh->ops.target = -1;
@@ -5144,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
-	int error = bi->bi_error;
+	blk_status_t error = bi->bi_status;
 
 	bio_put(bi);
 
@@ -5721,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
 			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
-			bi->bi_error = -EIO;
+			bi->bi_status = BLK_STS_IOERR;
 			break;
 		}
 	}
@@ -6933,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 			goto abort;
 	}
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
 	if (!conf->bio_split)
 		goto abort;
 	conf->mddev = mddev;
@@ -7108,6 +7118,9 @@ static int raid5_run(struct mddev *mddev)
 	long long min_offset_diff = 0;
 	int first = 1;
 
+	if (mddev_init_writes_pending(mddev) < 0)
+		return -ENOMEM;
+
 	if (mddev->recovery_cp != MaxSector)
 		pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
 			  mdname(mddev));
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index b72edd2..55d9c2b 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -2,6 +2,12 @@
 # Multimedia device configuration
 #
 
+config CEC_CORE
+	tristate
+
+config CEC_NOTIFIER
+	bool
+
 menuconfig MEDIA_SUPPORT
 	tristate "Multimedia support"
 	depends on HAS_IOMEM
diff --git a/drivers/media/Makefile b/drivers/media/Makefile
index 523fea3..044503a 100644
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile
@@ -4,8 +4,6 @@
 
 media-objs	:= media-device.o media-devnode.o media-entity.o
 
-obj-$(CONFIG_CEC_CORE) += cec/
-
 #
 # I2C drivers should come before other drivers, otherwise they'll fail
 # when compiled as builtin drivers
@@ -26,6 +24,8 @@
 # There are both core and drivers at RC subtree - merge before drivers
 obj-y += rc/
 
+obj-$(CONFIG_CEC_CORE) += cec/
+
 #
 # Finally, merge the drivers that require the core
 #
diff --git a/drivers/media/cec/Kconfig b/drivers/media/cec/Kconfig
index f944d93..43428cec 100644
--- a/drivers/media/cec/Kconfig
+++ b/drivers/media/cec/Kconfig
@@ -1,19 +1,6 @@
-config CEC_CORE
-	tristate
-	depends on MEDIA_CEC_SUPPORT
-	default y
-
-config MEDIA_CEC_NOTIFIER
-	bool
-
 config MEDIA_CEC_RC
 	bool "HDMI CEC RC integration"
 	depends on CEC_CORE && RC_CORE
+	depends on CEC_CORE=m || RC_CORE=y
 	---help---
 	  Pass on CEC remote control messages to the RC framework.
-
-config MEDIA_CEC_DEBUG
-	bool "HDMI CEC debugfs interface"
-	depends on CEC_CORE && DEBUG_FS
-	---help---
-	  Turns on the DebugFS interface for CEC devices.
diff --git a/drivers/media/cec/Makefile b/drivers/media/cec/Makefile
index 402a6c6..eaf408e 100644
--- a/drivers/media/cec/Makefile
+++ b/drivers/media/cec/Makefile
@@ -1,6 +1,6 @@
 cec-objs := cec-core.o cec-adap.o cec-api.o cec-edid.o
 
-ifeq ($(CONFIG_MEDIA_CEC_NOTIFIER),y)
+ifeq ($(CONFIG_CEC_NOTIFIER),y)
   cec-objs += cec-notifier.o
 endif
 
diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index f5fe01c..9dfc798 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -1864,7 +1864,7 @@ void cec_monitor_all_cnt_dec(struct cec_adapter *adap)
 		WARN_ON(call_op(adap, adap_monitor_all_enable, 0));
 }
 
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 /*
  * Log the current state of the CEC adapter.
  * Very useful for debugging.
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c
index 0860fb4..999926f 100644
--- a/drivers/media/cec/cec-api.c
+++ b/drivers/media/cec/cec-api.c
@@ -271,16 +271,10 @@ static long cec_receive(struct cec_adapter *adap, struct cec_fh *fh,
 			bool block, struct cec_msg __user *parg)
 {
 	struct cec_msg msg = {};
-	long err = 0;
+	long err;
 
 	if (copy_from_user(&msg, parg, sizeof(msg)))
 		return -EFAULT;
-	mutex_lock(&adap->lock);
-	if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
-		err = -ENONET;
-	mutex_unlock(&adap->lock);
-	if (err)
-		return err;
 
 	err = cec_receive_msg(fh, &msg, block);
 	if (err)
diff --git a/drivers/media/cec/cec-core.c b/drivers/media/cec/cec-core.c
index f9ebff9..2f87748 100644
--- a/drivers/media/cec/cec-core.c
+++ b/drivers/media/cec/cec-core.c
@@ -187,7 +187,7 @@ static void cec_devnode_unregister(struct cec_devnode *devnode)
 	put_device(&devnode->dev);
 }
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 static void cec_cec_notify(struct cec_adapter *adap, u16 pa)
 {
 	cec_s_phys_addr(adap, pa, false);
@@ -323,7 +323,7 @@ int cec_register_adapter(struct cec_adapter *adap,
 	}
 
 	dev_set_drvdata(&adap->devnode.dev, adap);
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 	if (!top_cec_dir)
 		return 0;
 
@@ -355,7 +355,7 @@ void cec_unregister_adapter(struct cec_adapter *adap)
 	adap->rc = NULL;
 #endif
 	debugfs_remove_recursive(adap->cec_dir);
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 	if (adap->notifier)
 		cec_notifier_unregister(adap->notifier);
 #endif
@@ -395,7 +395,7 @@ static int __init cec_devnode_init(void)
 		return ret;
 	}
 
-#ifdef CONFIG_MEDIA_CEC_DEBUG
+#ifdef CONFIG_DEBUG_FS
 	top_cec_dir = debugfs_create_dir("cec", NULL);
 	if (IS_ERR_OR_NULL(top_cec_dir)) {
 		pr_warn("cec: Failed to create debugfs cec dir\n");
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index fd181c9..aaa9471 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -220,7 +220,8 @@
 
 config VIDEO_ADV7604_CEC
 	bool "Enable Analog Devices ADV7604 CEC support"
-	depends on VIDEO_ADV7604 && CEC_CORE
+	depends on VIDEO_ADV7604
+	select CEC_CORE
 	---help---
 	  When selected the adv7604 will support the optional
 	  HDMI CEC feature.
@@ -240,7 +241,8 @@
 
 config VIDEO_ADV7842_CEC
 	bool "Enable Analog Devices ADV7842 CEC support"
-	depends on VIDEO_ADV7842 && CEC_CORE
+	depends on VIDEO_ADV7842
+	select CEC_CORE
 	---help---
 	  When selected the adv7842 will support the optional
 	  HDMI CEC feature.
@@ -478,7 +480,8 @@
 
 config VIDEO_ADV7511_CEC
 	bool "Enable Analog Devices ADV7511 CEC support"
-	depends on VIDEO_ADV7511 && CEC_CORE
+	depends on VIDEO_ADV7511
+	select CEC_CORE
 	---help---
 	  When selected the adv7511 will support the optional
 	  HDMI CEC feature.
diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index acef4ec..3251cba 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -223,7 +223,7 @@ static void i2c_wr8(struct v4l2_subdev *sd, u16 reg, u8 val)
 static void i2c_wr8_and_or(struct v4l2_subdev *sd, u16 reg,
 		u8 mask, u8 val)
 {
-	i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 2) & mask) | val, 2);
+	i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 1) & mask) | val, 1);
 }
 
 static u16 i2c_rd16(struct v4l2_subdev *sd, u16 reg)
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index ac026ee..041cb80 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -501,8 +501,9 @@
 
 config VIDEO_SAMSUNG_S5P_CEC
        tristate "Samsung S5P CEC driver"
-       depends on CEC_CORE && (PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST)
-       select MEDIA_CEC_NOTIFIER
+       depends on PLAT_S5P || ARCH_EXYNOS || COMPILE_TEST
+       select CEC_CORE
+       select CEC_NOTIFIER
        ---help---
          This is a driver for Samsung S5P HDMI CEC interface. It uses the
          generic CEC framework interface.
@@ -511,8 +512,9 @@
 
 config VIDEO_STI_HDMI_CEC
        tristate "STMicroelectronics STiH4xx HDMI CEC driver"
-       depends on CEC_CORE && (ARCH_STI || COMPILE_TEST)
-       select MEDIA_CEC_NOTIFIER
+       depends on ARCH_STI || COMPILE_TEST
+       select CEC_CORE
+       select CEC_NOTIFIER
        ---help---
          This is a driver for STIH4xx HDMI CEC interface. It uses the
          generic CEC framework interface.
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
index 57a842f..b7731b1 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c
@@ -493,10 +493,10 @@ static int vdec_h264_get_param(unsigned long h_vdec,
 }
 
 static struct vdec_common_if vdec_h264_if = {
-	vdec_h264_init,
-	vdec_h264_decode,
-	vdec_h264_get_param,
-	vdec_h264_deinit,
+	.init		= vdec_h264_init,
+	.decode		= vdec_h264_decode,
+	.get_param	= vdec_h264_get_param,
+	.deinit		= vdec_h264_deinit,
 };
 
 struct vdec_common_if *get_h264_dec_comm_if(void);
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
index 6e7a62a..b9fad6a 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c
@@ -620,10 +620,10 @@ static void vdec_vp8_deinit(unsigned long h_vdec)
 }
 
 static struct vdec_common_if vdec_vp8_if = {
-	vdec_vp8_init,
-	vdec_vp8_decode,
-	vdec_vp8_get_param,
-	vdec_vp8_deinit,
+	.init		= vdec_vp8_init,
+	.decode		= vdec_vp8_decode,
+	.get_param	= vdec_vp8_get_param,
+	.deinit		= vdec_vp8_deinit,
 };
 
 struct vdec_common_if *get_vp8_dec_comm_if(void);
diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
index 5539b18..1daee12 100644
--- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
+++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c
@@ -979,10 +979,10 @@ static int vdec_vp9_get_param(unsigned long h_vdec,
 }
 
 static struct vdec_common_if vdec_vp9_if = {
-	vdec_vp9_init,
-	vdec_vp9_decode,
-	vdec_vp9_get_param,
-	vdec_vp9_deinit,
+	.init		= vdec_vp9_init,
+	.decode		= vdec_vp9_decode,
+	.get_param	= vdec_vp9_get_param,
+	.deinit		= vdec_vp9_deinit,
 };
 
 struct vdec_common_if *get_vp9_dec_comm_if(void);
diff --git a/drivers/media/platform/vivid/Kconfig b/drivers/media/platform/vivid/Kconfig
index b36ac19..154de92 100644
--- a/drivers/media/platform/vivid/Kconfig
+++ b/drivers/media/platform/vivid/Kconfig
@@ -26,7 +26,8 @@
 
 config VIDEO_VIVID_CEC
 	bool "Enable CEC emulation support"
-	depends on VIDEO_VIVID && CEC_CORE
+	depends on VIDEO_VIVID
+	select CEC_CORE
 	---help---
 	  When selected the vivid module will emulate the optional
 	  HDMI CEC feature.
diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c
index 90f66dc7..a2fc1a1 100644
--- a/drivers/media/rc/rc-ir-raw.c
+++ b/drivers/media/rc/rc-ir-raw.c
@@ -211,7 +211,7 @@ EXPORT_SYMBOL_GPL(ir_raw_event_set_idle);
  */
 void ir_raw_event_handle(struct rc_dev *dev)
 {
-	if (!dev->raw)
+	if (!dev->raw || !dev->raw->thread)
 		return;
 
 	wake_up_process(dev->raw->thread);
@@ -490,6 +490,7 @@ int ir_raw_event_register(struct rc_dev *dev)
 {
 	int rc;
 	struct ir_raw_handler *handler;
+	struct task_struct *thread;
 
 	if (!dev)
 		return -EINVAL;
@@ -507,13 +508,15 @@ int ir_raw_event_register(struct rc_dev *dev)
 	 * because the event is coming from userspace
 	 */
 	if (dev->driver_type != RC_DRIVER_IR_RAW_TX) {
-		dev->raw->thread = kthread_run(ir_raw_event_thread, dev->raw,
-					       "rc%u", dev->minor);
+		thread = kthread_run(ir_raw_event_thread, dev->raw, "rc%u",
+				     dev->minor);
 
-		if (IS_ERR(dev->raw->thread)) {
-			rc = PTR_ERR(dev->raw->thread);
+		if (IS_ERR(thread)) {
+			rc = PTR_ERR(thread);
 			goto out;
 		}
+
+		dev->raw->thread = thread;
 	}
 
 	mutex_lock(&ir_raw_handler_lock);
diff --git a/drivers/media/rc/sir_ir.c b/drivers/media/rc/sir_ir.c
index e12ec50..90a5f8f 100644
--- a/drivers/media/rc/sir_ir.c
+++ b/drivers/media/rc/sir_ir.c
@@ -183,9 +183,15 @@ static irqreturn_t sir_interrupt(int irq, void *dev_id)
 	static unsigned long delt;
 	unsigned long deltintr;
 	unsigned long flags;
+	int counter = 0;
 	int iir, lsr;
 
 	while ((iir = inb(io + UART_IIR) & UART_IIR_ID)) {
+		if (++counter > 256) {
+			dev_err(&sir_ir_dev->dev, "Trapped in interrupt");
+			break;
+		}
+
 		switch (iir & UART_IIR_ID) { /* FIXME toto treba preriedit */
 		case UART_IIR_MSI:
 			(void)inb(io + UART_MSR);
diff --git a/drivers/media/usb/pulse8-cec/Kconfig b/drivers/media/usb/pulse8-cec/Kconfig
index 8937f39..18ead44 100644
--- a/drivers/media/usb/pulse8-cec/Kconfig
+++ b/drivers/media/usb/pulse8-cec/Kconfig
@@ -1,6 +1,7 @@
 config USB_PULSE8_CEC
 	tristate "Pulse Eight HDMI CEC"
-	depends on USB_ACM && CEC_CORE
+	depends on USB_ACM
+	select CEC_CORE
 	select SERIO
 	select SERIO_SERPORT
 	---help---
diff --git a/drivers/media/usb/rainshadow-cec/Kconfig b/drivers/media/usb/rainshadow-cec/Kconfig
index 3eb8660..030ef01 100644
--- a/drivers/media/usb/rainshadow-cec/Kconfig
+++ b/drivers/media/usb/rainshadow-cec/Kconfig
@@ -1,6 +1,7 @@
 config USB_RAINSHADOW_CEC
 	tristate "RainShadow Tech HDMI CEC"
-	depends on USB_ACM && CEC_CORE
+	depends on USB_ACM
+	select CEC_CORE
 	select SERIO
 	select SERIO_SERPORT
 	---help---
diff --git a/drivers/media/usb/rainshadow-cec/rainshadow-cec.c b/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
index 541ca54..4126552 100644
--- a/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
+++ b/drivers/media/usb/rainshadow-cec/rainshadow-cec.c
@@ -119,7 +119,7 @@ static void rain_irq_work_handler(struct work_struct *work)
 
 	while (true) {
 		unsigned long flags;
-		bool exit_loop;
+		bool exit_loop = false;
 		char data;
 
 		spin_lock_irqsave(&rain->buf_lock, flags);
@@ -336,6 +336,7 @@ static int rain_connect(struct serio *serio, struct serio_driver *drv)
 	serio_set_drvdata(serio, rain);
 	INIT_WORK(&rain->work, rain_irq_work_handler);
 	mutex_init(&rain->write_lock);
+	spin_lock_init(&rain->buf_lock);
 
 	err = serio_open(serio, drv);
 	if (err)
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 94afbbf9..c0175ea 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -868,7 +868,7 @@ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
 
 void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
 {
-	if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+	if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
 		return NULL;
 
 	return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c
index 35910f9..99e644c 100644
--- a/drivers/memory/atmel-ebi.c
+++ b/drivers/memory/atmel-ebi.c
@@ -581,7 +581,7 @@ static int atmel_ebi_probe(struct platform_device *pdev)
 	return of_platform_populate(np, NULL, NULL, dev);
 }
 
-static int atmel_ebi_resume(struct device *dev)
+static __maybe_unused int atmel_ebi_resume(struct device *dev)
 {
 	struct atmel_ebi *ebi = dev_get_drvdata(dev);
 	struct atmel_ebi_dev *ebid;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c..22de7f5 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
 		spin_lock_irqsave(&msb->q_lock, flags);
 
 		if (len)
-			if (!__blk_end_request(msb->req, 0, len))
+			if (!__blk_end_request(msb->req, BLK_STS_OK, len))
 				msb->req = NULL;
 
 		if (error && msb->req) {
+			blk_status_t ret = errno_to_blk_status(error);
 			dbg_verbose("IO: ending one sector of the request with error");
-			if (!__blk_end_request(msb->req, error, msb->page_size))
+			if (!__blk_end_request(msb->req, ret, msb->page_size))
 				msb->req = NULL;
 		}
 
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
 		WARN_ON(!msb->io_queue_stopped);
 
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a2..8897962 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ static int mspro_block_issue_req(struct memstick_dev *card, int chunk)
 					       msb->req_sg);
 
 		if (!msb->seg_count) {
-			chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+			chunk = __blk_end_request_cur(msb->block_req,
+					BLK_STS_RESOURCE);
 			continue;
 		}
 
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 		if (error && !t_len)
 			t_len = blk_rq_cur_bytes(msb->block_req);
 
-		chunk = __blk_end_request(msb->block_req, error, t_len);
+		chunk = __blk_end_request(msb->block_req,
+				errno_to_blk_status(error), t_len);
 
 		error = mspro_block_issue_req(card, chunk);
 
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
 	if (msb->eject) {
 		while ((req = blk_fetch_request(q)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 
 		return;
 	}
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 75488e6..8d46e3a 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
 	int ret;
 
 	ret = regmap_read_poll_timeout(arizona->regmap,
-				       ARIZONA_INTERRUPT_RAW_STATUS_5, val,
-				       ((val & mask) == target),
+				       reg, val, ((val & mask) == target),
 				       ARIZONA_REG_POLL_DELAY_US,
 				       timeout_ms * 1000);
 	if (ret)
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 4472ce1..8c32040 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,7 +45,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = NULL;
 
-	if (cxl_is_psl8(afu)) {
+	if (cxl_is_power8()) {
 		spin_lock_init(&ctx->sste_lock);
 
 		/*
@@ -189,7 +189,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 		if (start + len > ctx->afu->adapter->ps_size)
 			return -EINVAL;
 
-		if (cxl_is_psl9(ctx->afu)) {
+		if (cxl_is_power9()) {
 			/*
 			 * Make sure there is a valid problem state
 			 * area space for this AFU.
@@ -324,7 +324,7 @@ static void reclaim_ctx(struct rcu_head *rcu)
 {
 	struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
-	if (cxl_is_psl8(ctx->afu))
+	if (cxl_is_power8())
 		free_page((u64)ctx->sstp);
 	if (ctx->ff_page)
 		__free_page(ctx->ff_page);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index c8568ea..a03f8e7 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -357,6 +357,7 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL9_DSISR_An_PF_RGP  0x0000000000000090ULL  /* PTE not found (Radix Guest (parent)) 0b10010000 */
 #define CXL_PSL9_DSISR_An_PF_HRH  0x0000000000000094ULL  /* PTE not found (HPT/Radix Host)       0b10010100 */
 #define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL  /* PTE not found (STEG VA)              0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH   0x00000000000000B4ULL  /* Unsupported Radix Tree Configuration 0b10110100 */
 
 /****** CXL_PSL_TFC_An ******************************************************/
 #define CXL_PSL_TFC_An_A  (1ull << (63-28)) /* Acknowledge non-translation fault */
@@ -844,24 +845,15 @@ static inline bool cxl_is_power8(void)
 
 static inline bool cxl_is_power9(void)
 {
-	/* intermediate solution */
-	if (!cxl_is_power8() &&
-	   (cpu_has_feature(CPU_FTRS_POWER9) ||
-	    cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (pvr_version_is(PVR_POWER9))
 		return true;
 	return false;
 }
 
-static inline bool cxl_is_psl8(struct cxl_afu *afu)
+static inline bool cxl_is_power9_dd1(void)
 {
-	if (afu->adapter->caia_major == 1)
-		return true;
-	return false;
-}
-
-static inline bool cxl_is_psl9(struct cxl_afu *afu)
-{
-	if (afu->adapter->caia_major == 2)
+	if ((pvr_version_is(PVR_POWER9)) &&
+	    cpu_has_feature(CPU_FTR_POWER9_DD1))
 		return true;
 	return false;
 }
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 5344448..c79e39b 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -187,7 +187,7 @@ static struct mm_struct *get_mem_context(struct cxl_context *ctx)
 
 static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DS))
+	if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
 		return true;
 
 	return false;
@@ -195,15 +195,22 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 
 static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DM))
+	u64 crs; /* Translation Checkout Response Status */
+
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
 		return true;
 
-	if ((cxl_is_psl9(ctx->afu)) &&
-	   ((dsisr & CXL_PSL9_DSISR_An_CO_MASK) &
-		(CXL_PSL9_DSISR_An_PF_SLR | CXL_PSL9_DSISR_An_PF_RGC |
-		 CXL_PSL9_DSISR_An_PF_RGP | CXL_PSL9_DSISR_An_PF_HRH |
-		 CXL_PSL9_DSISR_An_PF_STEG)))
-		return true;
+	if (cxl_is_power9()) {
+		crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
+		if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
+		    (crs == CXL_PSL9_DSISR_An_URTCH)) {
+			return true;
+		}
+	}
 
 	return false;
 }
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 17b433f..0761271 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -159,11 +159,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
 	/* Do this outside the status_mutex to avoid a circular dependency with
 	 * the locking in cxl_mmap_fault() */
-	if (copy_from_user(&work, uwork,
-			   sizeof(struct cxl_ioctl_start_work))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	if (copy_from_user(&work, uwork, sizeof(work)))
+		return -EFAULT;
 
 	mutex_lock(&ctx->status_mutex);
 	if (ctx->status != OPENED) {
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 1703655..c1ba0d4 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -329,8 +329,15 @@ static int __init init_cxl(void)
 
 	cxl_debugfs_init();
 
-	if ((rc = register_cxl_calls(&cxl_calls)))
-		goto err;
+	/*
+	 * We don't register the callback on P9. The SLB callback is
+	 * only used for the PSL8 MMU and CX4.
+	 */
+	if (cxl_is_power8()) {
+		rc = register_cxl_calls(&cxl_calls);
+		if (rc)
+			goto err;
+	}
 
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		cxl_ops = &cxl_native_ops;
@@ -347,7 +354,8 @@ static int __init init_cxl(void)
 
 	return 0;
 err1:
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 err:
 	cxl_debugfs_exit();
 	cxl_file_exit();
@@ -366,7 +374,8 @@ static void exit_cxl(void)
 
 	cxl_debugfs_exit();
 	cxl_file_exit();
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 	idr_destroy(&cxl_adapter_idr);
 }
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 871a2f0..2b2f889 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -105,11 +105,16 @@ static int native_afu_reset(struct cxl_afu *afu)
 			   CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
 			   false);
 
-	/* Re-enable any masked interrupts */
-	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-	serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
-	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
+	/*
+	 * Re-enable any masked interrupts when the AFU is not
+	 * activated to avoid side effects after attaching a process
+	 * in dedicated mode.
+	 */
+	if (afu->current_mode == 0) {
+		serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+		cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+	}
 
 	return rc;
 }
@@ -139,9 +144,9 @@ int cxl_psl_purge(struct cxl_afu *afu)
 
 	pr_devel("PSL purge request\n");
 
-	if (cxl_is_psl8(afu))
+	if (cxl_is_power8())
 		trans_fault = CXL_PSL_DSISR_TRANS;
-	if (cxl_is_psl9(afu))
+	if (cxl_is_power9())
 		trans_fault = CXL_PSL9_DSISR_An_TF;
 
 	if (!cxl_ops->link_ok(afu->adapter, afu)) {
@@ -603,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
 		if (!test_tsk_thread_flag(current, TIF_32BIT))
 			sr |= CXL_PSL_SR_An_SF;
 	}
-	if (cxl_is_psl9(ctx->afu)) {
+	if (cxl_is_power9()) {
 		if (radix_enabled())
 			sr |= CXL_PSL_SR_An_XLAT_ror;
 		else
@@ -1117,10 +1122,10 @@ static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
 
 static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
 {
-	if ((cxl_is_psl8(afu)) && (dsisr & CXL_PSL_DSISR_TRANS))
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
 		return true;
 
-	if ((cxl_is_psl9(afu)) && (dsisr & CXL_PSL9_DSISR_An_TF))
+	if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
 		return true;
 
 	return false;
@@ -1194,10 +1199,10 @@ static void native_irq_wait(struct cxl_context *ctx)
 		if (ph != ctx->pe)
 			return;
 		dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
-		if (cxl_is_psl8(ctx->afu) &&
+		if (cxl_is_power8() &&
 		   ((dsisr & CXL_PSL_DSISR_PENDING) == 0))
 			return;
-		if (cxl_is_psl9(ctx->afu) &&
+		if (cxl_is_power9() &&
 		   ((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
 			return;
 		/*
@@ -1302,13 +1307,16 @@ int cxl_native_register_psl_err_irq(struct cxl *adapter)
 
 void cxl_native_release_psl_err_irq(struct cxl *adapter)
 {
-	if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
+	if (adapter->native->err_virq == 0 ||
+	    adapter->native->err_virq !=
+	    irq_find_mapping(NULL, adapter->native->err_hwirq))
 		return;
 
 	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
 	cxl_unmap_irq(adapter->native->err_virq, adapter);
 	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
 	kfree(adapter->irq_name);
+	adapter->native->err_virq = 0;
 }
 
 int cxl_native_register_serr_irq(struct cxl_afu *afu)
@@ -1346,13 +1354,15 @@ int cxl_native_register_serr_irq(struct cxl_afu *afu)
 
 void cxl_native_release_serr_irq(struct cxl_afu *afu)
 {
-	if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+	if (afu->serr_virq == 0 ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
 		return;
 
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
 	cxl_unmap_irq(afu->serr_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
 	kfree(afu->err_irq_name);
+	afu->serr_virq = 0;
 }
 
 int cxl_native_register_psl_irq(struct cxl_afu *afu)
@@ -1375,12 +1385,15 @@ int cxl_native_register_psl_irq(struct cxl_afu *afu)
 
 void cxl_native_release_psl_irq(struct cxl_afu *afu)
 {
-	if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
+	if (afu->native->psl_virq == 0 ||
+	    afu->native->psl_virq !=
+	    irq_find_mapping(NULL, afu->native->psl_hwirq))
 		return;
 
 	cxl_unmap_irq(afu->native->psl_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
 	kfree(afu->psl_irq_name);
+	afu->native->psl_virq = 0;
 }
 
 static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 6dc1ee5..1eb9859 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -436,7 +436,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	/* nMMU_ID Defaults to: b’000001001’*/
 	xsl_dsnctl |= ((u64)0x09 << (63-28));
 
-	if (cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+	if (!(cxl_is_power9_dd1())) {
 		/*
 		 * Used to identify CAPI packets which should be sorted into
 		 * the Non-Blocking queues by the PHB. This field should match
@@ -491,7 +491,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
 
 	/* Disable vc dd1 fix */
-	if ((cxl_is_power9() && cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (cxl_is_power9_dd1())
 		cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
 
 	return 0;
@@ -1439,8 +1439,7 @@ int cxl_pci_reset(struct cxl *adapter)
 	 * The adapter is about to be reset, so ignore errors.
 	 * Not supported on P9 DD1
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
@@ -1750,7 +1749,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
 	.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
 	.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
 	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
-	.err_irq_dump_registers = cxl_native_err_irq_dump_regs,
 	.debugfs_stop_trace = cxl_stop_trace_psl9,
 	.write_timebase_ctrl = write_timebase_ctrl_psl9,
 	.timebase_read = timebase_read_psl9,
@@ -1889,8 +1887,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
 	 * Flush adapter datacache as its about to be removed.
 	 * Not supported on P9 DD1.
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	cxl_deconfigure_adapter(adapter);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index d1928fd..07aad85 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -763,8 +763,10 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 {
 	struct mei_cl_device *cldev = to_mei_cl_device(dev);
 	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
 
-	return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
+	return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:%02X:",
+			 cldev->name, uuid, version);
 }
 static DEVICE_ATTR_RO(modalias);
 
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index c862cd4..b8069ee 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -309,6 +309,9 @@ static inline enum xp_retval
 xpc_send(short partid, int ch_number, u32 flags, void *payload,
 	 u16 payload_size)
 {
+	if (!xpc_interface.send)
+		return xpNotLoaded;
+
 	return xpc_interface.send(partid, ch_number, flags, payload,
 				  payload_size);
 }
@@ -317,6 +320,9 @@ static inline enum xp_retval
 xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
 		u16 payload_size, xpc_notify_func func, void *key)
 {
+	if (!xpc_interface.send_notify)
+		return xpNotLoaded;
+
 	return xpc_interface.send_notify(partid, ch_number, flags, payload,
 					 payload_size, func, key);
 }
@@ -324,12 +330,16 @@ xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
 static inline void
 xpc_received(short partid, int ch_number, void *payload)
 {
-	return xpc_interface.received(partid, ch_number, payload);
+	if (xpc_interface.received)
+		xpc_interface.received(partid, ch_number, payload);
 }
 
 static inline enum xp_retval
 xpc_partid_to_nasids(short partid, void *nasids)
 {
+	if (!xpc_interface.partid_to_nasids)
+		return xpNotLoaded;
+
 	return xpc_interface.partid_to_nasids(partid, nasids);
 }
 
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 01be66d..6d7f557 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -69,23 +69,9 @@ struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
 EXPORT_SYMBOL_GPL(xpc_registrations);
 
 /*
- * Initialize the XPC interface to indicate that XPC isn't loaded.
+ * Initialize the XPC interface to NULL to indicate that XPC isn't loaded.
  */
-static enum xp_retval
-xpc_notloaded(void)
-{
-	return xpNotLoaded;
-}
-
-struct xpc_interface xpc_interface = {
-	(void (*)(int))xpc_notloaded,
-	(void (*)(int))xpc_notloaded,
-	(enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
-	(enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
-			   void *))xpc_notloaded,
-	(void (*)(short, int, void *))xpc_notloaded,
-	(enum xp_retval(*)(short, void *))xpc_notloaded
-};
+struct xpc_interface xpc_interface = { };
 EXPORT_SYMBOL_GPL(xpc_interface);
 
 /*
@@ -115,17 +101,7 @@ EXPORT_SYMBOL_GPL(xpc_set_interface);
 void
 xpc_clear_interface(void)
 {
-	xpc_interface.connect = (void (*)(int))xpc_notloaded;
-	xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
-	xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
-	    xpc_notloaded;
-	xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
-						       u16, xpc_notify_func,
-						       void *))xpc_notloaded;
-	xpc_interface.received = (void (*)(short, int, void *))
-	    xpc_notloaded;
-	xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
-	    xpc_notloaded;
+	memset(&xpc_interface, 0, sizeof(xpc_interface));
 }
 EXPORT_SYMBOL_GPL(xpc_clear_interface);
 
@@ -188,7 +164,8 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
 
 	mutex_unlock(&registration->mutex);
 
-	xpc_interface.connect(ch_number);
+	if (xpc_interface.connect)
+		xpc_interface.connect(ch_number);
 
 	return xpSuccess;
 }
@@ -237,7 +214,8 @@ xpc_disconnect(int ch_number)
 	registration->assigned_limit = 0;
 	registration->idle_limit = 0;
 
-	xpc_interface.disconnect(ch_number);
+	if (xpc_interface.disconnect)
+		xpc_interface.disconnect(ch_number);
 
 	mutex_unlock(&registration->mutex);
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b07..6ff94a9 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_DISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!mmc_can_erase(card)) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto fail;
 	}
 
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 		if (!err)
 			err = mmc_erase(card, from, nr, arg);
 	} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
-	if (!err)
+	if (err)
+		status = BLK_STS_IOERR;
+	else
 		mmc_blk_reset_success(md, type);
 fail:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	struct mmc_card *card = md->queue.card;
 	unsigned int from, nr, arg;
 	int err = 0, type = MMC_BLK_SECDISCARD;
+	blk_status_t status = BLK_STS_OK;
 
 	if (!(mmc_can_secure_erase_trim(card))) {
-		err = -EOPNOTSUPP;
+		status = BLK_STS_NOTSUPP;
 		goto out;
 	}
 
@@ -1254,8 +1258,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	err = mmc_erase(card, from, nr, arg);
 	if (err == -EIO)
 		goto out_retry;
-	if (err)
+	if (err) {
+		status = BLK_STS_IOERR;
 		goto out;
+	}
 
 	if (arg == MMC_SECURE_TRIM1_ARG) {
 		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 		err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
 		if (err == -EIO)
 			goto out_retry;
-		if (err)
+		if (err) {
+			status = BLK_STS_IOERR;
 			goto out;
+		}
 	}
 
 out_retry:
@@ -1280,7 +1288,7 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 	if (!err)
 		mmc_blk_reset_success(md, type);
 out:
-	blk_end_request(req, err, blk_rq_bytes(req));
+	blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
 	int ret = 0;
 
 	ret = mmc_flush_cache(card);
-	if (ret)
-		ret = -EIO;
-
-	blk_end_request_all(req, ret);
+	blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 /*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
 {
 	if (mmc_card_removed(card))
 		req->rq_flags |= RQF_QUIET;
-	while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+	while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
 	mmc_queue_req_free(mq, mqrq);
 }
 
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
 	 */
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
-		blk_end_request_all(req, -EIO);
+		blk_end_request_all(req, BLK_STS_IOERR);
 		mmc_queue_req_free(mq, mqrq);
 		return;
 	}
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 */
 			mmc_blk_reset_success(md, type);
 
-			req_pending = blk_end_request(old_req, 0,
+			req_pending = blk_end_request(old_req, BLK_STS_OK,
 						      brq->data.bytes_xfered);
 			/*
 			 * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 * time, so we only reach here after trying to
 			 * read a single sector.
 			 */
-			req_pending = blk_end_request(old_req, -EIO,
+			req_pending = blk_end_request(old_req, BLK_STS_IOERR,
 						      brq->data.blksz);
 			if (!req_pending) {
 				mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		if (req) {
-			blk_end_request_all(req, -EIO);
+			blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		goto out;
 	}
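
The drivers/mmc/core/block.c hunks above switch request completion from raw negative errnos to blk_status_t codes. A minimal sketch of that mapping, assuming a hypothetical helper mmc_blk_end() (the driver open-codes the same logic at each call site):

    #include <linux/blkdev.h>
    #include <linux/blk_types.h>

    /* Map a driver errno onto the block-layer status and complete the
     * request; anything other than -EOPNOTSUPP collapses to BLK_STS_IOERR,
     * much like errno_to_blk_status() would.
     */
    static void mmc_blk_end(struct request *req, int err)
    {
    	blk_status_t status;

    	if (!err)
    		status = BLK_STS_OK;
    	else if (err == -EOPNOTSUPP)
    		status = BLK_STS_NOTSUPP;
    	else
    		status = BLK_STS_IOERR;

    	blk_end_request(req, status, blk_rq_bytes(req));
    }
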
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 1304160..13ef162 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -27,6 +27,7 @@ struct mmc_pwrseq_simple {
 	struct mmc_pwrseq pwrseq;
 	bool clk_enabled;
 	u32 post_power_on_delay_ms;
+	u32 power_off_delay_us;
 	struct clk *ext_clk;
 	struct gpio_descs *reset_gpios;
 };
@@ -78,6 +79,10 @@ static void mmc_pwrseq_simple_power_off(struct mmc_host *host)
 
 	mmc_pwrseq_simple_set_gpios_value(pwrseq, 1);
 
+	if (pwrseq->power_off_delay_us)
+		usleep_range(pwrseq->power_off_delay_us,
+			2 * pwrseq->power_off_delay_us);
+
 	if (!IS_ERR(pwrseq->ext_clk) && pwrseq->clk_enabled) {
 		clk_disable_unprepare(pwrseq->ext_clk);
 		pwrseq->clk_enabled = false;
@@ -119,6 +124,8 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
 
 	device_property_read_u32(dev, "post-power-on-delay-ms",
 				 &pwrseq->post_power_on_delay_ms);
+	device_property_read_u32(dev, "power-off-delay-us",
+				 &pwrseq->power_off_delay_us);
 
 	pwrseq->pwrseq.dev = dev;
 	pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
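
For reference, the new power-off-delay-us support reduces to the optional-property pattern below; example_power_off_delay() is a hypothetical stand-alone illustration, not driver code:

    #include <linux/delay.h>
    #include <linux/property.h>

    /* Read an optional delay (left at 0 when the property is absent) and,
     * when set, sleep within [delay, 2 * delay] to give the scheduler slack.
     */
    static void example_power_off_delay(struct device *dev)
    {
    	u32 delay_us = 0;

    	device_property_read_u32(dev, "power-off-delay-us", &delay_us);
    	if (delay_us)
    		usleep_range(delay_us, 2 * delay_us);
    }
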
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6b..b659a28 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
 	if (!mq) {
 		while ((req = blk_fetch_request(q)) != NULL) {
 			req->rq_flags |= RQF_QUIET;
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		}
 		return;
 	}
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 		mmc_queue_setup_discard(mq->queue, card);
 
 	if (card->bouncesz) {
-		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 		blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
 		blk_queue_max_segments(mq->queue, card->bouncesz / 512);
 		blk_queue_max_segment_size(mq->queue, card->bouncesz);
diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index 772d090..951d2cd 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -108,7 +108,7 @@ static void octeon_mmc_release_bus(struct cvm_mmc_host *host)
 static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val)
 {
 	writeq(val, host->base + MIO_EMM_INT(host));
-	if (!host->dma_active || (host->dma_active && !host->has_ciu3))
+	if (!host->has_ciu3)
 		writeq(val, host->base + MIO_EMM_INT_EN(host));
 }
 
@@ -267,7 +267,7 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 	}
 
 	host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev,
-							 "power-gpios",
+							 "power",
 							 GPIOD_OUT_HIGH);
 	if (IS_ERR(host->global_pwr_gpiod)) {
 		dev_err(&pdev->dev, "Invalid power GPIO\n");
@@ -288,11 +288,20 @@ static int octeon_mmc_probe(struct platform_device *pdev)
 		if (ret) {
 			dev_err(&pdev->dev, "Error populating slots\n");
 			octeon_mmc_set_shared_power(host, 0);
-			return ret;
+			goto error;
 		}
 		i++;
 	}
 	return 0;
+
+error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
+	return ret;
 }
 
 static int octeon_mmc_remove(struct platform_device *pdev)
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index fe3d772..b9cc959 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -146,6 +146,12 @@ static int thunder_mmc_probe(struct pci_dev *pdev,
 	return 0;
 
 error:
+	for (i = 0; i < CAVIUM_MAX_MMC; i++) {
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+		if (host->slot_pdev[i])
+			of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL);
+	}
 	clk_disable_unprepare(host->clk);
 	return ret;
 }
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
index 58b51ba..b8aaf0f 100644
--- a/drivers/mmc/host/cavium.c
+++ b/drivers/mmc/host/cavium.c
@@ -839,14 +839,14 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		cvm_mmc_reset_bus(slot);
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 0);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 		break;
 
 	case MMC_POWER_UP:
 		if (host->global_pwr_gpiod)
 			host->set_shared_power(host, 1);
-		else
+		else if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
 		break;
 	}
@@ -968,20 +968,15 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot)
 		return -EINVAL;
 	}
 
-	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
-	if (IS_ERR(mmc->supply.vmmc)) {
-		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		/*
-		 * Legacy Octeon firmware has no regulator entry, fall-back to
-		 * a hard-coded voltage to get a sane OCR.
-		 */
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+	/*
+	 * Legacy Octeon firmware has no regulator entry, fall-back to
+	 * a hard-coded voltage to get a sane OCR.
+	 */
+	if (IS_ERR(mmc->supply.vmmc))
 		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	} else {
-		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
-		if (ret > 0)
-			mmc->ocr_avail = ret;
-	}
 
 	/* Common MMC bindings */
 	ret = mmc_of_parse(mmc);
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 1842ed3..de962c2 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -210,6 +210,15 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	int i;
 	bool use_desc_chain_mode = true;
 
+	/*
+	 * Broken SDIO with AP6255-based WiFi on Khadas VIM Pro has been
+	 * reported. For some strange reason this occurs in descriptor
+	 * chain mode only. So let's fall back to bounce buffer mode
+	 * for command SD_IO_RW_EXTENDED.
+	 */
+	if (mrq->cmd->opcode == SD_IO_RW_EXTENDED)
+		return;
+
 	for_each_sg(data->sg, sg, data->sg_len, i)
 		/* check for 8 byte alignment */
 		if (sg->offset & 7) {
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 3275d49..61666d2 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -187,7 +187,8 @@ static const struct sdhci_iproc_data iproc_cygnus_data = {
 };
 
 static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
-	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
+	.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
 	.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
 	.ops = &sdhci_iproc_ops,
 };
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 92fc3f7..18957fe 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -404,10 +404,9 @@ struct intel_host {
 	bool	d3_retune;
 };
 
-const u8 intel_dsm_uuid[] = {
-	0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
-	0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
-};
+static const guid_t intel_dsm_guid =
+	GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F,
+		  0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61);
 
 static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 		       unsigned int fn, u32 *result)
@@ -416,7 +415,7 @@ static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
 	int err = 0;
 	size_t len;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL);
 	if (!obj)
 		return -EOPNOTSUPP;
 
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index 6356781..f7e26b0 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -787,14 +787,6 @@ int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios)
 	return ret;
 }
 
-void xenon_clean_phy(struct sdhci_host *host)
-{
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
-
-	kfree(priv->phy_params);
-}
-
 static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 			 const char *phy_name)
 {
@@ -819,11 +811,7 @@ static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
 	if (ret)
 		return ret;
 
-	ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
-	if (ret)
-		xenon_clean_phy(host);
-
-	return ret;
+	return xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
 }
 
 int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host)
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 6724665..bc1781b 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -486,7 +486,7 @@ static int xenon_probe(struct platform_device *pdev)
 
 	err = xenon_sdhc_prepare(host);
 	if (err)
-		goto clean_phy_param;
+		goto err_clk;
 
 	err = sdhci_add_host(host);
 	if (err)
@@ -496,8 +496,6 @@ static int xenon_probe(struct platform_device *pdev)
 
 remove_sdhc:
 	xenon_sdhc_unprepare(host);
-clean_phy_param:
-	xenon_clean_phy(host);
 err_clk:
 	clk_disable_unprepare(pltfm_host->clk);
 free_pltfm:
@@ -510,8 +508,6 @@ static int xenon_remove(struct platform_device *pdev)
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	xenon_clean_phy(host);
-
 	sdhci_remove_host(host, 0);
 
 	xenon_sdhc_unprepare(host);
diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h
index 6e6523e..73debb4 100644
--- a/drivers/mmc/host/sdhci-xenon.h
+++ b/drivers/mmc/host/sdhci-xenon.h
@@ -93,7 +93,6 @@ struct xenon_priv {
 };
 
 int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios);
-void xenon_clean_phy(struct sdhci_host *host);
 int xenon_phy_parse_dt(struct device_node *np,
 		       struct sdhci_host *host);
 void xenon_soc_pad_ctrl(struct sdhci_host *host,
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd..f336a9b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 }
 
 
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
 {
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
 	buf = bio_data(req->bio);
 
-	if (req_op(req) == REQ_OP_FLUSH)
-		return tr->flush(dev);
+	if (req_op(req) == REQ_OP_FLUSH) {
+		if (tr->flush(dev))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
+	}
 
 	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
 	    get_capacity(req->rq_disk))
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
-		return tr->discard(dev, block, nsect);
+		if (tr->discard(dev, block, nsect))
+			return BLK_STS_IOERR;
+		return BLK_STS_OK;
 	case REQ_OP_READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return -EIO;
+				return BLK_STS_IOERR;
 		rq_flush_dcache_pages(req);
-		return 0;
+		return BLK_STS_OK;
 	case REQ_OP_WRITE:
 		if (!tr->writesect)
-			return -EIO;
+			return BLK_STS_IOERR;
 
 		rq_flush_dcache_pages(req);
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return -EIO;
-		return 0;
+				return BLK_STS_IOERR;
 	default:
-		return -EIO;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
 	spin_lock_irq(rq->queue_lock);
 
 	while (1) {
-		int res;
+		blk_status_t res;
 
 		dev->bg_stop = false;
 		if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
 
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
-			__blk_end_request_all(req, -ENODEV);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 	else
 		queue_work(dev->wq, &dev->work);
 }
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 
+	blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index d474378..b1dd127 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -202,7 +202,7 @@ static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section,
 	return 0;
 }
 
-const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
+static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = {
 	.ecc = nand_ooblayout_ecc_lp_hamming,
 	.free = nand_ooblayout_free_lp_hamming,
 };
@@ -4361,7 +4361,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	/* Initialize the ->data_interface field. */
 	ret = nand_init_data_interface(chip);
 	if (ret)
-		return ret;
+		goto err_nand_init;
 
 	/*
 	 * Setup the data interface correctly on the chip and controller side.
@@ -4373,7 +4373,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	 */
 	ret = nand_setup_data_interface(chip);
 	if (ret)
-		return ret;
+		goto err_nand_init;
 
 	nand_maf_id = chip->id.data[0];
 	nand_dev_id = chip->id.data[1];
@@ -4404,6 +4404,12 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	mtd->size = i * chip->chipsize;
 
 	return 0;
+
+err_nand_init:
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
+
+	return ret;
 }
 EXPORT_SYMBOL(nand_scan_ident);
 
@@ -4574,18 +4580,23 @@ int nand_scan_tail(struct mtd_info *mtd)
 
 	/* New bad blocks should be marked in OOB, flash-based BBT, or both */
 	if (WARN_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) &&
-		   !(chip->bbt_options & NAND_BBT_USE_FLASH)))
-		return -EINVAL;
+		   !(chip->bbt_options & NAND_BBT_USE_FLASH))) {
+		ret = -EINVAL;
+		goto err_ident;
+	}
 
 	if (invalid_ecc_page_accessors(chip)) {
 		pr_err("Invalid ECC page accessors setup\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_ident;
 	}
 
 	if (!(chip->options & NAND_OWN_BUFFERS)) {
 		nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL);
-		if (!nbuf)
-			return -ENOMEM;
+		if (!nbuf) {
+			ret = -ENOMEM;
+			goto err_ident;
+		}
 
 		nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL);
 		if (!nbuf->ecccalc) {
@@ -4608,8 +4619,10 @@ int nand_scan_tail(struct mtd_info *mtd)
 
 		chip->buffers = nbuf;
 	} else {
-		if (!chip->buffers)
-			return -ENOMEM;
+		if (!chip->buffers) {
+			ret = -ENOMEM;
+			goto err_ident;
+		}
 	}
 
 	/* Set the internal oob buffer location, just after the page data */
@@ -4842,7 +4855,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 		return 0;
 
 	/* Build bad block table */
-	return chip->scan_bbt(mtd);
+	ret = chip->scan_bbt(mtd);
+	if (ret)
+		goto err_free;
+	return 0;
+
 err_free:
 	if (nbuf) {
 		kfree(nbuf->databuf);
@@ -4850,6 +4867,13 @@ int nand_scan_tail(struct mtd_info *mtd)
 		kfree(nbuf->ecccalc);
 		kfree(nbuf);
 	}
+
+err_ident:
+	/* Clean up nand_scan_ident(). */
+
+	/* Free manufacturer priv data. */
+	nand_manufacturer_cleanup(chip);
+
 	return ret;
 }
 EXPORT_SYMBOL(nand_scan_tail);
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 9d5ca0e..92e2cf8 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -6,7 +6,6 @@
  * published by the Free Software Foundation.
  *
  */
-#include <linux/module.h>
 #include <linux/mtd/nand.h>
 #include <linux/sizes.h>
 
diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c
index 9cfc403..1e07559 100644
--- a/drivers/mtd/nand/nand_samsung.c
+++ b/drivers/mtd/nand/nand_samsung.c
@@ -84,6 +84,9 @@ static void samsung_nand_decode_id(struct nand_chip *chip)
 			case 7:
 				chip->ecc_strength_ds = 60;
 				break;
+			default:
+				WARN(1, "Could not decode ECC info");
+				chip->ecc_step_ds = 0;
 			}
 		}
 	} else {
diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 05b6e10..49b286c 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c
@@ -55,10 +55,10 @@
  * byte 1 for other packets in the page (PKT_N, for N > 0)
  * ERR_COUNT_PKT_N is the max error count over all but the first packet.
  */
-#define DECODE_OK_PKT_0(v)	((v) & BIT(7))
-#define DECODE_OK_PKT_N(v)	((v) & BIT(15))
 #define ERR_COUNT_PKT_0(v)	(((v) >> 0) & 0x3f)
 #define ERR_COUNT_PKT_N(v)	(((v) >> 8) & 0x3f)
+#define DECODE_FAIL_PKT_0(v)	(((v) & BIT(7)) == 0)
+#define DECODE_FAIL_PKT_N(v)	(((v) & BIT(15)) == 0)
 
 /* Offsets relative to pbus_base */
 #define PBUS_CS_CTRL	0x83c
@@ -193,6 +193,8 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf)
 						  chip->ecc.strength);
 		if (res < 0)
 			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += res;
 
 		bitflips = max(res, bitflips);
 		buf += pkt_size;
@@ -202,9 +204,11 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf)
 	return bitflips;
 }
 
-static int decode_error_report(struct tango_nfc *nfc)
+static int decode_error_report(struct nand_chip *chip)
 {
 	u32 status, res;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 
 	status = readl_relaxed(nfc->reg_base + NFC_XFER_STATUS);
 	if (status & PAGE_IS_EMPTY)
@@ -212,10 +216,14 @@ static int decode_error_report(struct tango_nfc *nfc)
 
 	res = readl_relaxed(nfc->mem_base + ERROR_REPORT);
 
-	if (DECODE_OK_PKT_0(res) && DECODE_OK_PKT_N(res))
-		return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res));
+	if (DECODE_FAIL_PKT_0(res) || DECODE_FAIL_PKT_N(res))
+		return -EBADMSG;
 
-	return -EBADMSG;
+	/* ERR_COUNT_PKT_N is max, not sum, but that's all we have */
+	mtd->ecc_stats.corrected +=
+		ERR_COUNT_PKT_0(res) + ERR_COUNT_PKT_N(res);
+
+	return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res));
 }
 
 static void tango_dma_callback(void *arg)
@@ -282,7 +290,7 @@ static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (err)
 		return err;
 
-	res = decode_error_report(nfc);
+	res = decode_error_report(chip);
 	if (res < 0) {
 		chip->ecc.read_oob_raw(mtd, chip, page);
 		res = check_erased_page(chip, buf);
@@ -663,6 +671,7 @@ static const struct of_device_id tango_nand_ids[] = {
 	{ .compatible = "sigma,smp8758-nand" },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, tango_nand_ids);
 
 static struct platform_driver tango_nand_driver = {
 	.probe	= tango_nand_probe,
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65..c3963f8 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
 	ret = ubiblock_read(pdu);
 	rq_flush_dcache_pages(req);
 
-	blk_mq_end_request(req, ret);
+	blk_mq_end_request(req, errno_to_blk_status(ret));
 }
 
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 			     const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 	case REQ_OP_READ:
 		ubi_sgl_init(&pdu->usgl);
 		queue_work(dev->wq, &pdu->work);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 }
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 62ee439..53a1cb5 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -756,6 +756,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	struct net_device *dev = dev_id;
 	struct arcnet_local *lp;
 	int recbuf, status, diagstatus, didsomething, boguscount;
+	unsigned long flags;
 	int retval = IRQ_NONE;
 
 	arc_printk(D_DURING, dev, "\n");
@@ -765,7 +766,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	lp = netdev_priv(dev);
 	BUG_ON(!lp);
 
-	spin_lock(&lp->lock);
+	spin_lock_irqsave(&lp->lock, flags);
 
 	/* RESET flag was enabled - if device is not running, we must
 	 * clear it right away (but nothing else).
@@ -774,7 +775,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 		if (lp->hw.status(dev) & RESETflag)
 			lp->hw.command(dev, CFLAGScmd | RESETclear);
 		lp->hw.intmask(dev, 0);
-		spin_unlock(&lp->lock);
+		spin_unlock_irqrestore(&lp->lock, flags);
 		return retval;
 	}
 
@@ -998,7 +999,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
 	udelay(1);
 	lp->hw.intmask(dev, lp->intmask);
 
-	spin_unlock(&lp->lock);
+	spin_unlock_irqrestore(&lp->lock, flags);
 	return retval;
 }
 EXPORT_SYMBOL(arcnet_interrupt);
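
The arcnet change above converts the interrupt handler to the irqsave locking variants. A generic sketch of that pattern, with hypothetical names, for code that shares a lock between hard-IRQ context and other contexts:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);

    /* Safe from any context: interrupts are disabled and their previous
     * state restored, so the lock can also be taken from an interrupt
     * handler without risk of deadlocking against ourselves.
     */
    static void example_touch_shared_state(void)
    {
    	unsigned long flags;

    	spin_lock_irqsave(&example_lock, flags);
    	/* ... modify state shared with the interrupt handler ... */
    	spin_unlock_irqrestore(&example_lock, flags);
    }
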
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 2056878..4fa2e46 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
 	ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
 	ackpkt->soft.cap.mes.ack = acked;
 
-	arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+	arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
 		   *((int *)&ackpkt->soft.cap.cookie[0]));
 
 	ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 239de38..47f80b8 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -135,6 +135,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
 	for (i = 0; i < ci->devcount; i++) {
 		struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
 		struct com20020_dev *card;
+		int dev_id_mask = 0xf;
 
 		dev = alloc_arcdev(device);
 		if (!dev) {
@@ -166,6 +167,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
 		arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
 		arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
 
+		SET_NETDEV_DEV(dev, &pdev->dev);
 		dev->base_addr = ioaddr;
 		dev->dev_addr[0] = node;
 		dev->irq = pdev->irq;
@@ -179,8 +181,8 @@ static int com20020pci_probe(struct pci_dev *pdev,
 
 		/* Get the dev_id from the PLX rotary coder */
 		if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-			dev->dev_id = 0xc;
-		dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+			dev_id_mask = 0x3;
+		dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
 
 		snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
 
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c
index 13d9ad4..78043a9 100644
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
 		return -ENODEV;
 	}
 
-	dev->base_addr = ioaddr;
-
 	arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
 		   lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c5fd425..e5386ab 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -90,10 +90,13 @@ enum ad_link_speed_type {
 	AD_LINK_SPEED_100MBPS,
 	AD_LINK_SPEED_1000MBPS,
 	AD_LINK_SPEED_2500MBPS,
+	AD_LINK_SPEED_5000MBPS,
 	AD_LINK_SPEED_10000MBPS,
+	AD_LINK_SPEED_14000MBPS,
 	AD_LINK_SPEED_20000MBPS,
 	AD_LINK_SPEED_25000MBPS,
 	AD_LINK_SPEED_40000MBPS,
+	AD_LINK_SPEED_50000MBPS,
 	AD_LINK_SPEED_56000MBPS,
 	AD_LINK_SPEED_100000MBPS,
 };
@@ -259,10 +262,13 @@ static inline int __check_agg_selection_timer(struct port *port)
  *     %AD_LINK_SPEED_100MBPS,
  *     %AD_LINK_SPEED_1000MBPS,
  *     %AD_LINK_SPEED_2500MBPS,
+ *     %AD_LINK_SPEED_5000MBPS,
  *     %AD_LINK_SPEED_10000MBPS
+ *     %AD_LINK_SPEED_14000MBPS,
  *     %AD_LINK_SPEED_20000MBPS
  *     %AD_LINK_SPEED_25000MBPS
  *     %AD_LINK_SPEED_40000MBPS
+ *     %AD_LINK_SPEED_50000MBPS
  *     %AD_LINK_SPEED_56000MBPS
  *     %AD_LINK_SPEED_100000MBPS
  */
@@ -296,10 +302,18 @@ static u16 __get_link_speed(struct port *port)
 			speed = AD_LINK_SPEED_2500MBPS;
 			break;
 
+		case SPEED_5000:
+			speed = AD_LINK_SPEED_5000MBPS;
+			break;
+
 		case SPEED_10000:
 			speed = AD_LINK_SPEED_10000MBPS;
 			break;
 
+		case SPEED_14000:
+			speed = AD_LINK_SPEED_14000MBPS;
+			break;
+
 		case SPEED_20000:
 			speed = AD_LINK_SPEED_20000MBPS;
 			break;
@@ -312,6 +326,10 @@ static u16 __get_link_speed(struct port *port)
 			speed = AD_LINK_SPEED_40000MBPS;
 			break;
 
+		case SPEED_50000:
+			speed = AD_LINK_SPEED_50000MBPS;
+			break;
+
 		case SPEED_56000:
 			speed = AD_LINK_SPEED_56000MBPS;
 			break;
@@ -707,9 +725,15 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 		case AD_LINK_SPEED_2500MBPS:
 			bandwidth = nports * 2500;
 			break;
+		case AD_LINK_SPEED_5000MBPS:
+			bandwidth = nports * 5000;
+			break;
 		case AD_LINK_SPEED_10000MBPS:
 			bandwidth = nports * 10000;
 			break;
+		case AD_LINK_SPEED_14000MBPS:
+			bandwidth = nports * 14000;
+			break;
 		case AD_LINK_SPEED_20000MBPS:
 			bandwidth = nports * 20000;
 			break;
@@ -719,6 +743,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 		case AD_LINK_SPEED_40000MBPS:
 			bandwidth = nports * 40000;
 			break;
+		case AD_LINK_SPEED_50000MBPS:
+			bandwidth = nports * 50000;
+			break;
 		case AD_LINK_SPEED_56000MBPS:
 			bandwidth = nports * 56000;
 			break;
@@ -2577,7 +2604,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 		return -1;
 
 	ad_info->aggregator_id = aggregator->aggregator_identifier;
-	ad_info->ports = aggregator->num_of_ports;
+	ad_info->ports = __agg_active_ports(aggregator);
 	ad_info->actor_key = aggregator->actor_oper_aggregator_key;
 	ad_info->partner_key = aggregator->partner_oper_aggregator_key;
 	ether_addr_copy(ad_info->partner_system,
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2be7880..8ab6bdb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2612,11 +2612,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		unsigned long trans_start = dev_trans_start(slave->dev);
 
+		slave->new_link = BOND_LINK_NOCHANGE;
+
 		if (slave->link != BOND_LINK_UP) {
 			if (bond_time_in_interval(bond, trans_start, 1) &&
 			    bond_time_in_interval(bond, slave->last_rx, 1)) {
 
-				slave->link  = BOND_LINK_UP;
+				slave->new_link = BOND_LINK_UP;
 				slave_state_changed = 1;
 
 				/* primary_slave has no meaning in round-robin
@@ -2643,7 +2645,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 			if (!bond_time_in_interval(bond, trans_start, 2) ||
 			    !bond_time_in_interval(bond, slave->last_rx, 2)) {
 
-				slave->link  = BOND_LINK_DOWN;
+				slave->new_link = BOND_LINK_DOWN;
 				slave_state_changed = 1;
 
 				if (slave->link_failure_count < UINT_MAX)
@@ -2674,6 +2676,11 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
 		if (!rtnl_trylock())
 			goto re_arm;
 
+		bond_for_each_slave(bond, slave, iter) {
+			if (slave->new_link != BOND_LINK_NOCHANGE)
+				slave->link = slave->new_link;
+		}
+
 		if (slave_state_changed) {
 			bond_slave_state_change(bond);
 			if (BOND_MODE(bond) == BOND_MODE_XOR)
@@ -4185,7 +4192,6 @@ static void bond_destructor(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
-	free_netdev(bond_dev);
 }
 
 void bond_setup(struct net_device *bond_dev)
@@ -4205,7 +4211,8 @@ void bond_setup(struct net_device *bond_dev)
 	bond_dev->netdev_ops = &bond_netdev_ops;
 	bond_dev->ethtool_ops = &bond_ethtool_ops;
 
-	bond_dev->destructor = bond_destructor;
+	bond_dev->needs_free_netdev = true;
+	bond_dev->priv_destructor = bond_destructor;
 
 	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
@@ -4271,10 +4278,10 @@ static int bond_check_params(struct bond_params *params)
 	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
 	struct bond_opt_value newval;
 	const struct bond_opt_value *valptr;
-	int arp_all_targets_value;
+	int arp_all_targets_value = 0;
 	u16 ad_actor_sys_prio = 0;
 	u16 ad_user_port_key = 0;
-	__be32 arp_target[BOND_MAX_ARP_TARGETS];
+	__be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
 	int arp_ip_count;
 	int bond_mode	= BOND_MODE_ROUNDROBIN;
 	int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
@@ -4501,7 +4508,6 @@ static int bond_check_params(struct bond_params *params)
 		arp_validate_value = 0;
 	}
 
-	arp_all_targets_value = 0;
 	if (arp_all_targets) {
 		bond_opt_initstr(&newval, arp_all_targets);
 		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
@@ -4730,7 +4736,7 @@ int bond_create(struct net *net, const char *name)
 
 	rtnl_unlock();
 	if (res < 0)
-		bond_destructor(bond_dev);
+		free_netdev(bond_dev);
 	return res;
 }
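
The bonding hunks above, like the caif, can and dummy hunks further down, move from dev->destructor to the needs_free_netdev/priv_destructor convention. A condensed sketch of that convention, using hypothetical example_* names:

    #include <linux/netdevice.h>
    #include <linux/slab.h>

    struct example_priv {
    	void *extra;
    };

    /* Only driver-private teardown lives here ... */
    static void example_priv_destructor(struct net_device *dev)
    {
    	struct example_priv *priv = netdev_priv(dev);

    	kfree(priv->extra);
    }

    /* ... the core calls free_netdev() itself when needs_free_netdev is set */
    static void example_setup(struct net_device *dev)
    {
    	dev->needs_free_netdev = true;
    	dev->priv_destructor = example_priv_destructor;
    }
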
 
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index ddabce7..71a7c3b 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1121,7 +1121,7 @@ static void cfhsi_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->netdev_ops = &cfhsi_netdevops;
 	for (i = 0; i < CFHSI_PRIO_LAST; ++i)
 		skb_queue_head_init(&cfhsi->qhead[i]);
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index c2dea49..76e1d35 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -428,7 +428,7 @@ static void caifdev_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&serdev->head);
 	serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
 	serdev->common.use_frag = true;
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 3a529fb..fc21afe 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -712,7 +712,7 @@ static void cfspi_setup(struct net_device *dev)
 	dev->flags = IFF_NOARP | IFF_POINTOPOINT;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->mtu = SPI_MAX_PAYLOAD_SIZE;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	skb_queue_head_init(&cfspi->qhead);
 	skb_queue_head_init(&cfspi->chead);
 	cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index 6122768..1794ea0 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -617,7 +617,7 @@ static void cfv_netdev_setup(struct net_device *netdev)
 	netdev->tx_queue_len = 100;
 	netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	netdev->mtu = CFV_DEF_MTU_SIZE;
-	netdev->destructor = free_netdev;
+	netdev->needs_free_netdev = true;
 }
 
 /* Create debugfs counters for the device */
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 611d16a..ae4ed03 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -391,6 +391,9 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 	can_update_state_error_stats(dev, new_state);
 	priv->state = new_state;
 
+	if (!cf)
+		return;
+
 	if (unlikely(new_state == CAN_STATE_BUS_OFF)) {
 		cf->can_id |= CAN_ERR_BUSOFF;
 		return;
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 0d57be5..85268be 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -489,7 +489,7 @@ int peak_canfd_handle_msgs_list(struct peak_canfd_priv *priv,
 				struct pucan_rx_msg *msg_list, int msg_count)
 {
 	void *msg_ptr = msg_list;
-	int i, msg_size;
+	int i, msg_size = 0;
 
 	for (i = 0; i < msg_count; i++) {
 		msg_size = peak_canfd_handle_msg(priv, msg_ptr);
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index eb71737..6a6e896 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -417,7 +417,7 @@ static int slc_open(struct net_device *dev)
 static void slc_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slcan_devs[i] = NULL;
 }
 
@@ -436,7 +436,8 @@ static const struct net_device_ops slc_netdev_ops = {
 static void slc_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &slc_netdev_ops;
-	dev->destructor		= slc_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= slc_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -761,8 +762,6 @@ static void __exit slcan_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index eecee7f..afcc131 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -265,6 +265,8 @@ static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev)
 			     sizeof(*dm),
 			     1000);
 
+	kfree(dm);
+
 	return rc;
 }
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 57913db..1ca76e0 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -908,8 +908,6 @@ static int peak_usb_probe(struct usb_interface *intf,
 	const struct peak_usb_adapter *peak_usb_adapter = NULL;
 	int i, err = -ENOMEM;
 
-	usb_dev = interface_to_usbdev(intf);
-
 	/* get corresponding PCAN-USB adapter */
 	for (i = 0; i < ARRAY_SIZE(peak_usb_adapters_list); i++)
 		if (peak_usb_adapters_list[i]->device_id == usb_id_product) {
@@ -920,7 +918,7 @@ static int peak_usb_probe(struct usb_interface *intf,
 	if (!peak_usb_adapter) {
 		/* should never come except device_id bad usage in this file */
 		pr_err("%s: didn't find device id. 0x%x in devices list\n",
-			PCAN_USB_DRIVER_NAME, usb_dev->descriptor.idProduct);
+			PCAN_USB_DRIVER_NAME, usb_id_product);
 		return -ENODEV;
 	}
 
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index facca33..a8cb3326 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -152,7 +152,7 @@ static const struct net_device_ops vcan_netdev_ops = {
 static void vcan_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_CAN;
-	dev->mtu		= CAN_MTU;
+	dev->mtu		= CANFD_MTU;
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 0;
@@ -163,7 +163,7 @@ static void vcan_setup(struct net_device *dev)
 		dev->flags |= IFF_ECHO;
 
 	dev->netdev_ops		= &vcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 7fbb247..cfe889e 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -150,13 +150,13 @@ static const struct net_device_ops vxcan_netdev_ops = {
 static void vxcan_setup(struct net_device *dev)
 {
 	dev->type		= ARPHRD_CAN;
-	dev->mtu		= CAN_MTU;
+	dev->mtu		= CANFD_MTU;
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 0;
 	dev->flags		= (IFF_NOARP|IFF_ECHO);
 	dev->netdev_ops		= &vxcan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /* forward declaration for rtnl_create_link() */
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 96046bb..14c0be9 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -114,13 +114,13 @@ static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
 	return -EOPNOTSUPP;
 }
 
-int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
-			   int src_port, u16 data)
+static inline int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip,
+					 int src_dev, int src_port, u16 data)
 {
 	return -EOPNOTSUPP;
 }
 
-int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
+static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 149244a..9905b52 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -328,7 +328,6 @@ static void dummy_free_netdev(struct net_device *dev)
 	struct dummy_priv *priv = netdev_priv(dev);
 
 	kfree(priv->vfinfo);
-	free_netdev(dev);
 }
 
 static void dummy_setup(struct net_device *dev)
@@ -338,7 +337,8 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &dummy_netdev_ops;
 	dev->ethtool_ops = &dummy_ethtool_ops;
-	dev->destructor = dummy_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = dummy_free_netdev;
 
 	/* Fill in device structure with ethernet-generic values. */
 	dev->flags |= IFF_NOARP;
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index b0a3b85..db02bc2 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -748,13 +748,13 @@ static int ax_init_dev(struct net_device *dev)
 
 	ret = ax_mii_init(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	ax_NS8390_init(dev, 0);
 
 	ret = register_netdev(dev);
 	if (ret)
-		goto out_irq;
+		goto err_out;
 
 	netdev_info(dev, "%dbit, irq %d, %lx, MAC: %pM\n",
 		    ei_local->word16 ? 16 : 8, dev->irq, dev->base_addr,
@@ -762,9 +762,6 @@ static int ax_init_dev(struct net_device *dev)
 
 	return 0;
 
- out_irq:
-	/* cleanup irq */
-	free_irq(dev->irq, dev);
  err_out:
 	return ret;
 }
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 08d11ce..f5b237e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -61,6 +61,8 @@
 
 #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
 
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
 /*****************************************************************************/
 /*****************************************************************************/
 /*****************************************************************************/
@@ -232,11 +234,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
 	tail_masked = admin_queue->sq.tail & queue_size_mask;
 
 	/* In case of queue FULL */
-	cnt = admin_queue->sq.tail - admin_queue->sq.head;
+	cnt = atomic_read(&admin_queue->outstanding_cmds);
 	if (cnt >= admin_queue->q_depth) {
-		pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
-			 admin_queue->sq.tail, admin_queue->sq.head,
-			 admin_queue->q_depth);
+		pr_debug("admin queue is full.\n");
 		admin_queue->stats.out_of_space++;
 		return ERR_PTR(-ENOSPC);
 	}
@@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
 						     struct ena_com_admin_queue *admin_queue)
 {
-	unsigned long flags;
-	u32 start_time;
+	unsigned long flags, timeout;
 	int ret;
 
-	start_time = ((u32)jiffies_to_usecs(jiffies));
+	timeout = jiffies + ADMIN_CMD_TIMEOUT_US;
 
-	while (comp_ctx->status == ENA_CMD_SUBMITTED) {
-		if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
-		    ADMIN_CMD_TIMEOUT_US) {
+	while (1) {
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		if (comp_ctx->status != ENA_CMD_SUBMITTED)
+			break;
+
+		if (time_is_before_jiffies(timeout)) {
 			pr_err("Wait for completion (polling) timeout\n");
 			/* ENA didn't have any completion */
 			spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
 			goto err;
 		}
 
-		spin_lock_irqsave(&admin_queue->q_lock, flags);
-		ena_com_handle_admin_completion(admin_queue);
-		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-
 		msleep(100);
 	}
 
@@ -1455,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
 
 void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
 {
+	u32 mask_value = 0;
+
+	if (polling)
+		mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+	writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
 	ena_dev->admin_queue.polling = polling;
 }
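
The reworked admin-queue polling loop above replaces the hand-rolled microsecond arithmetic with an absolute jiffies deadline. A minimal sketch of that deadline pattern, with hypothetical names:

    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>
    #include <linux/types.h>

    #define EXAMPLE_TIMEOUT_JIFFIES	msecs_to_jiffies(5000)

    static int example_wait_for_completion(bool (*done)(void *ctx), void *ctx)
    {
    	unsigned long timeout = jiffies + EXAMPLE_TIMEOUT_JIFFIES;

    	while (!done(ctx)) {
    		if (time_is_before_jiffies(timeout))
    			return -ETIME;	/* deadline expired */
    		msleep(100);
    	}
    	return 0;
    }
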
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 67b2338f..3ee55e2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
 	ENA_STAT_TX_ENTRY(tx_poll),
 	ENA_STAT_TX_ENTRY(doorbells),
 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
-	ENA_STAT_TX_ENTRY(missing_tx_comp),
 	ENA_STAT_TX_ENTRY(bad_req_id),
 };
 
@@ -94,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
 	ENA_STAT_RX_ENTRY(dma_mapping_err),
 	ENA_STAT_RX_ENTRY(bad_desc_num),
 	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+	ENA_STAT_RX_ENTRY(empty_rx_ring),
 };
 
 static const struct ena_stats ena_stats_ena_com_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 7c1214d..4f16ed3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
 		rxr->sgl_size = adapter->max_rx_sgl_size;
 		rxr->smoothed_interval =
 			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+		rxr->empty_rx_queue = 0;
 	}
 }
 
@@ -1078,6 +1079,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
 	rx_ring->per_napi_bytes = 0;
 }
 
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+					struct ena_ring *rx_ring)
+{
+	struct ena_eth_io_intr_reg intr_reg;
+
+	/* Update intr register: rx intr delay,
+	 * tx intr delay and interrupt unmask
+	 */
+	ena_com_update_intr_reg(&intr_reg,
+				rx_ring->smoothed_interval,
+				tx_ring->smoothed_interval,
+				true);
+
+	/* It is a shared MSI-X.
+	 * Tx and Rx CQ have pointer to it.
+	 * So we use one of them to reach the intr reg
+	 */
+	ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
 static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
 					     struct ena_ring *rx_ring)
 {
@@ -1108,7 +1129,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 {
 	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
 	struct ena_ring *tx_ring, *rx_ring;
-	struct ena_eth_io_intr_reg intr_reg;
 
 	u32 tx_work_done;
 	u32 rx_work_done;
@@ -1149,22 +1169,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
 				ena_adjust_intr_moderation(rx_ring, tx_ring);
 
-			/* Update intr register: rx intr delay,
-			 * tx intr delay and interrupt unmask
-			 */
-			ena_com_update_intr_reg(&intr_reg,
-						rx_ring->smoothed_interval,
-						tx_ring->smoothed_interval,
-						true);
-
-			/* It is a shared MSI-X.
-			 * Tx and Rx CQ have pointer to it.
-			 * So we use one of them to reach the intr reg
-			 */
-			ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+			ena_unmask_interrupt(tx_ring, rx_ring);
 		}
 
-
 		ena_update_ring_numa_node(tx_ring, rx_ring);
 
 		ret = rx_work_done;
@@ -1485,6 +1492,11 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
 	ena_napi_enable_all(adapter);
 
+	/* Enable completion queues interrupt */
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_unmask_interrupt(&adapter->tx_ring[i],
+				     &adapter->rx_ring[i]);
+
 	/* schedule napi in case we had pending packets
 	 * from the last time we disable napi
 	 */
@@ -1532,6 +1544,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
 			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
 			  qid, rc);
 		ena_com_destroy_io_queue(ena_dev, ena_qid);
+		return rc;
 	}
 
 	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1596,6 +1609,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
 			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
 			  qid, rc);
 		ena_com_destroy_io_queue(ena_dev, ena_qid);
+		return rc;
 	}
 
 	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1981,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_info->tx_descs = nb_hw_desc;
 	tx_info->last_jiffies = jiffies;
+	tx_info->print_once = 0;
 
 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 		tx_ring->ring_size);
@@ -2550,13 +2565,44 @@ static void ena_fw_reset_device(struct work_struct *work)
 		"Reset attempt failed. Can not reset the device\n");
 }
 
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static int check_missing_comp_in_queue(struct ena_adapter *adapter,
+				       struct ena_ring *tx_ring)
 {
 	struct ena_tx_buffer *tx_buf;
 	unsigned long last_jiffies;
+	u32 missed_tx = 0;
+	int i;
+
+	for (i = 0; i < tx_ring->ring_size; i++) {
+		tx_buf = &tx_ring->tx_buffer_info[i];
+		last_jiffies = tx_buf->last_jiffies;
+		if (unlikely(last_jiffies &&
+			     time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+			if (!tx_buf->print_once)
+				netif_notice(adapter, tx_err, adapter->netdev,
+					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+					     tx_ring->qid, i);
+
+			tx_buf->print_once = 1;
+			missed_tx++;
+
+			if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+				netif_err(adapter, tx_err, adapter->netdev,
+					  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+					  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+				set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+				return -EIO;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
 	struct ena_ring *tx_ring;
-	int i, j, budget;
-	u32 missed_tx;
+	int i, budget, rc;
 
 	/* Make sure the driver doesn't turn the device in other process */
 	smp_rmb();
@@ -2572,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
 		tx_ring = &adapter->tx_ring[i];
 
-		for (j = 0; j < tx_ring->ring_size; j++) {
-			tx_buf = &tx_ring->tx_buffer_info[j];
-			last_jiffies = tx_buf->last_jiffies;
-			if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
-				netif_notice(adapter, tx_err, adapter->netdev,
-					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
-					     tx_ring->qid, j);
-
-				u64_stats_update_begin(&tx_ring->syncp);
-				missed_tx = tx_ring->tx_stats.missing_tx_comp++;
-				u64_stats_update_end(&tx_ring->syncp);
-
-				/* Clear last jiffies so the lost buffer won't
-				 * be counted twice.
-				 */
-				tx_buf->last_jiffies = 0;
-
-				if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
-					netif_err(adapter, tx_err, adapter->netdev,
-						  "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
-						  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
-					set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-				}
-			}
-		}
+		rc = check_missing_comp_in_queue(adapter, tx_ring);
+		if (unlikely(rc))
+			return;
 
 		budget--;
 		if (!budget)
@@ -2606,6 +2630,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	adapter->last_monitored_tx_qid = i % adapter->num_queues;
 }
 
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate new Rx descriptors, so the device won't be
+ * able to deliver new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB, with a minimum of 128MB for a
+ * constrained environment.
+ *
+ * When such a situation is detected - reschedule napi.
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+	struct ena_ring *rx_ring;
+	int i, refill_required;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return;
+
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rx_ring = &adapter->rx_ring[i];
+
+		refill_required =
+			ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+			rx_ring->empty_rx_queue++;
+
+			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+				u64_stats_update_begin(&rx_ring->syncp);
+				rx_ring->rx_stats.empty_rx_ring++;
+				u64_stats_update_end(&rx_ring->syncp);
+
+				netif_err(adapter, drv, adapter->netdev,
+					  "trigger refill for ring %d\n", i);
+
+				napi_schedule(rx_ring->napi);
+				rx_ring->empty_rx_queue = 0;
+			}
+		} else {
+			rx_ring->empty_rx_queue = 0;
+		}
+	}
+}
+
 /* Check for keep alive expiration */
 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
 {
@@ -2660,6 +2736,8 @@ static void ena_timer_service(unsigned long data)
 
 	check_for_missing_tx_completions(adapter);
 
+	check_for_empty_rx_ring(adapter);
+
 	if (debug_area)
 		ena_dump_stats_to_buf(adapter, debug_area);
 
@@ -2840,6 +2918,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
 {
 	int release_bars;
 
+	if (ena_dev->mem_bar)
+		devm_iounmap(&pdev->dev, ena_dev->mem_bar);
+
+	devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+
 	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
 	pci_release_selected_regions(pdev, release_bars);
 }
@@ -2927,8 +3010,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_free_ena_dev;
 	}
 
-	ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
-				   pci_resource_len(pdev, ENA_REG_BAR));
+	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+					pci_resource_start(pdev, ENA_REG_BAR),
+					pci_resource_len(pdev, ENA_REG_BAR));
 	if (!ena_dev->reg_bar) {
 		dev_err(&pdev->dev, "failed to remap regs bar\n");
 		rc = -EFAULT;
@@ -2948,8 +3032,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
 
 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-		ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
-					      pci_resource_len(pdev, ENA_MEM_BAR));
+		ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+						   pci_resource_start(pdev, ENA_MEM_BAR),
+						   pci_resource_len(pdev, ENA_MEM_BAR));
 		if (!ena_dev->mem_bar) {
 			rc = -EFAULT;
 			goto err_device_destroy;
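
The probe path above moves to managed I/O mappings. A short sketch of the devm pattern, with a hypothetical helper, showing why the error paths no longer need to pair every mapping with an explicit iounmap() (devres releases the mapping when the device is detached):

    #include <linux/io.h>
    #include <linux/pci.h>

    static void __iomem *example_map_reg_bar(struct pci_dev *pdev, int bar)
    {
    	/* Lifetime is tied to &pdev->dev; no matching iounmap() required */
    	return devm_ioremap(&pdev->dev, pci_resource_start(pdev, bar),
    			    pci_resource_len(pdev, bar));
    }
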
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 0e22bce..a4d3d5e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -45,7 +45,7 @@
 
 #define DRV_MODULE_VER_MAJOR	1
 #define DRV_MODULE_VER_MINOR	1
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 7
 
 #define DRV_MODULE_NAME		"ena"
 #ifndef DRV_MODULE_VERSION
@@ -146,7 +146,18 @@ struct ena_tx_buffer {
 	u32 tx_descs;
 	/* num of buffers used by this skb */
 	u32 num_of_bufs;
-	/* Save the last jiffies to detect missing tx packets */
+
+	/* Used when detecting missing tx packets, to limit the number of prints */
+	u32 print_once;
+	/* Save the last jiffies to detect missing tx packets
+	 *
+	 * set to a non-zero value on ena_start_xmit and set to zero on
+	 * napi and the timer service routine.
+	 *
+	 * While this value is not protected by a lock,
+	 * a given packet is not expected to be handled by ena_start_xmit
+	 * and by napi/timer_service at the same time.
+	 */
 	unsigned long last_jiffies;
 	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
 } ____cacheline_aligned;
@@ -170,7 +181,6 @@ struct ena_stats_tx {
 	u64 napi_comp;
 	u64 tx_poll;
 	u64 doorbells;
-	u64 missing_tx_comp;
 	u64 bad_req_id;
 };
 
@@ -184,6 +194,7 @@ struct ena_stats_rx {
 	u64 dma_mapping_err;
 	u64 bad_desc_num;
 	u64 rx_copybreak_pkt;
+	u64 empty_rx_ring;
 };
 
 struct ena_ring {
@@ -231,6 +242,7 @@ struct ena_ring {
 		struct ena_stats_tx tx_stats;
 		struct ena_stats_rx rx_stats;
 	};
+	int empty_rx_queue;
 } ____cacheline_aligned;
 
 struct ena_stats_dev {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index b3bc87f..0a98c36 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -324,7 +324,7 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 			      struct xgbe_ring *ring,
 			      struct xgbe_ring_data *rdata)
 {
-	int order, ret;
+	int ret;
 
 	if (!ring->rx_hdr_pa.pages) {
 		ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
@@ -333,9 +333,8 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
 	}
 
 	if (!ring->rx_buf_pa.pages) {
-		order = max_t(int, PAGE_ALLOC_COSTLY_ORDER - 1, 0);
 		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
-				       order);
+				       PAGE_ALLOC_COSTLY_ORDER);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index b8e3d88..a66aee5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -193,9 +193,6 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
 			     struct aq_hw_caps_s *aq_hw_caps,
 			     u32 *regs_buff);
 
-int hw_atl_utils_hw_get_settings(struct aq_hw_s *self,
-				 struct ethtool_cmd *cmd);
-
 int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
 			      unsigned int power_state);
 
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 63f2dee..77a1c03 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1353,6 +1353,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
 		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
 		printk(KERN_ERR "atl2: No usable DMA configuration, aborting\n");
+		err = -EIO;
 		goto err_dma;
 	}
 
@@ -1366,10 +1367,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * pcibios_set_master to do the needed arch specific settings */
 	pci_set_master(pdev);
 
-	err = -ENOMEM;
 	netdev = alloc_etherdev(sizeof(struct atl2_adapter));
-	if (!netdev)
+	if (!netdev) {
+		err = -ENOMEM;
 		goto err_alloc_etherdev;
+	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
@@ -1408,8 +1410,6 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_sw_init;
 
-	err = -EIO;
-
 	netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 099b374..5274501 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2026,9 +2026,12 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	priv->num_rx_desc_words = params->num_rx_desc_words;
 
 	priv->irq0 = platform_get_irq(pdev, 0);
-	if (!priv->is_lite)
+	if (!priv->is_lite) {
 		priv->irq1 = platform_get_irq(pdev, 1);
-	priv->wol_irq = platform_get_irq(pdev, 2);
+		priv->wol_irq = platform_get_irq(pdev, 2);
+	} else {
+		priv->wol_irq = platform_get_irq(pdev, 1);
+	}
 	if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) {
 		dev_err(&pdev->dev, "invalid interrupts\n");
 		ret = -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index eccb3d1..f619c4c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1926,7 +1926,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp);
+	return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
@@ -3883,15 +3883,26 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* when transmitting in a vf, start bd must hold the ethertype
 		 * for fw to enforce it
 		 */
+		u16 vlan_tci = 0;
 #ifndef BNX2X_STOP_ON_ERROR
-		if (IS_VF(bp))
+		if (IS_VF(bp)) {
 #endif
-			tx_start_bd->vlan_or_ethertype =
-				cpu_to_le16(ntohs(eth->h_proto));
+			/* Still need to consider inband vlan for enforced */
+			if (__vlan_get_tag(skb, &vlan_tci)) {
+				tx_start_bd->vlan_or_ethertype =
+					cpu_to_le16(ntohs(eth->h_proto));
+			} else {
+				tx_start_bd->bd_flags.as_bitfield |=
+					(X_ETH_INBAND_VLAN <<
+					 ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+				tx_start_bd->vlan_or_ethertype =
+					cpu_to_le16(vlan_tci);
+			}
 #ifndef BNX2X_STOP_ON_ERROR
-		else
+		} else {
 			/* used by FW for packet accounting */
 			tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+		}
 #endif
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a851f95..349a465 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
 	} else {
 		/* If no mc addresses are required, flush the configuration */
 		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-		if (rc)
+		if (rc < 0)
 			BNX2X_ERR("Failed to clear multicast configuration %d\n",
 				  rc);
 	}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index bdfd53b..9ca994d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -901,6 +901,8 @@ static void bnx2x_vf_flr(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	/* release VF resources */
 	bnx2x_vf_free_resc(bp, vf);
 
+	vf->malicious = false;
+
 	/* re-open the mailbox */
 	bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
 	return;
@@ -1822,9 +1824,11 @@ int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem)
 		   vf->abs_vfid, qidx);
 		bnx2x_vf_handle_rss_update_eqe(bp, vf);
 	case EVENT_RING_OPCODE_VF_FLR:
-	case EVENT_RING_OPCODE_MALICIOUS_VF:
 		/* Do nothing for now */
 		return 0;
+	case EVENT_RING_OPCODE_MALICIOUS_VF:
+		vf->malicious = true;
+		return 0;
 	}
 
 	return 0;
@@ -1905,6 +1909,13 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
 			continue;
 		}
 
+		if (vf->malicious) {
+			DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+			       "vf %d malicious so no stats for it\n",
+			       vf->abs_vfid);
+			continue;
+		}
+
 		DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
 		       "add addresses for vf %d\n", vf->abs_vfid);
 		for_each_vfq(vf, j) {
@@ -3042,7 +3053,7 @@ void bnx2x_vf_pci_dealloc(struct bnx2x *bp)
 {
 	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
 		       sizeof(struct bnx2x_vf_mbx_msg));
-	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->pf2vf_bulletin_mapping,
+	BNX2X_PCI_FREE(bp->pf2vf_bulletin, bp->pf2vf_bulletin_mapping,
 		       sizeof(union pf_vf_bulletin));
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 888d0b6..53466f6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -141,6 +141,7 @@ struct bnx2x_virtf {
 #define VF_RESET	3	/* VF FLR'd, pending cleanup */
 
 	bool flr_clnup_stage;	/* true during flr cleanup */
+	bool malicious;		/* true if FW indicated so, until FLR */
 
 	/* dma */
 	dma_addr_t fw_stat_map;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 03f55da..74e8e21 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1301,10 +1301,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 		cp_cons = NEXT_CMP(cp_cons);
 	}
 
-	if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+	if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
 		bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
-		netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
-			    agg_bufs, (int)MAX_SKB_FRAGS);
+		if (agg_bufs > MAX_SKB_FRAGS)
+			netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+				    agg_bufs, (int)MAX_SKB_FRAGS);
 		return NULL;
 	}
 
@@ -1562,6 +1563,45 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 	return rc;
 }
 
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+				 u32 *raw_cons, u8 *event)
+{
+	struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+	u32 tmp_raw_cons = *raw_cons;
+	struct rx_cmp_ext *rxcmp1;
+	struct rx_cmp *rxcmp;
+	u16 cp_cons;
+	u8 cmp_type;
+
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp = (struct rx_cmp *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+	cp_cons = RING_CMP(tmp_raw_cons);
+	rxcmp1 = (struct rx_cmp_ext *)
+			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+	if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+		return -EBUSY;
+
+	cmp_type = RX_CMP_TYPE(rxcmp);
+	if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+		rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+			cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+	} else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+		struct rx_tpa_end_cmp_ext *tpa_end1;
+
+		tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+		tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+			cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+	}
+	return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
 #define BNXT_GET_EVENT_PORT(data)	\
 	((data) &			\
 	 ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1744,7 +1784,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 			if (unlikely(tx_pkts > bp->tx_wake_thresh))
 				rx_pkts = budget;
 		} else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-			rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			if (likely(budget))
+				rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+			else
+				rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+							   &event);
 			if (likely(rc >= 0))
 				rx_pkts += rc;
 			else if (rc == -EBUSY)	/* partial completion */
@@ -6663,12 +6707,11 @@ static void bnxt_poll_controller(struct net_device *dev)
 	struct bnxt *bp = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < bp->cp_nr_rings; i++) {
-		struct bnxt_irq *irq = &bp->irq_tbl[i];
+	/* Only process tx rings/combined rings in netpoll mode. */
+	for (i = 0; i < bp->tx_nr_rings; i++) {
+		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 
-		disable_irq(irq->vector);
-		irq->handler(irq->vector, bp->bnapi[i]);
-		enable_irq(irq->vector);
+		napi_schedule(&txr->bnapi->napi);
 	}
 }
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 3ef42db..d46a850 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
 
 	__le32 rx_tpa_end_cmp_errors_v2;
 	#define RX_TPA_END_CMP_V2				(0x1 << 0)
-	#define RX_TPA_END_CMP_ERRORS				(0x7fff << 1)
+	#define RX_TPA_END_CMP_ERRORS				(0x3 << 1)
 	#define RX_TPA_END_CMPL_ERRORS_SHIFT			 1
 
 	u32 rx_tpa_end_cmp_start_opaque;
 };
 
+#define TPA_END_ERRORS(rx_tpa_end_ext)					\
+	((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &			\
+	 cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
 #define DB_IDX_MASK						0xffffff
 #define DB_IDX_VALID						(0x1 << 26)
 #define DB_IRQ_DIS						(0x1 << 27)
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index bed9ef1..7ccffbb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -144,7 +144,7 @@ static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
 	int errno = 0;
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
@@ -171,7 +171,7 @@ sleep_timeout_cond(wait_queue_head_t *wait_queue,
 		   int *condition,
 		   int timeout)
 {
-	wait_queue_t we;
+	wait_queue_entry_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
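The liquidio hunks above only rename the wait-queue entry type from wait_queue_t to wait_queue_entry_t; the surrounding open-coded sleep is unchanged. As a hedged sketch of that general pattern (my_wq, my_cond and my_wait_for_cond() are made-up names, not part of this driver), an interruptible wait on a flag with the renamed type looks roughly like this:

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* hypothetical wait queue */
static int my_cond;			/* hypothetical condition flag */

/* Sleep until my_cond becomes non-zero or a signal arrives. */
static int my_wait_for_cond(void)
{
	wait_queue_entry_t we;		/* was wait_queue_t before the rename */
	int err = 0;

	init_waitqueue_entry(&we, current);
	add_wait_queue(&my_wq, &we);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (READ_ONCE(my_cond))
			break;
		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&my_wq, &we);
	return err;
}

A waker would set my_cond and call wake_up(&my_wq); most new code uses wait_event_interruptible() to hide this boilerplate entirely.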
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 38a5c67..53309f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2171,9 +2171,10 @@ static int cxgb_up(struct adapter *adap)
 {
 	int err;
 
+	mutex_lock(&uld_mutex);
 	err = setup_sge_queues(adap);
 	if (err)
-		goto out;
+		goto rel_lock;
 	err = setup_rss(adap);
 	if (err)
 		goto freeq;
@@ -2196,23 +2197,28 @@ static int cxgb_up(struct adapter *adap)
 		if (err)
 			goto irq_err;
 	}
+
 	enable_rx(adap);
 	t4_sge_start(adap);
 	t4_intr_enable(adap);
 	adap->flags |= FULL_INIT_DONE;
+	mutex_unlock(&uld_mutex);
+
 	notify_ulds(adap, CXGB4_STATE_UP);
 #if IS_ENABLED(CONFIG_IPV6)
 	update_clip(adap);
 #endif
 	/* Initialize hash mac addr list*/
 	INIT_LIST_HEAD(&adap->mac_hlist);
- out:
 	return err;
+
  irq_err:
 	dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
  freeq:
 	t4_free_sge_resources(adap);
-	goto out;
+ rel_lock:
+	mutex_unlock(&uld_mutex);
+	return err;
 }
 
 static void cxgb_down(struct adapter *adapter)
@@ -2771,6 +2777,9 @@ void t4_fatal_err(struct adapter *adap)
 {
 	int port;
 
+	if (pci_channel_offline(adap->pdev))
+		return;
+
 	/* Disable the SGE since ULDs are going to free resources that
 	 * could be exposed to the adapter.  RDMA MWs for example...
 	 */
@@ -3882,9 +3891,10 @@ static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
 	spin_lock(&adap->stats_lock);
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
-
-		netif_device_detach(dev);
-		netif_carrier_off(dev);
+		if (dev) {
+			netif_device_detach(dev);
+			netif_carrier_off(dev);
+		}
 	}
 	spin_unlock(&adap->stats_lock);
 	disable_interrupts(adap);
@@ -3963,12 +3973,13 @@ static void eeh_resume(struct pci_dev *pdev)
 	rtnl_lock();
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
-
-		if (netif_running(dev)) {
-			link_start(dev);
-			cxgb_set_rxmode(dev);
+		if (dev) {
+			if (netif_running(dev)) {
+				link_start(dev);
+				cxgb_set_rxmode(dev);
+			}
+			netif_device_attach(dev);
 		}
-		netif_device_attach(dev);
 	}
 	rtnl_unlock();
 }
@@ -4516,7 +4527,7 @@ static void dummy_setup(struct net_device *dev)
 	/* Initialize the device structure. */
 	dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
 	dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static int config_mgmt_dev(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index aded42b96..3a34aa6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4557,8 +4557,13 @@ void t4_intr_enable(struct adapter *adapter)
  */
 void t4_intr_disable(struct adapter *adapter)
 {
-	u32 whoami = t4_read_reg(adapter, PL_WHOAMI_A);
-	u32 pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+	u32 whoami, pf;
+
+	if (pci_channel_offline(adapter->pdev))
+		return;
+
+	whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+	pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
 			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 
 	t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE_A), 0);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index 3549d387..f2d623a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -37,7 +37,7 @@
 
 #define T4FW_VERSION_MAJOR 0x01
 #define T4FW_VERSION_MINOR 0x10
-#define T4FW_VERSION_MICRO 0x2B
+#define T4FW_VERSION_MICRO 0x2D
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -46,7 +46,7 @@
 
 #define T5FW_VERSION_MAJOR 0x01
 #define T5FW_VERSION_MINOR 0x10
-#define T5FW_VERSION_MICRO 0x2B
+#define T5FW_VERSION_MICRO 0x2D
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -55,7 +55,7 @@
 
 #define T6FW_VERSION_MAJOR 0x01
 #define T6FW_VERSION_MINOR 0x10
-#define T6FW_VERSION_MICRO 0x2B
+#define T6FW_VERSION_MICRO 0x2D
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f3a09ab..4eee18c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5078,9 +5078,11 @@ static netdev_features_t be_features_check(struct sk_buff *skb,
 	struct be_adapter *adapter = netdev_priv(dev);
 	u8 l4_hdr = 0;
 
-	/* The code below restricts offload features for some tunneled packets.
+	/* The code below restricts offload features for some tunneled and
+	 * Q-in-Q packets.
 	 * Offload features for normal (non tunnel) packets are unchanged.
 	 */
+	features = vlan_features_check(skb, features);
 	if (!skb->encapsulation ||
 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
 		return features;
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index e863ba7..8bb0db9 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -739,6 +739,8 @@ static int ethoc_open(struct net_device *dev)
 	if (ret)
 		return ret;
 
+	napi_enable(&priv->napi);
+
 	ethoc_init_ring(priv, dev->mem_start);
 	ethoc_reset(priv);
 
@@ -754,7 +756,6 @@ static int ethoc_open(struct net_device *dev)
 	priv->old_duplex = -1;
 
 	phy_start(dev->phydev);
-	napi_enable(&priv->napi);
 
 	if (netif_msg_ifup(priv)) {
 		dev_info(&dev->dev, "I/O: %08lx Memory: %08lx-%08lx\n",
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 9a520e4..290ad05 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2647,7 +2647,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
 	/* device used for DMA mapping */
-	arch_setup_dma_ops(dev, 0, 0, NULL, false);
+	set_dma_ops(dev, get_dma_ops(&pdev->dev));
 	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
 	if (err) {
 		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 56a563f..f7c8649 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3192,7 +3192,7 @@ static int fec_reset_phy(struct platform_device *pdev)
 {
 	int err, phy_reset;
 	bool active_high = false;
-	int msec = 1;
+	int msec = 1, phy_post_delay = 0;
 	struct device_node *np = pdev->dev.of_node;
 
 	if (!np)
@@ -3209,6 +3209,11 @@ static int fec_reset_phy(struct platform_device *pdev)
 	else if (!gpio_is_valid(phy_reset))
 		return 0;
 
+	err = of_property_read_u32(np, "phy-reset-post-delay", &phy_post_delay);
+	/* valid reset duration should be less than 1s */
+	if (!err && phy_post_delay > 1000)
+		return -EINVAL;
+
 	active_high = of_property_read_bool(np, "phy-reset-active-high");
 
 	err = devm_gpio_request_one(&pdev->dev, phy_reset,
@@ -3226,6 +3231,15 @@ static int fec_reset_phy(struct platform_device *pdev)
 
 	gpio_set_value_cansleep(phy_reset, !active_high);
 
+	if (!phy_post_delay)
+		return 0;
+
+	if (phy_post_delay > 20)
+		msleep(phy_post_delay);
+	else
+		usleep_range(phy_post_delay * 1000,
+			     phy_post_delay * 1000 + 1000);
+
 	return 0;
 }
 #else /* CONFIG_OF */
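The fec change above sleeps for the optional "phy-reset-post-delay" and picks the sleeping primitive by length, in line with the usual kernel guidance that jiffies-based msleep() is only worth using for delays of roughly 20 ms and more, while shorter waits should use usleep_range(). A minimal sketch of that decision, with delay_ms standing in for the value read from the device tree:

#include <linux/delay.h>
#include <linux/types.h>

/* Sleep for delay_ms milliseconds using the appropriate primitive. */
static void post_reset_delay(u32 delay_ms)
{
	if (!delay_ms)
		return;			/* no extra delay requested */

	if (delay_ms > 20)
		msleep(delay_ms);	/* long: msleep() is accurate enough */
	else
		/* short: hrtimer-based sleep with a 1 ms slack window */
		usleep_range(delay_ms * 1000, delay_ms * 1000 + 1000);
}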
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index dc0850b..8870a9a 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,6 +2,7 @@
 	tristate "FMan support"
 	depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
 	select GENERIC_ALLOCATOR
+	depends on HAS_DMA
 	select PHYLIB
 	default n
 	help
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 0b31f85..6e67d22 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -623,6 +623,8 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
 		goto no_mem;
 	}
 
+	set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
+
 	ret = platform_device_add_data(pdev, &data, sizeof(data));
 	if (ret)
 		goto err;
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 446c7b3..a10de1e 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -381,7 +381,7 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *id =
 		of_match_device(fsl_pq_mdio_match, &pdev->dev);
-	const struct fsl_pq_mdio_data *data = id->data;
+	const struct fsl_pq_mdio_data *data;
 	struct device_node *np = pdev->dev.of_node;
 	struct resource res;
 	struct device_node *tbi;
@@ -389,6 +389,13 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 	struct mii_bus *new_bus;
 	int err;
 
+	if (!id) {
+		dev_err(&pdev->dev, "Failed to match device\n");
+		return -ENODEV;
+	}
+
+	data = id->data;
+
 	dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible);
 
 	new_bus = mdiobus_alloc_size(sizeof(*priv));
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index e13aa06..7a8addd 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -29,10 +29,9 @@ enum _dsm_rst_type {
 	HNS_ROCE_RESET_FUNC     = 0x7,
 };
 
-const u8 hns_dsaf_acpi_dsm_uuid[] = {
-	0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41,
-	0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A
-};
+static const guid_t hns_dsaf_acpi_dsm_guid =
+	GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
+		  0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
 
 static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
 {
@@ -151,7 +150,7 @@ static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type,
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4);
+				&hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
 	if (!obj) {
 		dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!",
 			 port_type, port);
@@ -434,7 +433,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_PORT_TYPE_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -474,7 +473,7 @@ int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 	argv4.package.elements = &obj_args,
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_GET_SFP_STAT_FUNC, &argv4);
 
 	if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -565,7 +564,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
 	argv4.package.elements = obj_args;
 
 	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev),
-				hns_dsaf_acpi_dsm_uuid, 0,
+				&hns_dsaf_acpi_dsm_guid, 0,
 				HNS_OP_SERDES_LP_FUNC, &argv4);
 	if (!obj) {
 		dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!",
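The hns_dsaf hunks replace an open-coded 16-byte UUID array with a guid_t built by GUID_INIT(), which is what acpi_evaluate_dsm() now expects. A rough sketch of that calling convention follows; the GUID value, device and function number are placeholders, not taken from this driver:

#include <linux/acpi.h>
#include <linux/uuid.h>

/* Placeholder _DSM GUID; a real driver uses the UUID from its ACPI spec. */
static const guid_t example_dsm_guid =
	GUID_INIT(0x12345678, 0x1234, 0x1234,
		  0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77);

static int example_dsm_call(struct device *dev, u64 func, u64 arg)
{
	union acpi_object obj_args, argv4;
	union acpi_object *obj;

	obj_args.integer.type = ACPI_TYPE_INTEGER;
	obj_args.integer.value = arg;

	argv4.type = ACPI_TYPE_PACKAGE;
	argv4.package.count = 1;
	argv4.package.elements = &obj_args;

	/* revision 0; the returned object is owned by the caller */
	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &example_dsm_guid,
				0, func, &argv4);
	if (!obj)
		return -EIO;

	ACPI_FREE(obj);
	return 0;
}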
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index b8fab14..e95795b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -288,9 +288,15 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 
 		/* Force 1000M Link, Default is 0x0200 */
 		phy_write(phy_dev, 7, 0x20C);
-		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 
-		/* Enable PHY loop-back */
+		/* Powerup Fiber */
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+		val = phy_read(phy_dev, COPPER_CONTROL_REG);
+		val &= ~PHY_POWER_DOWN;
+		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
+		/* Enable Phy Loopback */
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 		val = phy_read(phy_dev, COPPER_CONTROL_REG);
 		val |= PHY_LOOP_BACK;
 		val &= ~PHY_POWER_DOWN;
@@ -299,6 +305,12 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
 		phy_write(phy_dev, 1, 0x400);
 		phy_write(phy_dev, 7, 0x200);
+
+		phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+		val = phy_read(phy_dev, COPPER_CONTROL_REG);
+		val |= PHY_POWER_DOWN;
+		phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
 		phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 		phy_write(phy_dev, 9, 0xF00);
 
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 508923f..259e69a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -343,6 +343,7 @@ static int emac_reset(struct emac_instance *dev)
 {
 	struct emac_regs __iomem *p = dev->emacp;
 	int n = 20;
+	bool __maybe_unused try_internal_clock = false;
 
 	DBG(dev, "reset" NL);
 
@@ -355,6 +356,7 @@ static int emac_reset(struct emac_instance *dev)
 	}
 
 #ifdef CONFIG_PPC_DCR_NATIVE
+do_retry:
 	/*
 	 * PPC460EX/GT Embedded Processor Advanced User's Manual
 	 * section 28.10.1 Mode Register 0 (EMACx_MR0) states:
@@ -362,10 +364,19 @@ static int emac_reset(struct emac_instance *dev)
 	 * of the EMAC. If none is present, select the internal clock
 	 * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1).
 	 * After a soft reset, select the external clock.
+	 *
+	 * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the
+	 * ethernet cable is not attached. This causes the reset to timeout
+	 * and the PHY detection code in emac_init_phy() is unable to
+	 * communicate and detect the AR8035-A PHY. As a result, the emac
+	 * driver bails out early and the user has no ethernet.
+	 * In order to stay compatible with existing configurations, the
+	 * driver will temporarily switch to the internal clock, after
+	 * the first reset fails.
 	 */
 	if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-		if (dev->phy_address == 0xffffffff &&
-		    dev->phy_map == 0xffffffff) {
+		if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+					   dev->phy_map == 0xffffffff)) {
 			/* No PHY: select internal loop clock before reset */
 			dcri_clrset(SDR0, SDR0_ETH_CFG,
 				    0, SDR0_ETH_CFG_ECS << dev->cell_index);
@@ -383,8 +394,15 @@ static int emac_reset(struct emac_instance *dev)
 
 #ifdef CONFIG_PPC_DCR_NATIVE
 	if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-		if (dev->phy_address == 0xffffffff &&
-		    dev->phy_map == 0xffffffff) {
+		if (!n && !try_internal_clock) {
+			/* first attempt has timed out. */
+			n = 20;
+			try_internal_clock = true;
+			goto do_retry;
+		}
+
+		if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+					   dev->phy_map == 0xffffffff)) {
 			/* No PHY: restore external clock source after reset */
 			dcri_clrset(SDR0, SDR0_ETH_CFG,
 				    SDR0_ETH_CFG_ECS << dev->cell_index, 0);
@@ -2460,20 +2478,24 @@ static int emac_mii_bus_reset(struct mii_bus *bus)
 	return emac_reset(dev);
 }
 
+static int emac_mdio_phy_start_aneg(struct mii_phy *phy,
+				    struct phy_device *phy_dev)
+{
+	phy_dev->autoneg = phy->autoneg;
+	phy_dev->speed = phy->speed;
+	phy_dev->duplex = phy->duplex;
+	phy_dev->advertising = phy->advertising;
+	return phy_start_aneg(phy_dev);
+}
+
 static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise)
 {
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
 
-	dev->phy.autoneg = AUTONEG_ENABLE;
-	dev->phy.speed = SPEED_1000;
-	dev->phy.duplex = DUPLEX_FULL;
-	dev->phy.advertising = advertise;
 	phy->autoneg = AUTONEG_ENABLE;
-	phy->speed = dev->phy.speed;
-	phy->duplex = dev->phy.duplex;
 	phy->advertising = advertise;
-	return phy_start_aneg(dev->phy_dev);
+	return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
@@ -2481,13 +2503,10 @@ static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
 
-	dev->phy.autoneg =  AUTONEG_DISABLE;
-	dev->phy.speed = speed;
-	dev->phy.duplex = fd;
 	phy->autoneg = AUTONEG_DISABLE;
 	phy->speed = speed;
 	phy->duplex = fd;
-	return phy_start_aneg(dev->phy_dev);
+	return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_poll_link(struct mii_phy *phy)
@@ -2509,16 +2528,17 @@ static int emac_mdio_read_link(struct mii_phy *phy)
 {
 	struct net_device *ndev = phy->dev;
 	struct emac_instance *dev = netdev_priv(ndev);
+	struct phy_device *phy_dev = dev->phy_dev;
 	int res;
 
-	res = phy_read_status(dev->phy_dev);
+	res = phy_read_status(phy_dev);
 	if (res)
 		return res;
 
-	dev->phy.speed = phy->speed;
-	dev->phy.duplex = phy->duplex;
-	dev->phy.pause = phy->pause;
-	dev->phy.asym_pause = phy->asym_pause;
+	phy->speed = phy_dev->speed;
+	phy->duplex = phy_dev->duplex;
+	phy->pause = phy_dev->pause;
+	phy->asym_pause = phy_dev->asym_pause;
 	return 0;
 }
 
@@ -2528,13 +2548,6 @@ static int emac_mdio_init_phy(struct mii_phy *phy)
 	struct emac_instance *dev = netdev_priv(ndev);
 
 	phy_start(dev->phy_dev);
-	dev->phy.autoneg = phy->autoneg;
-	dev->phy.speed = phy->speed;
-	dev->phy.duplex = phy->duplex;
-	dev->phy.advertising = phy->advertising;
-	dev->phy.pause = phy->pause;
-	dev->phy.asym_pause = phy->asym_pause;
-
 	return phy_init_hw(dev->phy_dev);
 }
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4f2d329..c0fbeb3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -81,7 +81,7 @@
 static const char ibmvnic_driver_name[] = "ibmvnic";
 static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver";
 
-MODULE_AUTHOR("Santiago Leon <santi_leon@yahoo.com>");
+MODULE_AUTHOR("Santiago Leon");
 MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
@@ -1468,6 +1468,11 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 }
 #endif
 
+static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	return -EOPNOTSUPP;
+}
+
 static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_open		= ibmvnic_open,
 	.ndo_stop		= ibmvnic_close,
@@ -1479,6 +1484,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ibmvnic_netpoll_controller,
 #endif
+	.ndo_change_mtu		= ibmvnic_change_mtu,
 };
 
 /* ethtool functions */
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index cdde3cc2..44d9610 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -399,6 +399,7 @@ struct i40e_pf {
 #define I40E_FLAG_RX_CSUM_ENABLED		BIT_ULL(1)
 #define I40E_FLAG_MSI_ENABLED			BIT_ULL(2)
 #define I40E_FLAG_MSIX_ENABLED			BIT_ULL(3)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(4)
 #define I40E_FLAG_RSS_ENABLED			BIT_ULL(6)
 #define I40E_FLAG_VMDQ_ENABLED			BIT_ULL(7)
 #define I40E_FLAG_IWARP_ENABLED			BIT_ULL(10)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 7a8eb48..894c8e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -224,7 +224,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
 	I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
 	I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
 	I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
-	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_CAPABLE, 0),
+	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
 	I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
 };
 
@@ -4092,7 +4092,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 
 	/* Only allow ATR evict on hardware that is capable of handling it */
 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
-		pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+		pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_ENABLED;
 
 	if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
 		u16 sw_flags = 0, valid_flags = 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d5c9c9e..a7a4b28 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -295,7 +295,7 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
  **/
 void i40e_service_event_schedule(struct i40e_pf *pf)
 {
-	if (!test_bit(__I40E_VSI_DOWN, pf->state) &&
+	if (!test_bit(__I40E_DOWN, pf->state) &&
 	    !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
 		queue_work(i40e_wq, &pf->service_task);
 }
@@ -3611,7 +3611,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 		 * this is not a performance path and napi_schedule()
 		 * can deal with rescheduling.
 		 */
-		if (!test_bit(__I40E_VSI_DOWN, pf->state))
+		if (!test_bit(__I40E_DOWN, pf->state))
 			napi_schedule_irqoff(&q_vector->napi);
 	}
 
@@ -3687,7 +3687,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 enable_intr:
 	/* re-enable interrupt causes */
 	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
-	if (!test_bit(__I40E_VSI_DOWN, pf->state)) {
+	if (!test_bit(__I40E_DOWN, pf->state)) {
 		i40e_service_event_schedule(pf);
 		i40e_irq_dynamic_enable_icr0(pf, false);
 	}
@@ -6203,7 +6203,7 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
 {
 
 	/* if interface is down do nothing */
-	if (test_bit(__I40E_VSI_DOWN, pf->state))
+	if (test_bit(__I40E_DOWN, pf->state))
 		return;
 
 	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
@@ -6344,7 +6344,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
 	int i;
 
 	/* if interface is down do nothing */
-	if (test_bit(__I40E_VSI_DOWN, pf->state) ||
+	if (test_bit(__I40E_DOWN, pf->state) ||
 	    test_bit(__I40E_CONFIG_BUSY, pf->state))
 		return;
 
@@ -6399,9 +6399,9 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
 		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
 		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
 	}
-	if (test_bit(__I40E_VSI_DOWN_REQUESTED, pf->state)) {
-		reset_flags |= BIT(__I40E_VSI_DOWN_REQUESTED);
-		clear_bit(__I40E_VSI_DOWN_REQUESTED, pf->state);
+	if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
+		clear_bit(__I40E_DOWN_REQUESTED, pf->state);
 	}
 
 	/* If there's a recovery already waiting, it takes
@@ -6415,7 +6415,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
 
 	/* If we're already down or resetting, just bail */
 	if (reset_flags &&
-	    !test_bit(__I40E_VSI_DOWN, pf->state) &&
+	    !test_bit(__I40E_DOWN, pf->state) &&
 	    !test_bit(__I40E_CONFIG_BUSY, pf->state)) {
 		rtnl_lock();
 		i40e_do_reset(pf, reset_flags, true);
@@ -7002,7 +7002,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	u32 val;
 	int v;
 
-	if (test_bit(__I40E_VSI_DOWN, pf->state))
+	if (test_bit(__I40E_DOWN, pf->state))
 		goto clear_recovery;
 	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
 
@@ -8821,11 +8821,12 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		    (pf->hw.aq.api_min_ver > 4))) {
 		/* Supported in FW API version higher than 1.4 */
 		pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
-		pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
-	} else {
-		pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
 	}
 
+	/* Enable HW ATR eviction if possible */
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+		pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
 	pf->eeprom_version = 0xDEAD;
 	pf->lan_veb = I40E_NO_VEB;
 	pf->lan_vsi = I40E_NO_VSI;
@@ -9767,7 +9768,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
 		return -ENODEV;
 	}
 	if (vsi == pf->vsi[pf->lan_vsi] &&
-	    !test_bit(__I40E_VSI_DOWN, pf->state)) {
+	    !test_bit(__I40E_DOWN, pf->state)) {
 		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
 		return -ENODEV;
 	}
@@ -11003,7 +11004,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	pf->next_vsi = 0;
 	pf->pdev = pdev;
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 
 	hw = &pf->hw;
 	hw->back = pf;
@@ -11293,7 +11294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * before setting up the misc vector or we get a race and the vector
 	 * ends up disabled forever.
 	 */
-	clear_bit(__I40E_VSI_DOWN, pf->state);
+	clear_bit(__I40E_DOWN, pf->state);
 
 	/* In case of MSIX we are going to setup the misc vector right here
 	 * to handle admin queue events etc. In case of legacy and MSI
@@ -11448,7 +11449,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Unwind what we've done if something failed in the setup */
 err_vsis:
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	i40e_clear_interrupt_scheme(pf);
 	kfree(pf->vsi);
 err_switch_setup:
@@ -11500,7 +11501,7 @@ static void i40e_remove(struct pci_dev *pdev)
 
 	/* no more scheduling of any task */
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	if (pf->service_timer.data)
 		del_timer_sync(&pf->service_timer);
 	if (pf->service_task.func)
@@ -11740,7 +11741,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	struct i40e_hw *hw = &pf->hw;
 
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 	rtnl_lock();
 	i40e_prep_for_reset(pf, true);
 	rtnl_unlock();
@@ -11789,7 +11790,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 	int retval = 0;
 
 	set_bit(__I40E_SUSPENDED, pf->state);
-	set_bit(__I40E_VSI_DOWN, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
 
 	if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE))
 		i40e_enable_mc_magic_wake(pf);
@@ -11841,7 +11842,7 @@ static int i40e_resume(struct pci_dev *pdev)
 
 	/* handling the reset will rebuild the device state */
 	if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
-		clear_bit(__I40E_VSI_DOWN, pf->state);
+		clear_bit(__I40E_DOWN, pf->state);
 		rtnl_lock();
 		i40e_reset_and_rebuild(pf, false, true);
 		rtnl_unlock();
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 29321a6..77115c2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1854,7 +1854,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
 #endif
 	struct sk_buff *skb;
 
@@ -2340,7 +2341,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* Due to lack of space, no more new filters can be programmed */
 	if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
 		return;
-	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
 		/* HW ATR eviction will take care of removing filters on FIN
 		 * and RST packets.
 		 */
@@ -2402,7 +2403,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 
-	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
 		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
 
 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 95c23fb..0fb38ca 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3017,10 +3017,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 					   VLAN_VID_MASK));
 	}
 
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	if (vlan_id || qos)
 		ret = i40e_vsi_add_pvid(vsi, vlanprio);
 	else
 		i40e_vsi_remove_pvid(vsi);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
 	if (vlan_id) {
 		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index dfe241a..12b02e5 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1190,7 +1190,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
 #endif
 	struct sk_buff *skb;
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9b875d7..33c9016 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3719,7 +3719,7 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 				    dma_addr_t *dma_addr,
 				    phys_addr_t *phys_addr)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	*dma_addr = mvpp2_percpu_read(priv, cpu,
 				      MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
@@ -3740,6 +3740,8 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 		if (sizeof(phys_addr_t) == 8)
 			*phys_addr |= (u64)phys_addr_highbits << 32;
 	}
+
+	put_cpu();
 }
 
 /* Free all buffers from the pool */
@@ -3920,18 +3922,12 @@ static inline u32 mvpp2_bm_cookie_pool_set(u32 cookie, int pool)
 	return bm;
 }
 
-/* Get pool number from a BM cookie */
-static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
-{
-	return (cookie >> MVPP2_BM_COOKIE_POOL_OFFS) & 0xFF;
-}
-
 /* Release buffer to BM */
 static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 				     dma_addr_t buf_dma_addr,
 				     phys_addr_t buf_phys_addr)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	if (port->priv->hw_version == MVPP22) {
 		u32 val = 0;
@@ -3958,15 +3954,15 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 			   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
 	mvpp2_percpu_write(port->priv, cpu,
 			   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+
+	put_cpu();
 }
 
 /* Refill BM pool */
-static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
+static void mvpp2_pool_refill(struct mvpp2_port *port, int pool,
 			      dma_addr_t dma_addr,
 			      phys_addr_t phys_addr)
 {
-	int pool = mvpp2_bm_cookie_pool_get(bm);
-
 	mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 }
 
@@ -4186,8 +4182,6 @@ static void mvpp22_port_mii_set(struct mvpp2_port *port)
 {
 	u32 val;
 
-	return;
-
 	/* Only GOP port 0 has an XLG MAC */
 	if (port->gop_id == 0) {
 		val = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4515,21 +4509,6 @@ static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
-/* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
-				 struct mvpp2_rx_desc *rx_desc)
-{
-	int cpu = smp_processor_id();
-	int pool;
-
-	pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
-		MVPP2_RXD_BM_POOL_ID_MASK) >>
-		MVPP2_RXD_BM_POOL_ID_OFFS;
-
-	return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
-	       ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
-}
-
 /* Tx descriptors helper methods */
 
 /* Get pointer to next Tx descriptor to be processed (send) by HW */
@@ -4757,7 +4736,7 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 				   struct mvpp2_rx_queue *rxq)
 {
-	int cpu = smp_processor_id();
+	int cpu = get_cpu();
 
 	if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
 		rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
@@ -4765,6 +4744,8 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
 			   rxq->pkts_coal);
+
+	put_cpu();
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
@@ -4945,7 +4926,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
 	/* Set Rx descriptors queue starting address - indirect access */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	if (port->priv->hw_version == MVPP21)
 		rxq_dma = rxq->descs_dma;
@@ -4954,6 +4935,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
+	put_cpu();
 
 	/* Set Offset */
 	mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
@@ -4980,9 +4962,13 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 
 	for (i = 0; i < rx_received; i++) {
 		struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
-		u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
+		u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+		int pool;
 
-		mvpp2_pool_refill(port, bm,
+		pool = (status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+			MVPP2_RXD_BM_POOL_ID_OFFS;
+
+		mvpp2_pool_refill(port, pool,
 				  mvpp2_rxdesc_dma_addr_get(port, rx_desc),
 				  mvpp2_rxdesc_cookie_get(port, rx_desc));
 	}
@@ -5012,10 +4998,11 @@ static void mvpp2_rxq_deinit(struct mvpp2_port *port,
 	 * free descriptor number
 	 */
 	mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
+	put_cpu();
 }
 
 /* Create and initialize a Tx queue */
@@ -5038,7 +5025,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 	txq->last_desc = txq->size - 1;
 
 	/* Set Tx descriptors queue starting address - indirect access */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
 			   txq->descs_dma);
@@ -5063,6 +5050,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
 			   MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
 			   MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
+	put_cpu();
 
 	/* WRR / EJP configuration - indirect access */
 	tx_port_num = mvpp2_egress_port(port);
@@ -5133,10 +5121,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
 	mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
 
 	/* Set Tx descriptors queue starting address and size */
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
+	put_cpu();
 }
 
 /* Cleanup Tx ports */
@@ -5146,7 +5135,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 	int delay, pending, cpu;
 	u32 val;
 
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
 	val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
 	val |= MVPP2_TXQ_DRAIN_EN_MASK;
@@ -5173,6 +5162,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 
 	val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
 	mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+	put_cpu();
 
 	for_each_present_cpu(cpu) {
 		txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
@@ -5420,7 +5410,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 
 /* Reuse skb if possible, or allocate a new skb and add it to BM pool */
 static int mvpp2_rx_refill(struct mvpp2_port *port,
-			   struct mvpp2_bm_pool *bm_pool, u32 bm)
+			   struct mvpp2_bm_pool *bm_pool, int pool)
 {
 	dma_addr_t dma_addr;
 	phys_addr_t phys_addr;
@@ -5432,7 +5422,7 @@ static int mvpp2_rx_refill(struct mvpp2_port *port,
 	if (!buf)
 		return -ENOMEM;
 
-	mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+	mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
 
 	return 0;
 }
@@ -5490,7 +5480,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 		unsigned int frag_size;
 		dma_addr_t dma_addr;
 		phys_addr_t phys_addr;
-		u32 bm, rx_status;
+		u32 rx_status;
 		int pool, rx_bytes, err;
 		void *data;
 
@@ -5502,8 +5492,8 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 		phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
 		data = (void *)phys_to_virt(phys_addr);
 
-		bm = mvpp2_bm_cookie_build(port, rx_desc);
-		pool = mvpp2_bm_cookie_pool_get(bm);
+		pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+			MVPP2_RXD_BM_POOL_ID_OFFS;
 		bm_pool = &port->priv->bm_pools[pool];
 
 		/* In case of an error, release the requested buffer pointer
@@ -5516,7 +5506,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 			dev->stats.rx_errors++;
 			mvpp2_rx_error(port, rx_desc);
 			/* Return the buffer to the pool */
-			mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+			mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
 			continue;
 		}
 
@@ -5531,7 +5521,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
 			goto err_drop_frame;
 		}
 
-		err = mvpp2_rx_refill(port, bm_pool, bm);
+		err = mvpp2_rx_refill(port, bm_pool, pool);
 		if (err) {
 			netdev_err(port->dev, "failed to refill BM pools\n");
 			goto err_drop_frame;
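Most of the mvpp2 changes swap smp_processor_id() for get_cpu()/put_cpu() around the indirect per-CPU register accesses, so preemption stays disabled while a group of writes targets one CPU's register window. A sketch of the general shape of that pattern, with a stubbed-out accessor standing in for mvpp2_percpu_write():

#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/types.h>

/* Stub standing in for a real per-CPU register accessor. */
static void percpu_reg_write(int cpu, u32 reg, u32 val)
{
	/* a real driver would write to the per-CPU register window here */
}

static void example_rxq_setup(u32 qid, u32 desc_addr, u32 desc_size)
{
	/* get_cpu() returns the current CPU id with preemption disabled,
	 * guaranteeing all three writes hit the same CPU's window.
	 */
	int cpu = get_cpu();

	percpu_reg_write(cpu, 0x00, qid);	/* hypothetical NUM register */
	percpu_reg_write(cpu, 0x04, desc_addr);	/* hypothetical ADDR register */
	percpu_reg_write(cpu, 0x08, desc_size);	/* hypothetical SIZE register */

	put_cpu();				/* re-enable preemption */
}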
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ae5fdc2..ffbcb27 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1562,11 +1562,6 @@ static int mlx4_en_flow_replace(struct net_device *dev,
 		qpn = priv->drop_qp.qpn;
 	else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
 		qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
-		if (qpn < priv->rss_map.base_qpn ||
-		    qpn >= priv->rss_map.base_qpn + priv->rx_ring_num) {
-			en_warn(priv, "rxnfc: QP (0x%x) doesn't exist\n", qpn);
-			return -EINVAL;
-		}
 	} else {
 		if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
 			en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1a670b6..0710b36 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -35,6 +35,7 @@
 #include <linux/etherdevice.h>
 
 #include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
 #include <linux/export.h>
 
 #include "mlx4.h"
@@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
 
+	if (!mlx4_qp_lookup(dev, rule->qpn)) {
+		mlx4_err_rule(dev, "QP doesn't exist\n", rule);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	trans_rule_ctrl_to_hw(rule, mailbox->buf);
 
 	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
 
 	list_for_each_entry(cur, &rule->list, list) {
 		ret = parse_trans_rule(dev, cur, mailbox->buf + size);
-		if (ret < 0) {
-			mlx4_free_cmd_mailbox(dev, mailbox);
-			return ret;
-		}
+		if (ret < 0)
+			goto out;
+
 		size += ret;
 	}
 
@@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
 		}
 	}
 
+out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2d6abd4..5a310d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -384,6 +384,19 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
+{
+	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+	struct mlx4_qp *qp;
+
+	spin_lock(&qp_table->lock);
+
+	qp = __mlx4_qp_lookup(dev, qpn);
+
+	spin_unlock(&qp_table->lock);
+	return qp;
+}
+
 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -471,6 +484,12 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 	}
 
 	if (attr & MLX4_UPDATE_QP_QOS_VPORT) {
+		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) {
+			mlx4_warn(dev, "Granular QoS per VF is not enabled\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
 		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP;
 		cmd->qp_context.qos_vport = params->qos_vport;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0751654..8127838 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -5255,6 +5255,13 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
 	mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
 }
 
+static void update_qos_vpp(struct mlx4_update_qp_context *ctx,
+			   struct mlx4_vf_immed_vlan_work *work)
+{
+	ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP);
+	ctx->qp_context.qos_vport = work->qos_vport;
+}
+
 void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 {
 	struct mlx4_vf_immed_vlan_work *work =
@@ -5369,11 +5376,10 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 					qp->sched_queue & 0xC7;
 				upd_context->qp_context.pri_path.sched_queue |=
 					((work->qos & 0x7) << 3);
-				upd_context->qp_mask |=
-					cpu_to_be64(1ULL <<
-						    MLX4_UPD_QP_MASK_QOS_VPP);
-				upd_context->qp_context.qos_vport =
-					work->qos_vport;
+
+				if (dev->caps.flags2 &
+				    MLX4_DEV_CAP_FLAG2_QOS_VPP)
+					update_qos_vpp(upd_context, work);
 			}
 
 			err = mlx4_cmd(dev, mailbox->dma,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 5bdaf3d..10d2828 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work)
 	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 		       mlx5_command_str(msg_to_opcode(ent->in)),
 		       msg_to_opcode(ent->in));
-	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 }
 
 static void cmd_work_handler(struct work_struct *work)
@@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work)
 	}
 
 	cmd->ent_arr[ent->idx] = ent;
+	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
 	memset(lay, 0, sizeof(*lay));
@@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work)
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
 
+	/* Skip sending command to fw if internal error */
+	if (pci_channel_offline(dev->pdev) ||
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		u8 status = 0;
+		u32 drv_synd;
+
+		ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+		MLX5_SET(mbox_out, ent->out, status, status);
+		MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+		return;
+	}
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work)
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
 	}
 }
 
@@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 		wait_for_completion(&ent->done);
 	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
 		ent->ret = -ETIMEDOUT;
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
 	}
 
 	err = ent->ret;
@@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 	}
 }
 
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+
+			/* if we already completed the command, ignore it */
+			if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+						&ent->state)) {
+				/* only real completion can free the cmd slot */
+				if (!forced) {
+					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+						      ent->idx);
+					free_ent(cmd, ent->idx);
+				}
+				continue;
+			}
+
 			if (ent->callback)
 				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
@@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 				mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
 					      ent->ret, deliv_status_to_str(ent->status), ent->status);
 			}
-			free_ent(cmd, ent->idx);
+
+			/* only real completion will free the entry slot */
+			if (!forced)
+				free_ent(cmd, ent->idx);
 
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2fd044b..944fc17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -458,13 +458,15 @@ struct mlx5e_mpw_info {
 
 struct mlx5e_rx_am_stats {
 	int ppms; /* packets per msec */
+	int bpms; /* bytes per msec */
 	int epms; /* events per msec */
 };
 
 struct mlx5e_rx_am_sample {
-	ktime_t		time;
-	unsigned int	pkt_ctr;
-	u16		event_ctr;
+	ktime_t	time;
+	u32	pkt_ctr;
+	u32	byte_ctr;
+	u16	event_ctr;
 };
 
 struct mlx5e_rx_am { /* Adaptive Moderation */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 8209aff..16486df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1242,11 +1242,11 @@ static int mlx5e_get_ts_info(struct net_device *dev,
 				 SOF_TIMESTAMPING_RX_HARDWARE |
 				 SOF_TIMESTAMPING_RAW_HARDWARE;
 
-	info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) |
-			 (BIT(1) << HWTSTAMP_TX_ON);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+			 BIT(HWTSTAMP_TX_ON);
 
-	info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) |
-			   (BIT(1) << HWTSTAMP_FILTER_ALL);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_ALL);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 41cd22a..277f4de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4241,7 +4241,8 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 	return netdev;
 
 err_cleanup_nic:
-	profile->cleanup(priv);
+	if (profile->cleanup)
+		profile->cleanup(priv);
 	free_netdev(netdev);
 
 	return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 79462c0..46984a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -791,6 +791,8 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 	params->tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
 	params->num_tc                = 1;
 	params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 7b1566f..66b5fec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1041,6 +1041,8 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 #define MLX5_IB_GRH_BYTES       40
 #define MLX5_IPOIB_ENCAP_LEN    4
 #define MLX5_GID_SIZE           16
+#define MLX5_IPOIB_PSEUDO_LEN   20
+#define MLX5_IPOIB_HARD_LEN     (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
 
 static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct mlx5_cqe64 *cqe,
@@ -1048,6 +1050,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 struct sk_buff *skb)
 {
 	struct net_device *netdev = rq->netdev;
+	char *pseudo_header;
 	u8 *dgid;
 	u8 g;
 
@@ -1076,8 +1079,11 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	if (likely(netdev->features & NETIF_F_RXHASH))
 		mlx5e_skb_set_hash(cqe, skb);
 
+	/* 20 bytes of ipoib pseudo header and 4 bytes of encap header */
+	pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+	memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
 	skb_reset_mac_header(skb);
-	skb_pull(skb, MLX5_IPOIB_ENCAP_LEN);
+	skb_pull(skb, MLX5_IPOIB_HARD_LEN);
 
 	skb->dev = netdev;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
index 02dd3a9..acf32fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
@@ -183,28 +183,27 @@ static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am)
 	mlx5e_am_step(am);
 }
 
+#define IS_SIGNIFICANT_DIFF(val, ref) \
+	(((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
+
 static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr,
 				  struct mlx5e_rx_am_stats *prev)
 {
-	int diff;
-
-	if (!prev->ppms)
-		return curr->ppms ? MLX5E_AM_STATS_BETTER :
+	if (!prev->bpms)
+		return curr->bpms ? MLX5E_AM_STATS_BETTER :
 				    MLX5E_AM_STATS_SAME;
 
-	diff = curr->ppms - prev->ppms;
-	if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */
-		return (diff > 0) ? MLX5E_AM_STATS_BETTER :
-				    MLX5E_AM_STATS_WORSE;
+	if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+		return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
-	if (!prev->epms)
-		return curr->epms ? MLX5E_AM_STATS_WORSE :
-				    MLX5E_AM_STATS_SAME;
+	if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+		return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
-	diff = curr->epms - prev->epms;
-	if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */
-		return (diff < 0) ? MLX5E_AM_STATS_BETTER :
-				    MLX5E_AM_STATS_WORSE;
+	if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+		return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER :
+						   MLX5E_AM_STATS_WORSE;
 
 	return MLX5E_AM_STATS_SAME;
 }
@@ -266,10 +265,13 @@ static void mlx5e_am_sample(struct mlx5e_rq *rq,
 {
 	s->time	     = ktime_get();
 	s->pkt_ctr   = rq->stats.packets;
+	s->byte_ctr  = rq->stats.bytes;
 	s->event_ctr = rq->cq.event_ctr;
 }
 
 #define MLX5E_AM_NEVENTS 64
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 				struct mlx5e_rx_am_sample *end,
@@ -277,13 +279,17 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 {
 	/* u32 holds up to 71 minutes, should be enough */
 	u32 delta_us = ktime_us_delta(end->time, start->time);
-	unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
+	u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+	u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+			     start->byte_ctr);
 
 	if (!delta_us)
 		return;
 
-	curr_stats->ppms =            (npkts * USEC_PER_MSEC) / delta_us;
-	curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;
+	curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+	curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+	curr_stats->epms = DIV_ROUND_UP(MLX5E_AM_NEVENTS * USEC_PER_MSEC,
+					delta_us);
 }
 
 void mlx5e_rx_am_work(struct work_struct *work)
@@ -308,7 +314,8 @@ void mlx5e_rx_am(struct mlx5e_rq *rq)
 
 	switch (am->state) {
 	case MLX5E_AM_MEASURE_IN_PROGRESS:
-		nevents = rq->cq.event_ctr - am->start_sample.event_ctr;
+		nevents = BIT_GAP(BITS_PER_TYPE(u16), rq->cq.event_ctr,
+				  am->start_sample.event_ctr);
 		if (nevents < MLX5E_AM_NEVENTS)
 			break;
 		mlx5e_am_sample(rq, &end_sample);
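
The BIT_GAP() macro introduced above computes the forward distance between two free-running counters modulo 2^bits, so the delta stays correct even after the counter wraps; that is what makes it safe to shrink the sample fields to u32/u16. A standalone sanity check using the same macro definitions (illustrative only):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8
#define BIT_ULL(nr) (1ULL << (nr))
#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
#define BIT_GAP(bits, end, start) \
	((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))

int main(void)
{
	uint16_t start = 0xFFF0, end = 0x0010;	/* counter wrapped past 2^16 */

	/* Naive subtraction gives a negative/huge delta after the wrap;
	 * BIT_GAP folds it back into the counter's modulus: 32 (0x20). */
	printf("delta = %llu\n", BIT_GAP(BITS_PER_TYPE(uint16_t), end, start));
	return 0;
}
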
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 53e4992..f81c3aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -417,20 +417,13 @@ struct mlx5e_stats {
 };
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-	{ "module_plug", 0 },
 	{ "module_unplug", 8 },
 };
 
 static const struct counter_desc mlx5e_pme_error_desc[] = {
-	{ "module_pwr_budget_exd", 0 },  /* power budget exceed */
-	{ "module_long_range", 8 },      /* long range for non MLNX cable */
-	{ "module_bus_stuck", 16 },      /* bus stuck (I2C or data shorted) */
-	{ "module_no_eeprom", 24 },      /* no eeprom/retry time out */
-	{ "module_enforce_part", 32 },   /* enforce part number list */
-	{ "module_unknown_id", 40 },     /* unknown identifier */
-	{ "module_high_temp", 48 },      /* high temperature */
+	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
+	{ "module_high_temp", 48 },       /* high temperature */
 	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
-	{ "module_unknown_status", 64 },
 };
 
 #endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 11c27e4..9df9fc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -43,6 +43,7 @@
 #include <net/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_csum.h>
 #include <net/vxlan.h>
 #include <net/arp.h>
 #include "en.h"
@@ -384,7 +385,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 			mlx5_encap_dealloc(priv->mdev, e->encap_id);
 
-		hlist_del_rcu(&e->encap_hlist);
+		hash_del_rcu(&e->encap_hlist);
 		kfree(e->encap_header);
 		kfree(e);
 	}
@@ -894,7 +895,6 @@ static struct mlx5_fields fields[] = {
 	{MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
 	{MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},
 
-	{MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
 	{MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
 	{MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
 	{MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},
@@ -925,11 +925,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
 	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-	int i, action_size, nactions, max_actions, first, last;
+	int i, action_size, nactions, max_actions, first, last, first_z;
 	void *s_masks_p, *a_masks_p, *vals_p;
-	u32 s_mask, a_mask, val;
 	struct mlx5_fields *f;
 	u8 cmd, field_bsize;
+	u32 s_mask, a_mask;
 	unsigned long mask;
 	void *action;
 
@@ -946,7 +946,8 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
 		f = &fields[i];
 		/* avoid seeing bits set from previous iterations */
-		s_mask = a_mask = mask = val = 0;
+		s_mask = 0;
+		a_mask = 0;
 
 		s_masks_p = (void *)set_masks + f->offset;
 		a_masks_p = (void *)add_masks + f->offset;
@@ -981,12 +982,12 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 			memset(a_masks_p, 0, f->size);
 		}
 
-		memcpy(&val, vals_p, f->size);
-
 		field_bsize = f->size * BITS_PER_BYTE;
+
+		first_z = find_first_zero_bit(&mask, field_bsize);
 		first = find_first_bit(&mask, field_bsize);
 		last  = find_last_bit(&mask, field_bsize);
-		if (first > 0 || last != (field_bsize - 1)) {
+		if (first > 0 || last != (field_bsize - 1) || first_z < last) {
 			printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
 			       mask);
 			return -EOPNOTSUPP;
@@ -1002,11 +1003,11 @@ static int offload_pedit_fields(struct pedit_headers *masks,
 		}
 
 		if (field_bsize == 32)
-			MLX5_SET(set_action_in, action, data, ntohl(val));
+			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p));
 		else if (field_bsize == 16)
-			MLX5_SET(set_action_in, action, data, ntohs(val));
+			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p));
 		else if (field_bsize == 8)
-			MLX5_SET(set_action_in, action, data, val);
+			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p);
 
 		action += action_size;
 		nactions++;
@@ -1109,6 +1110,28 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
 	return err;
 }
 
+static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+{
+	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+			 TCA_CSUM_UPDATE_FLAG_UDP;
+
+	/* The HW recalculates checksums only when re-writing headers */
+	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+		netdev_warn(priv->netdev,
+			    "TC csum action is only offloaded with pedit\n");
+		return false;
+	}
+
+	if (update_flags & ~prot_flags) {
+		netdev_warn(priv->netdev,
+			    "can't offload TC csum action for some header/s - flags %#x\n",
+			    update_flags);
+		return false;
+	}
+
+	return true;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				struct mlx5e_tc_flow_parse_attr *parse_attr,
 				struct mlx5e_tc_flow *flow)
@@ -1149,6 +1172,14 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_skbedit_mark(a)) {
 			u32 mark = tcf_skbedit_mark(a);
 
@@ -1651,6 +1682,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_csum(a)) {
+			if (csum_offload_supported(priv, attr->action,
+						   tcf_csum_update_flags(a)))
+				continue;
+
+			return -EOPNOTSUPP;
+		}
+
 		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev, *encap_dev = NULL;
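
In the pedit hunk above, the added first_z < last test rejects masks whose set bits contain holes, something the old first/last check missed (for example 0b101 has first == 0 and last == 2, yet bit 1 is untouched, so it is not a full rewrite). A rough userspace sketch of the combined test; the bit-scan helpers are simplified stand-ins for find_first_bit()/find_last_bit()/find_first_zero_bit():

#include <stdbool.h>
#include <stdio.h>

/* Scan a single word of 'bits' valid bits. */
static int first_bit(unsigned long m, int bits)
{
	for (int i = 0; i < bits; i++)
		if (m & (1UL << i))
			return i;
	return bits;
}

static int last_bit(unsigned long m, int bits)
{
	for (int i = bits - 1; i >= 0; i--)
		if (m & (1UL << i))
			return i;
	return bits;
}

static int first_zero_bit(unsigned long m, int bits)
{
	for (int i = 0; i < bits; i++)
		if (!(m & (1UL << i)))
			return i;
	return bits;
}

/* Full-field rewrite only: set bits must run from 0 to bits-1 with no holes. */
static bool full_rewrite(unsigned long mask, int bits)
{
	int first = first_bit(mask, bits);
	int last = last_bit(mask, bits);
	int first_z = first_zero_bit(mask, bits);

	return !(first > 0 || last != bits - 1 || first_z < last);
}

int main(void)
{
	printf("0xff: %d\n", full_rewrite(0xff, 8));	/* 1: full field */
	printf("0x0f: %d\n", full_rewrite(0x0f, 8));	/* 0: upper half untouched */
	printf("0xa1: %d\n", full_rewrite(0xa1, 8));	/* 0: hole below the last set bit */
	return 0;
}
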
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ea5d8d3..33eae5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
-			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
+			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
 			break;
 
 		case MLX5_EVENT_TYPE_PORT_CHANGE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f991f66..a53e982 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -906,21 +906,34 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
 	return 0;
 }
 
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
 {
-	struct mlx5_core_dev *dev;
-	u16 cur_mlx5_mode, mlx5_mode = 0;
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
 
-	dev = devlink_priv(devlink);
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return -EOPNOTSUPP;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
 
-	cur_mlx5_mode = dev->priv.eswitch->mode;
-
-	if (cur_mlx5_mode == SRIOV_NONE)
+	if (dev->priv.eswitch->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
+	return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u16 cur_mlx5_mode, mlx5_mode = 0;
+	int err;
+
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
+
+	cur_mlx5_mode = dev->priv.eswitch->mode;
+
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
@@ -937,15 +950,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
-	struct mlx5_core_dev *dev;
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	int err;
 
-	dev = devlink_priv(devlink);
-
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (dev->priv.eswitch->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
@@ -954,15 +964,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
-	int num_vports = esw->enabled_vports;
 	int err, vport;
 	u8 mlx5_mode;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
 	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
@@ -985,7 +992,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	if (err)
 		goto out;
 
-	for (vport = 1; vport < num_vports; vport++) {
+	for (vport = 1; vport < esw->enabled_vports; vport++) {
 		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
 		if (err) {
 			esw_warn(dev, "Failed to set min inline on vport %d\n",
@@ -1010,12 +1017,11 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
@@ -1062,11 +1068,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
 	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
@@ -1105,12 +1109,11 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
-
-	if (esw->mode == SRIOV_NONE)
-		return -EOPNOTSUPP;
+	err = mlx5_devlink_eswitch_check(devlink);
+	if (err)
+		return err;
 
 	*encap = esw->offloads.encap;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 0e487e8..8f5125c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -862,7 +862,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
 	ft_attr.level   = level;
 	ft_attr.prio    = prio;
 
-	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
 }
 
 struct mlx5_flow_table*
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d051539..f27f84f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
-	mlx5_cmd_comp_handler(dev, vector);
+	mlx5_cmd_comp_handler(dev, vector, true);
 	return;
 
 no_trig:
@@ -275,10 +275,8 @@ static void poll_health(unsigned long data)
 	struct mlx5_core_health *health = &dev->priv.health;
 	u32 count;
 
-	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		mod_timer(&health->timer, get_next_poll_jiffies());
-		return;
-	}
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		goto out;
 
 	count = ioread32be(health->health_counter);
 	if (count == health->prev)
@@ -290,8 +288,6 @@ static void poll_health(unsigned long data)
 	if (health->miss_counter == MAX_MISSES) {
 		dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
 		print_health_info(dev);
-	} else {
-		mod_timer(&health->timer, get_next_poll_jiffies());
 	}
 
 	if (in_fatal(dev) && !health->sick) {
@@ -305,6 +301,9 @@ static void poll_health(unsigned long data)
 				"new health works are not permitted at this stage\n");
 		spin_unlock(&health->wq_lock);
 	}
+
+out:
+	mod_timer(&health->timer, get_next_poll_jiffies());
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0c123d5..13be264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -175,8 +175,9 @@ static struct mlx5_profile profile[] = {
 	},
 };
 
-#define FW_INIT_TIMEOUT_MILI	2000
-#define FW_INIT_WAIT_MS		2
+#define FW_INIT_TIMEOUT_MILI		2000
+#define FW_INIT_WAIT_MS			2
+#define FW_PRE_INIT_TIMEOUT_MILI	10000
 
 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
 {
@@ -537,8 +538,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	/* disable cmdif checksum */
 	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
 
-	/* If the HCA supports 4K UARs use it */
-	if (MLX5_CAP_GEN_MAX(dev, uar_4k))
+	/* Enable 4K UAR only when HCA supports it and page size is bigger
+	 * than 4K.
+	 */
+	if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
 
 	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
@@ -612,7 +615,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	struct mlx5_priv *priv  = &mdev->priv;
 	struct msix_entry *msix = priv->msix_arr;
 	int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
-	int err;
 
 	if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
 		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
@@ -622,18 +624,11 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
 			priv->irq_info[i].mask);
 
-	err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
-	if (err) {
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x",
-			       irq);
-		goto err_clear_mask;
-	}
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
 
 	return 0;
-
-err_clear_mask:
-	free_cpumask_var(priv->irq_info[i].mask);
-	return err;
 }
 
 static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
@@ -1019,6 +1014,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	 */
 	dev->state = MLX5_DEVICE_STATE_UP;
 
+	/* wait for the firmware to accept the initialization segment
+	 * configuration
+	 */
+	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+	if (err) {
+		dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+			FW_PRE_INIT_TIMEOUT_MILI);
+		goto out;
+	}
+
 	err = mlx5_cmd_init(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 9f89c41..0744452 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3334,6 +3334,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
 	u16 vid = vlan_dev_vlan_id(vlan_dev);
 
+	if (netif_is_bridge_port(vlan_dev))
+		return 0;
+
 	if (mlxsw_sp_port_dev_check(real_dev))
 		return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
 						     vid);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 483241b..a672f6a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -2956,7 +2956,7 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
 				qed_wr(p_hwfn,
 				       p_ptt,
 				       s_storm_defs[storm_id].cm_ctx_wr_addr,
-				       BIT(9) | lid);
+				       (i << 9) | lid);
 				*(dump_buf + offset) = qed_rd(p_hwfn,
 							      p_ptt,
 							      rd_reg_addr);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 537d123..715b3aa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1730,7 +1730,8 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
 		qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
 		break;
 	default:
-		DP_ERR(cdev, "Invalid protocol type = %d\n", type);
+		DP_VERBOSE(cdev, QED_MSG_SP,
+			   "Invalid protocol type = %d\n", type);
 		return;
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 7245b10..8131292 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1824,22 +1824,44 @@ struct qlcnic_hardware_ops {
 	u32 (*get_cap_size)(void *, int);
 	void (*set_sys_info)(void *, int, u32);
 	void (*store_cap_mask)(void *, u32);
+	bool (*encap_rx_offload) (struct qlcnic_adapter *adapter);
+	bool (*encap_tx_offload) (struct qlcnic_adapter *adapter);
 };
 
 extern struct qlcnic_nic_template qlcnic_vf_ops;
 
-static inline bool qlcnic_encap_tx_offload(struct qlcnic_adapter *adapter)
+static inline bool qlcnic_83xx_encap_tx_offload(struct qlcnic_adapter *adapter)
 {
 	return adapter->ahw->extra_capability[0] &
 	       QLCNIC_83XX_FW_CAPAB_ENCAP_TX_OFFLOAD;
 }
 
-static inline bool qlcnic_encap_rx_offload(struct qlcnic_adapter *adapter)
+static inline bool qlcnic_83xx_encap_rx_offload(struct qlcnic_adapter *adapter)
 {
 	return adapter->ahw->extra_capability[0] &
 	       QLCNIC_83XX_FW_CAPAB_ENCAP_RX_OFFLOAD;
 }
 
+static inline bool qlcnic_82xx_encap_tx_offload(struct qlcnic_adapter *adapter)
+{
+	return false;
+}
+
+static inline bool qlcnic_82xx_encap_rx_offload(struct qlcnic_adapter *adapter)
+{
+	return false;
+}
+
+static inline bool qlcnic_encap_rx_offload(struct qlcnic_adapter *adapter)
+{
+	return adapter->ahw->hw_ops->encap_rx_offload(adapter);
+}
+
+static inline bool qlcnic_encap_tx_offload(struct qlcnic_adapter *adapter)
+{
+	return adapter->ahw->hw_ops->encap_tx_offload(adapter);
+}
+
 static inline int qlcnic_start_firmware(struct qlcnic_adapter *adapter)
 {
 	return adapter->nic_ops->start_firmware(adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 4fb6879..f7080d0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -242,6 +242,8 @@ static struct qlcnic_hardware_ops qlcnic_83xx_hw_ops = {
 	.get_cap_size			= qlcnic_83xx_get_cap_size,
 	.set_sys_info			= qlcnic_83xx_set_sys_info,
 	.store_cap_mask			= qlcnic_83xx_store_cap_mask,
+	.encap_rx_offload		= qlcnic_83xx_encap_rx_offload,
+	.encap_tx_offload		= qlcnic_83xx_encap_tx_offload,
 };
 
 static struct qlcnic_nic_template qlcnic_83xx_ops = {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 838cc0c..7848cf0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -341,7 +341,7 @@ qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg)
 			}
 			return -EIO;
 		}
-		usleep_range(1000, 1500);
+		udelay(1200);
 	}
 
 	if (id_reg)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index b6628aa..1b5f7d5 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -632,6 +632,8 @@ static struct qlcnic_hardware_ops qlcnic_hw_ops = {
 	.get_cap_size			= qlcnic_82xx_get_cap_size,
 	.set_sys_info			= qlcnic_82xx_set_sys_info,
 	.store_cap_mask			= qlcnic_82xx_store_cap_mask,
+	.encap_rx_offload               = qlcnic_82xx_encap_rx_offload,
+	.encap_tx_offload               = qlcnic_82xx_encap_tx_offload,
 };
 
 static int qlcnic_check_multi_tx_capability(struct qlcnic_adapter *adapter)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 2f656f3..c58180f 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -77,6 +77,8 @@ static struct qlcnic_hardware_ops qlcnic_sriov_vf_hw_ops = {
 	.free_mac_list			= qlcnic_sriov_vf_free_mac_list,
 	.enable_sds_intr		= qlcnic_83xx_enable_sds_intr,
 	.disable_sds_intr		= qlcnic_83xx_disable_sds_intr,
+	.encap_rx_offload               = qlcnic_83xx_encap_rx_offload,
+	.encap_tx_offload               = qlcnic_83xx_encap_tx_offload,
 };
 
 static struct qlcnic_nic_template qlcnic_sriov_vf_ops = {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index cc065ff..bcd4708 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -931,7 +931,7 @@ int emac_mac_up(struct emac_adapter *adpt)
 	emac_mac_config(adpt);
 	emac_mac_rx_descs_refill(adpt, &adpt->rx_q);
 
-	adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
+	adpt->phydev->irq = PHY_POLL;
 	ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link,
 				 PHY_INTERFACE_MODE_SGMII);
 	if (ret) {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index 441c1936..18461fc 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -13,15 +13,11 @@
 /* Qualcomm Technologies, Inc. EMAC PHY Controller driver.
  */
 
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/iopoll.h>
 #include <linux/acpi.h>
 #include "emac.h"
-#include "emac-mac.h"
 
 /* EMAC base register offsets */
 #define EMAC_MDIO_CTRL                                        0x001414
@@ -52,62 +48,10 @@
 
 #define MDIO_WAIT_TIMES                                           1000
 
-#define EMAC_LINK_SPEED_DEFAULT (\
-		EMAC_LINK_SPEED_10_HALF  |\
-		EMAC_LINK_SPEED_10_FULL  |\
-		EMAC_LINK_SPEED_100_HALF |\
-		EMAC_LINK_SPEED_100_FULL |\
-		EMAC_LINK_SPEED_1GB_FULL)
-
-/**
- * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
- * @adpt: the emac adapter
- *
- * The autopoll feature takes over the MDIO bus.  In order for
- * the PHY driver to be able to talk to the PHY over the MDIO
- * bus, we need to temporarily disable the autopoll feature.
- */
-static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt)
-{
-	u32 val;
-
-	/* disable autopoll */
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0);
-
-	/* wait for any mdio polling to complete */
-	if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val,
-				!(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100))
-		return 0;
-
-	/* failed to disable; ensure it is enabled before returning */
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
-
-	return -EBUSY;
-}
-
-/**
- * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
- * @adpt: the emac adapter
- *
- * The EMAC has the ability to poll the external PHY on the MDIO
- * bus for link state changes.  This eliminates the need for the
- * driver to poll the phy.  If if the link state does change,
- * the EMAC issues an interrupt on behalf of the PHY.
- */
-static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt)
-{
-	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
-}
-
 static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 {
 	struct emac_adapter *adpt = bus->priv;
 	u32 reg;
-	int ret;
-
-	ret = emac_phy_mdio_autopoll_disable(adpt);
-	if (ret)
-		return ret;
 
 	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
 			  (addr << PHY_ADDR_SHFT));
@@ -122,24 +66,15 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
 			       !(reg & (MDIO_START | MDIO_BUSY)),
 			       100, MDIO_WAIT_TIMES * 100))
-		ret = -EIO;
-	else
-		ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
+		return -EIO;
 
-	emac_phy_mdio_autopoll_enable(adpt);
-
-	return ret;
+	return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
 }
 
 static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
 {
 	struct emac_adapter *adpt = bus->priv;
 	u32 reg;
-	int ret;
-
-	ret = emac_phy_mdio_autopoll_disable(adpt);
-	if (ret)
-		return ret;
 
 	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
 			  (addr << PHY_ADDR_SHFT));
@@ -155,11 +90,9 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
 	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
 			       !(reg & (MDIO_START | MDIO_BUSY)), 100,
 			       MDIO_WAIT_TIMES * 100))
-		ret = -EIO;
+		return -EIO;
 
-	emac_phy_mdio_autopoll_enable(adpt);
-
-	return ret;
+	return 0;
 }
 
 /* Configure the MDIO bus and connect the external PHY */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 28a8cdc..98a326f 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -50,19 +50,7 @@
 #define DMAR_DLY_CNT_DEF				    15
 #define DMAW_DLY_CNT_DEF				     4
 
-#define IMR_NORMAL_MASK         (\
-		ISR_ERROR       |\
-		ISR_GPHY_LINK   |\
-		ISR_TX_PKT      |\
-		GPHY_WAKEUP_INT)
-
-#define IMR_EXTENDED_MASK       (\
-		SW_MAN_INT      |\
-		ISR_OVER        |\
-		ISR_ERROR       |\
-		ISR_GPHY_LINK   |\
-		ISR_TX_PKT      |\
-		GPHY_WAKEUP_INT)
+#define IMR_NORMAL_MASK		(ISR_ERROR | ISR_OVER | ISR_TX_PKT)
 
 #define ISR_TX_PKT      (\
 	TX_PKT_INT      |\
@@ -70,10 +58,6 @@
 	TX_PKT_INT2     |\
 	TX_PKT_INT3)
 
-#define ISR_GPHY_LINK        (\
-	GPHY_LINK_UP_INT     |\
-	GPHY_LINK_DOWN_INT)
-
 #define ISR_OVER        (\
 	RFD0_UR_INT     |\
 	RFD1_UR_INT     |\
@@ -187,10 +171,6 @@ irqreturn_t emac_isr(int _irq, void *data)
 	if (status & ISR_OVER)
 		net_warn_ratelimited("warning: TX/RX overflow\n");
 
-	/* link event */
-	if (status & ISR_GPHY_LINK)
-		phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT));
-
 exit:
 	/* enable the interrupt */
 	writel(irq->mask, adpt->base + EMAC_INT_MASK);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 3cd7989..784782d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -230,18 +230,6 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 	int ring_size;
 	int i;
 
-	/* Free RX skb ringbuffer */
-	if (priv->rx_skb[q]) {
-		for (i = 0; i < priv->num_rx_ring[q]; i++)
-			dev_kfree_skb(priv->rx_skb[q][i]);
-	}
-	kfree(priv->rx_skb[q]);
-	priv->rx_skb[q] = NULL;
-
-	/* Free aligned TX buffers */
-	kfree(priv->tx_align[q]);
-	priv->tx_align[q] = NULL;
-
 	if (priv->rx_ring[q]) {
 		for (i = 0; i < priv->num_rx_ring[q]; i++) {
 			struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
@@ -270,6 +258,18 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 		priv->tx_ring[q] = NULL;
 	}
 
+	/* Free RX skb ringbuffer */
+	if (priv->rx_skb[q]) {
+		for (i = 0; i < priv->num_rx_ring[q]; i++)
+			dev_kfree_skb(priv->rx_skb[q][i]);
+	}
+	kfree(priv->rx_skb[q]);
+	priv->rx_skb[q] = NULL;
+
+	/* Free aligned TX buffers */
+	kfree(priv->tx_align[q]);
+	priv->tx_align[q] = NULL;
+
 	/* Free TX skb ringbuffer.
 	 * SKBs are freed by ravb_tx_free() call above.
 	 */
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 2ae8524..a9ce82d 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1505,8 +1505,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
 		*index = entry->index;
 		resolved = false;
 	} else if (removing) {
-		ofdpa_neigh_del(trans, found);
 		*index = found->index;
+		ofdpa_neigh_del(trans, found);
 	} else if (updating) {
 		ofdpa_neigh_update(found, trans, NULL, false);
 		resolved = !is_zero_ether_addr(found->eth_dst);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 78efb28..78f9e43 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4172,7 +4172,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 	 * recipients
 	 */
 	if (is_mc_recip) {
-		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+		MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 		unsigned int depth, i;
 
 		memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
 		} else {
 			efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
-					       MC_CMD_FILTER_OP_IN_LEN,
+					       MC_CMD_FILTER_OP_EXT_IN_LEN,
 					       NULL, 0, rc);
 		}
 	}
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
 				      struct efx_filter_spec *spec)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *saved_spec;
 	unsigned int hash, i, depth = 1;
 	bool replacing = false;
@@ -4940,7 +4940,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 static void efx_ef10_filter_table_remove(struct efx_nic *efx)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	struct efx_filter_spec *spec;
 	unsigned int filter_idx;
 	int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 
 	/* Insert/renew filters */
 	for (i = 0; i < addr_count; i++) {
+		EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
 		efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
 		efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
 		rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 				}
 				return rc;
 			} else {
-				/* mark as not inserted, and carry on */
-				rc = EFX_EF10_FILTER_ID_INVALID;
+				/* keep invalid ID, and carry on */
 			}
+		} else {
+			ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 		}
-		ids[i] = efx_ef10_filter_get_unsafe_id(rc);
 	}
 
 	if (multicast && rollback) {
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index b7e4345..019cef1 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -661,8 +661,6 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan,
 		up_write(&vf->efx->filter_sem);
 		mutex_unlock(&vf->efx->mac_lock);
 
-		up_write(&vf->efx->filter_sem);
-
 		rc2 = efx_net_open(vf->efx->net_dev);
 		if (rc2)
 			goto reset_nic;
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index 489ef14..6a9c9544 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -37,6 +37,7 @@
 #define TSE_PCS_CONTROL_AN_EN_MASK			BIT(12)
 #define TSE_PCS_CONTROL_REG				0x00
 #define TSE_PCS_CONTROL_RESTART_AN_MASK			BIT(9)
+#define TSE_PCS_CTRL_AUTONEG_SGMII			0x1140
 #define TSE_PCS_IF_MODE_REG				0x28
 #define TSE_PCS_LINK_TIMER_0_REG			0x24
 #define TSE_PCS_LINK_TIMER_1_REG			0x26
@@ -65,6 +66,7 @@
 #define TSE_PCS_SW_RESET_TIMEOUT			100
 #define TSE_PCS_USE_SGMII_AN_MASK			BIT(1)
 #define TSE_PCS_USE_SGMII_ENA				BIT(0)
+#define TSE_PCS_IF_USE_SGMII				0x03
 
 #define SGMII_ADAPTER_CTRL_REG				0x00
 #define SGMII_ADAPTER_DISABLE				0x0001
@@ -101,7 +103,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
 {
 	int ret = 0;
 
-	writew(TSE_PCS_USE_SGMII_ENA, base + TSE_PCS_IF_MODE_REG);
+	writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG);
+
+	writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG);
 
 	writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
 	writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index aa64764..e0ef02f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -214,13 +214,13 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 {
 	/* Context type from W/B descriptor must be zero */
 	if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
-		return -EINVAL;
+		return 0;
 
 	/* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
 	if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
-		return 0;
+		return 1;
 
-	return 1;
+	return 0;
 }
 
 static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
@@ -282,7 +282,10 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 		}
 	}
 exit:
-	return ret;
+	if (likely(ret == 0))
+		return 1;
+
+	return 0;
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a74c481..6e4cbc6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -434,14 +434,14 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 		return;
 
 	/* check tx tstamp status */
-	if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+	if (priv->hw->desc->get_tx_timestamp_status(p)) {
 		/* get the valid tstamp */
 		ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
-		netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+		netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns);
 		/* pass tstamp to stack */
 		skb_tstamp_tx(skb, &shhwtstamp);
 	}
@@ -468,19 +468,19 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 		return;
 
 	/* Check if timestamp is available */
-	if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+	if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
 		/* For GMAC4, the valid timestamp is from CTX next desc. */
 		if (priv->plat->has_gmac4)
 			ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
 		else
 			ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-		netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
 		memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp->hwtstamp = ns_to_ktime(ns);
 	} else  {
-		netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+		netdev_dbg(priv->dev, "cannot get RX hw timestamp\n");
 	}
 }
 
@@ -546,7 +546,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			/* PTP v1, UDP, any kind of event packet */
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -578,7 +581,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -612,7 +618,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 			ptp_v2 = PTP_TCR_TSVER2ENA;
 			/* take time stamp for all event messages */
-			snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+			if (priv->plat->has_gmac4)
+				snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+			else
+				snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
 			ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
 			ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -1208,7 +1217,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	unsigned int bfsize = 0;
 	int ret = -ENOMEM;
-	u32 queue;
+	int queue;
 	int i;
 
 	if (priv->hw->mode->set_16kib_bfsize)
@@ -2724,7 +2733,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 
 		priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size,
 			0, 1,
-			(last_segment) && (buff_size < TSO_MAX_BUFF_SIZE),
+			(last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
 			0, 0);
 
 		tmp_len -= TSO_MAX_BUFF_SIZE;
@@ -2822,7 +2831,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_q->tx_skbuff_dma[first_entry].buf = des;
 	tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-	tx_q->tx_skbuff[first_entry] = skb;
 
 	first->des0 = cpu_to_le32(des);
 
@@ -2856,6 +2864,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
+	/* Only the last descriptor gets to point to the skb. */
+	tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+
+	/* We've used all descriptors we need for this skb, however,
+	 * advance cur_tx so that it references a fresh descriptor.
+	 * ndo_start_xmit will fill this descriptor the next time it's
+	 * called and stmmac_tx_clean may clean up to this descriptor.
+	 */
 	tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 
 	if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
@@ -2947,7 +2963,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i, csum_insertion = 0, is_jumbo = 0;
 	u32 queue = skb_get_queue_mapping(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
-	unsigned int entry, first_entry;
+	int entry;
+	unsigned int first_entry;
 	struct dma_desc *desc, *first;
 	struct stmmac_tx_queue *tx_q;
 	unsigned int enh_desc;
@@ -2988,8 +3005,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	first = desc;
 
-	tx_q->tx_skbuff[first_entry] = skb;
-
 	enh_desc = priv->plat->enh_desc;
 	/* To program the descriptors according to the size of the frame */
 	if (enh_desc)
@@ -3037,8 +3052,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 						skb->len);
 	}
 
-	entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+	/* Only the last descriptor gets to point to the skb. */
+	tx_q->tx_skbuff[entry] = skb;
 
+	/* We've used all descriptors we need for this skb, however,
+	 * advance cur_tx so that it references a fresh descriptor.
+	 * ndo_start_xmit will fill this descriptor the next time it's
+	 * called and stmmac_tx_clean may clean up to this descriptor.
+	 */
+	entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 	tx_q->cur_tx = entry;
 
 	if (netif_msg_pktdata(priv)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index 48fb72f..f4b31d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -59,7 +59,8 @@
 /* Enable Snapshot for Messages Relevant to Master */
 #define	PTP_TCR_TSMSTRENA	BIT(15)
 /* Select PTP packets for Taking Snapshots */
-#define	PTP_TCR_SNAPTYPSEL_1	GENMASK(17, 16)
+#define	PTP_TCR_SNAPTYPSEL_1	BIT(16)
+#define	PTP_GMAC4_TCR_SNAPTYPSEL_1	GENMASK(17, 16)
 /* Enable MAC address for PTP Frame Filtering */
 #define	PTP_TCR_TSENMACADDR	BIT(18)
 
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index 1562ab4..56ba411 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
 	if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
 
-	if (of_machine_is_compatible("ti,am4372"))
+	if (of_machine_is_compatible("ti,am43"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
 	if (of_machine_is_compatible("ti,dra7"))
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index dec5d56..199459b 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1007,7 +1007,7 @@ static void geneve_setup(struct net_device *dev)
 
 	dev->netdev_ops = &geneve_netdev_ops;
 	dev->ethtool_ops = &geneve_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	SET_NETDEV_DEVTYPE(dev, &geneve_type);
 
@@ -1133,7 +1133,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
 
 	/* make enough headroom for basic scenario */
 	encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-	if (ip_tunnel_info_af(info) == AF_INET) {
+	if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
 		encap_len += sizeof(struct iphdr);
 		dev->max_mtu -= sizeof(struct iphdr);
 	} else {
@@ -1293,7 +1293,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
 		goto nla_put_failure;
 
-	if (ip_tunnel_info_af(info) == AF_INET) {
+	if (rtnl_dereference(geneve->sock4)) {
 		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
 				    info->key.u.ipv4.dst))
 			goto nla_put_failure;
@@ -1302,8 +1302,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			       !!(info->key.tun_flags & TUNNEL_CSUM)))
 			goto nla_put_failure;
 
+	}
+
 #if IS_ENABLED(CONFIG_IPV6)
-	} else {
+	if (rtnl_dereference(geneve->sock6)) {
 		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
 				     &info->key.u.ipv6.dst))
 			goto nla_put_failure;
@@ -1315,8 +1317,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 			       !geneve->use_udp6_rx_checksums))
 			goto nla_put_failure;
-#endif
 	}
+#endif
 
 	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
 	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 4fea1b3..ca110cd 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -611,7 +611,7 @@ static const struct net_device_ops gtp_netdev_ops = {
 static void gtp_link_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &gtp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dev->hard_header_len = 0;
 	dev->addr_len = 0;
@@ -873,7 +873,7 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
 
 	/* Check if there's an existing gtpX device to configure */
 	dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK]));
-	if (dev->netdev_ops == &gtp_netdev_ops)
+	if (dev && dev->netdev_ops == &gtp_netdev_ops)
 		gtp = netdev_priv(dev);
 
 	put_net(net);
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 922bf44..021a8ec 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -311,7 +311,7 @@ static void sp_setup(struct net_device *dev)
 {
 	/* Finish setting up the DEVICE info. */
 	dev->netdev_ops		= &sp_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->mtu		= SIXP_MTU;
 	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
 	dev->header_ops 	= &ax25_header_ops;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index f62e7f3..78a6414 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -476,7 +476,7 @@ static const struct net_device_ops bpq_netdev_ops = {
 static void bpq_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &bpq_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 
 	memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
 	memcpy(dev->dev_addr,  &ax25_defaddr, AX25_ADDR_LEN);
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 8c3633c..97e3bc6 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -576,6 +576,8 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	case HDLCDRVCTL_CALIBRATE:
 		if(!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+		if (s->par.bitrate <= 0)
+			return -EINVAL;
 		if (bi.data.calibrate > INT_MAX / s->par.bitrate)
 			return -EINVAL;
 		s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
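
The hdlcdrv check above is the usual pattern for a user-controlled multiplication: reject a non-positive divisor before it is ever used, and reject a * b overflow by comparing a against INT_MAX / b instead of multiplying first. A small sketch of that pattern with a hypothetical helper, not the driver code:

#include <limits.h>
#include <stdio.h>

/* Overflow-safe scaling: refuse calibrate * bitrate when it would exceed
 * INT_MAX, and refuse bitrate <= 0 before it is used as a divisor. */
static int scaled(int calibrate, int bitrate, int *out)
{
	if (bitrate <= 0)
		return -1;
	if (calibrate > INT_MAX / bitrate)
		return -1;
	*out = calibrate * bitrate / 16;
	return 0;
}

int main(void)
{
	int v;

	printf("ok: %d\n", scaled(1000, 9600, &v) ? -1 : v);
	printf("rejects overflow: %d\n", scaled(INT_MAX, 9600, &v));
	printf("rejects zero bitrate: %d\n", scaled(1000, 0, &v));
	return 0;
}
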
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 262b2ea5..6066f1b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -171,6 +171,8 @@ struct rndis_device {
 	spinlock_t request_lock;
 	struct list_head req_list;
 
+	struct work_struct mcast_work;
+
 	u8 hw_mac_adr[ETH_ALEN];
 	u8 rss_key[NETVSC_HASH_KEYLEN];
 	u16 ind_table[ITAB_NUM];
@@ -201,6 +203,7 @@ int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 int rndis_filter_device_add(struct hv_device *dev,
 			    struct netvsc_device_info *info);
+void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
 				struct netvsc_device *nvdev);
 int rndis_filter_set_rss_param(struct rndis_device *rdev,
@@ -211,7 +214,6 @@ int rndis_filter_receive(struct net_device *ndev,
 			 struct vmbus_channel *channel,
 			 void *data, u32 buflen);
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
 int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
 
 void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -696,7 +698,6 @@ struct net_device_context {
 	/* list protection */
 	spinlock_t lock;
 
-	struct work_struct work;
 	u32 msg_enable; /* debug level */
 
 	u32 tx_checksum_mask;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4421a6d..643c539 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -56,37 +56,12 @@ static int debug = -1;
 module_param(debug, int, S_IRUGO);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
-static void do_set_multicast(struct work_struct *w)
-{
-	struct net_device_context *ndevctx =
-		container_of(w, struct net_device_context, work);
-	struct hv_device *device_obj = ndevctx->device_ctx;
-	struct net_device *ndev = hv_get_drvdata(device_obj);
-	struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
-	struct rndis_device *rdev;
-
-	if (!nvdev)
-		return;
-
-	rdev = nvdev->extension;
-	if (rdev == NULL)
-		return;
-
-	if (ndev->flags & IFF_PROMISC)
-		rndis_filter_set_packet_filter(rdev,
-			NDIS_PACKET_TYPE_PROMISCUOUS);
-	else
-		rndis_filter_set_packet_filter(rdev,
-			NDIS_PACKET_TYPE_BROADCAST |
-			NDIS_PACKET_TYPE_ALL_MULTICAST |
-			NDIS_PACKET_TYPE_DIRECTED);
-}
-
 static void netvsc_set_multicast_list(struct net_device *net)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
-	schedule_work(&net_device_ctx->work);
+	rndis_filter_update(nvdev);
 }
 
 static int netvsc_open(struct net_device *net)
@@ -123,8 +98,6 @@ static int netvsc_close(struct net_device *net)
 
 	netif_tx_disable(net);
 
-	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
-	cancel_work_sync(&net_device_ctx->work);
 	ret = rndis_filter_close(nvdev);
 	if (ret != 0) {
 		netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -803,7 +776,7 @@ static int netvsc_set_channels(struct net_device *net,
 	    channels->rx_count || channels->tx_count || channels->other_count)
 		return -EINVAL;
 
-	if (count > net->num_tx_queues || count > net->num_rx_queues)
+	if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
 		return -EINVAL;
 
 	if (!nvdev || nvdev->destroy)
@@ -1028,7 +1001,7 @@ static const struct {
 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
-	struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 
 	if (!nvdev)
 		return -ENODEV;
@@ -1158,11 +1131,22 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *net)
+static void netvsc_poll_controller(struct net_device *dev)
 {
-	/* As netvsc_start_xmit() works synchronous we don't have to
-	 * trigger anything here.
-	 */
+	struct net_device_context *ndc = netdev_priv(dev);
+	struct netvsc_device *ndev;
+	int i;
+
+	rcu_read_lock();
+	ndev = rcu_dereference(ndc->nvdev);
+	if (ndev) {
+		for (i = 0; i < ndev->num_chn; i++) {
+			struct netvsc_channel *nvchan = &ndev->chan_table[i];
+
+			napi_schedule(&nvchan->napi);
+		}
+	}
+	rcu_read_unlock();
 }
 #endif
 
@@ -1219,7 +1203,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
 	rndis_dev = ndev->extension;
 	if (indir) {
 		for (i = 0; i < ITAB_NUM; i++)
-			if (indir[i] >= dev->num_rx_queues)
+			if (indir[i] >= VRSS_CHANNEL_MAX)
 				return -EINVAL;
 
 		for (i = 0; i < ITAB_NUM; i++)
@@ -1552,7 +1536,6 @@ static int netvsc_probe(struct hv_device *dev,
 	hv_set_drvdata(dev, net);
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
-	INIT_WORK(&net_device_ctx->work, do_set_multicast);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
@@ -1622,7 +1605,6 @@ static int netvsc_remove(struct hv_device *dev)
 	netif_device_detach(net);
 
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
-	cancel_work_sync(&ndev_ctx->work);
 
 	/*
 	 * Call to the vsc driver to let it know that the device is being
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f9d5b0b..cb79cd0 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
 
 #include "hyperv_net.h"
 
+static void rndis_set_multicast(struct work_struct *w);
 
 #define RNDIS_EXT_LEN PAGE_SIZE
 struct rndis_request {
@@ -76,6 +77,7 @@ static struct rndis_device *get_rndis_device(void)
 	spin_lock_init(&device->request_lock);
 
 	INIT_LIST_HEAD(&device->req_list);
+	INIT_WORK(&device->mcast_work, rndis_set_multicast);
 
 	device->state = RNDIS_DEV_UNINITIALIZED;
 
@@ -815,7 +817,8 @@ static int rndis_filter_query_link_speed(struct rndis_device *dev)
 	return ret;
 }
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
+static int rndis_filter_set_packet_filter(struct rndis_device *dev,
+					  u32 new_filter)
 {
 	struct rndis_request *request;
 	struct rndis_set_request *set;
@@ -846,6 +849,28 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 	return ret;
 }
 
+static void rndis_set_multicast(struct work_struct *w)
+{
+	struct rndis_device *rdev
+		= container_of(w, struct rndis_device, mcast_work);
+
+	if (rdev->ndev->flags & IFF_PROMISC)
+		rndis_filter_set_packet_filter(rdev,
+					       NDIS_PACKET_TYPE_PROMISCUOUS);
+	else
+		rndis_filter_set_packet_filter(rdev,
+					       NDIS_PACKET_TYPE_BROADCAST |
+					       NDIS_PACKET_TYPE_ALL_MULTICAST |
+					       NDIS_PACKET_TYPE_DIRECTED);
+}
+
+void rndis_filter_update(struct netvsc_device *nvdev)
+{
+	struct rndis_device *rdev = nvdev->extension;
+
+	schedule_work(&rdev->mcast_work);
+}
+
 static int rndis_filter_init_device(struct rndis_device *dev)
 {
 	struct rndis_request *request;
@@ -973,6 +998,9 @@ static int rndis_filter_close_device(struct rndis_device *dev)
 	if (dev->state != RNDIS_DEV_DATAINITIALIZED)
 		return 0;
 
+	/* Make sure rndis_set_multicast doesn't re-enable filter! */
+	cancel_work_sync(&dev->mcast_work);
+
 	ret = rndis_filter_set_packet_filter(dev, 0);
 	if (ret == -ENODEV)
 		ret = 0;
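
The netvsc/rndis hunks above move RX filter programming out of the ndo_set_rx_mode path and into a work item owned by the RNDIS device: ndo_set_rx_mode is called with the netif address lock held and must not sleep, while the RNDIS set-filter request waits for a device response. A minimal sketch of that deferral pattern, using hypothetical foo_* names rather than the driver's own:

#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct foo_priv {
	struct net_device *ndev;
	struct work_struct rx_mode_work;	/* INIT_WORK() at probe time */
};

static void foo_rx_mode_work(struct work_struct *w)
{
	struct foo_priv *priv = container_of(w, struct foo_priv, rx_mode_work);

	/* Process context: it is safe to sleep here while the device is
	 * reprogrammed according to priv->ndev->flags.
	 */
	netdev_dbg(priv->ndev, "updating RX filter, flags %x\n",
		   priv->ndev->flags);
}

/* .ndo_set_rx_mode runs in atomic context, so only schedule the work. */
static void foo_set_rx_mode(struct net_device *ndev)
{
	struct foo_priv *priv = netdev_priv(ndev);

	schedule_work(&priv->rx_mode_work);
}

static int foo_close(struct net_device *ndev)
{
	struct foo_priv *priv = netdev_priv(ndev);

	/* Make sure a pending update cannot re-enable the filter after close. */
	cancel_work_sync(&priv->rx_mode_work);
	return 0;
}
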
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 312fce7..144ea5a 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -207,7 +207,6 @@ static void ifb_dev_free(struct net_device *dev)
 		__skb_queue_purge(&txp->tq);
 	}
 	kfree(dp->tx_private);
-	free_netdev(dev);
 }
 
 static void ifb_setup(struct net_device *dev)
@@ -230,7 +229,8 @@ static void ifb_setup(struct net_device *dev)
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netif_keep_dst(dev);
 	eth_hw_addr_random(dev);
-	dev->destructor = ifb_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ifb_dev_free;
 }
 
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
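
The destructor conversions in this series (ifb above, and the ipvlan, loopback, macsec, macvlan, tun, veth, vxlan and other hunks that follow) all have the same shape: the driver stops calling free_netdev() from its destructor, sets needs_free_netdev so the core frees the netdev itself, and keeps only private-state cleanup in priv_destructor. A minimal sketch with hypothetical foo_* names:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/percpu.h>

struct foo_priv {
	struct pcpu_sw_netstats __percpu *stats;
};

/* Releases only driver-private state. free_netdev() is no longer called
 * here; the core does that itself because needs_free_netdev is set.
 */
static void foo_dev_free(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	free_percpu(priv->stats);
}

static void foo_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->needs_free_netdev = true;
	dev->priv_destructor = foo_dev_free;
}
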
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 618ed88..7c7680c 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -632,7 +632,7 @@ void ipvlan_link_setup(struct net_device *dev)
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
 	dev->netdev_ops = &ipvlan_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->header_ops = &ipvlan_header_ops;
 	dev->ethtool_ops = &ipvlan_ethtool_ops;
 }
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 224f65c..3061249 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -159,7 +159,6 @@ static void loopback_dev_free(struct net_device *dev)
 {
 	dev_net(dev)->loopback_dev = NULL;
 	free_percpu(dev->lstats);
-	free_netdev(dev);
 }
 
 static const struct net_device_ops loopback_ops = {
@@ -196,7 +195,8 @@ static void loopback_setup(struct net_device *dev)
 	dev->ethtool_ops	= &loopback_ethtool_ops;
 	dev->header_ops		= &eth_header_ops;
 	dev->netdev_ops		= &loopback_ops;
-	dev->destructor		= loopback_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= loopback_dev_free;
 }
 
 /* Setup and register the loopback device. */
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cdc347b..7941167 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2996,7 +2996,6 @@ static void macsec_free_netdev(struct net_device *dev)
 	free_percpu(macsec->secy.tx_sc.stats);
 
 	dev_put(real_dev);
-	free_netdev(dev);
 }
 
 static void macsec_setup(struct net_device *dev)
@@ -3006,7 +3005,8 @@ static void macsec_setup(struct net_device *dev)
 	dev->max_mtu = ETH_MAX_MTU;
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->netdev_ops = &macsec_netdev_ops;
-	dev->destructor = macsec_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = macsec_free_netdev;
 	SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
 	eth_zero_addr(dev->broadcast);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 346ad2f..72b8018 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,16 +39,20 @@
 #define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN	1000
 
+#define MACVLAN_F_PASSTHRU	1
+#define MACVLAN_F_ADDRCHANGE	2
+
 struct macvlan_port {
 	struct net_device	*dev;
 	struct hlist_head	vlan_hash[MACVLAN_HASH_SIZE];
 	struct list_head	vlans;
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
-	bool 			passthru;
+	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
 	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+	unsigned char           perm_addr[ETH_ALEN];
 };
 
 struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+	return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+	port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+	port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
 /* Hash Ethernet address */
 static u32 macvlan_eth_hash(const unsigned char *addr)
 {
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
 static bool macvlan_addr_busy(const struct macvlan_port *port,
 			      const unsigned char *addr)
 {
-	/* Test to see if the specified multicast address is
+	/* Test to see if the specified address is
 	 * currently in use by the underlying device or
 	 * another macvlan.
 	 */
-	if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+	if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+	    ether_addr_equal_64bits(port->dev->dev_addr, addr))
 		return true;
 
 	if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	}
 
 	macvlan_forward_source(skb, port, eth->h_source);
-	if (port->passthru)
+	if (macvlan_passthru(port))
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
 	else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
 	struct net_device *lowerdev = vlan->lowerdev;
 	int err;
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
 			err = dev_set_promiscuity(lowerdev, 1);
 			if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
 	dev_uc_unsync(lowerdev, dev);
 	dev_mc_unsync(lowerdev, dev);
 
-	if (vlan->port->passthru) {
+	if (macvlan_passthru(vlan->port)) {
 		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
 			dev_set_promiscuity(lowerdev, -1);
 		goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
+	struct macvlan_port *port = vlan->port;
 	int err;
 
 	if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 		if (macvlan_addr_busy(vlan->port, addr))
 			return -EBUSY;
 
-		if (!vlan->port->passthru) {
+		if (!macvlan_passthru(port)) {
 			err = dev_uc_add(lowerdev, addr);
 			if (err)
 				return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 
 		macvlan_hash_change_addr(vlan, addr);
 	}
+	if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+		/* Since addr_change isn't set, we are here due to a change
+		 * on the lower device.  Save the lower device's address so
+		 * we can restore it later.
+		 */
+		ether_addr_copy(vlan->port->perm_addr,
+				lowerdev->dev_addr);
+	}
+	macvlan_clear_addr_change(port);
 	return 0;
 }
 
@@ -703,7 +743,12 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+	/* If the addresses are the same, this is a no-op */
+	if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+		return 0;
+
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+		macvlan_set_addr_change(vlan->port);
 		dev_set_mac_address(vlan->lowerdev, addr);
 		return 0;
 	}
@@ -928,7 +973,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (flags & NLM_F_REPLACE)
@@ -952,7 +997,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 	/* Support unicast filter only on passthru devices.
 	 * Multicast filter should be allowed on all devices.
 	 */
-	if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+	if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
 		return -EOPNOTSUPP;
 
 	if (is_unicast_ether_addr(addr))
@@ -1092,7 +1137,7 @@ void macvlan_common_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &macvlan_hard_header_ops;
 	dev->ethtool_ops	= &macvlan_ethtool_ops;
 }
@@ -1120,8 +1165,8 @@ static int macvlan_port_create(struct net_device *dev)
 	if (port == NULL)
 		return -ENOMEM;
 
-	port->passthru = false;
 	port->dev = dev;
+	ether_addr_copy(port->perm_addr, dev->dev_addr);
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1161,6 +1206,18 @@ static void macvlan_port_destroy(struct net_device *dev)
 		kfree_skb(skb);
 	}
 
+	/* If the lower device address has been changed by passthru
+	 * macvlan, put it back.
+	 */
+	if (macvlan_passthru(port) &&
+	    !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+		struct sockaddr sa;
+
+		sa.sa_family = port->dev->type;
+		memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+		dev_set_mac_address(port->dev, &sa);
+	}
+
 	kfree(port);
 }
 
@@ -1326,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	port = macvlan_port_get_rtnl(lowerdev);
 
 	/* Only 1 macvlan device can be created in passthru mode */
-	if (port->passthru) {
+	if (macvlan_passthru(port)) {
 		/* The macvlan port must be not created this time,
 		 * still goto destroy_macvlan_port for readability.
 		 */
@@ -1352,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			err = -EINVAL;
 			goto destroy_macvlan_port;
 		}
-		port->passthru = true;
+		macvlan_set_passthru(port);
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
@@ -1434,7 +1491,7 @@ static int macvlan_changelink(struct net_device *dev,
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
 		__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 		bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
-		if (vlan->port->passthru && promisc) {
+		if (macvlan_passthru(vlan->port) && promisc) {
 			int err;
 
 			if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1597,7 +1654,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 		}
 		break;
 	case NETDEV_CHANGEADDR:
-		if (!port->passthru)
+		if (!macvlan_passthru(port))
 			return NOTIFY_DONE;
 
 		vlan = list_first_entry_or_null(&port->vlans,
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 06ee639..0e27920 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -358,7 +358,7 @@ static ssize_t enabled_store(struct config_item *item,
 		if (err)
 			goto out_unlock;
 
-		pr_info("netconsole: network logging started\n");
+		pr_info("network logging started\n");
 	} else {	/* false */
 		/* We need to disable the netconsole before cleaning it up
 		 * otherwise we might end up in write_msg() with
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index b916038..c4b3362 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -113,7 +113,7 @@ static void nlmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &nlmon_ops;
 	dev->ethtool_ops = &nlmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 60ffc9d..3ab6c58 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -108,7 +108,7 @@
 config MDIO_OCTEON
 	tristate "Octeon and some ThunderX SOCs MDIO buses"
 	depends on 64BIT
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && OF_MDIO
 	select MDIO_CAVIUM
 	help
 	  This module provides a driver for the Octeon and ThunderX MDIO
@@ -127,6 +127,7 @@
 	tristate "ThunderX SOCs MDIO buses"
 	depends on 64BIT
 	depends on PCI
+	depends on !(MDIO_DEVICE=y && PHYLIB=m)
 	select MDIO_CAVIUM
 	help
 	  This driver supports the MDIO interfaces found on Cavium
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index ed0d10f..c3065236 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
 	if (overflow) {
 		pr_debug("tx timestamp queue overflow, count %d\n", overflow);
 		while (skb) {
-			skb_complete_tx_timestamp(skb, NULL);
+			kfree_skb(skb);
 			skb = skb_dequeue(&dp83640->tx_queue);
 		}
 		return;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 272b051..57297ba 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -255,34 +255,6 @@ static int marvell_config_aneg(struct phy_device *phydev)
 {
 	int err;
 
-	/* The Marvell PHY has an errata which requires
-	 * that certain registers get written in order
-	 * to restart autonegotiation */
-	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x1f);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x200c);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1d, 0x5);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0);
-	if (err < 0)
-		return err;
-
-	err = phy_write(phydev, 0x1e, 0x100);
-	if (err < 0)
-		return err;
-
 	err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
 	if (err < 0)
 		return err;
@@ -316,6 +288,42 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1101_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* This Marvell PHY has an erratum which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x1f);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x200c);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1d, 0x5);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, 0x1e, 0x100);
+	if (err < 0)
+		return err;
+
+	return marvell_config_aneg(phydev);
+}
+
 static int m88e1111_config_aneg(struct phy_device *phydev)
 {
 	int err;
@@ -1119,8 +1127,6 @@ static int marvell_read_status_page(struct phy_device *phydev, int page)
 		if (adv < 0)
 			return adv;
 
-		lpa &= adv;
-
 		if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
 			phydev->duplex = DUPLEX_FULL;
 		else
@@ -1892,7 +1898,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &marvell_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1101_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 8e73f5f..f99c21f 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -658,6 +658,18 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
+static int mdio_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	int rc;
+
+	/* Some devices have extra OF data and an OF-style MODALIAS */
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
+	return 0;
+}
+
 #ifdef CONFIG_PM
 static int mdio_bus_suspend(struct device *dev)
 {
@@ -708,6 +720,7 @@ static const struct dev_pm_ops mdio_bus_pm_ops = {
 struct bus_type mdio_bus_type = {
 	.name		= "mdio_bus",
 	.match		= mdio_bus_match,
+	.uevent		= mdio_uevent,
 	.pm		= MDIO_BUS_PM_OPS,
 };
 EXPORT_SYMBOL(mdio_bus_type);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6a5fd18..8b20388 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -268,11 +268,31 @@ static int kszphy_nand_tree_disable(struct phy_device *phydev)
 	return ret;
 }
 
+/* Some config bits need to be set again on resume, handle them here. */
+static int kszphy_config_reset(struct phy_device *phydev)
+{
+	struct kszphy_priv *priv = phydev->priv;
+	int ret;
+
+	if (priv->rmii_ref_clk_sel) {
+		ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val);
+		if (ret) {
+			phydev_err(phydev,
+				   "failed to set rmii reference clock\n");
+			return ret;
+		}
+	}
+
+	if (priv->led_mode >= 0)
+		kszphy_setup_led(phydev, priv->type->led_mode_reg, priv->led_mode);
+
+	return 0;
+}
+
 static int kszphy_config_init(struct phy_device *phydev)
 {
 	struct kszphy_priv *priv = phydev->priv;
 	const struct kszphy_type *type;
-	int ret;
 
 	if (!priv)
 		return 0;
@@ -285,19 +305,7 @@ static int kszphy_config_init(struct phy_device *phydev)
 	if (type->has_nand_tree_disable)
 		kszphy_nand_tree_disable(phydev);
 
-	if (priv->rmii_ref_clk_sel) {
-		ret = kszphy_rmii_clk_sel(phydev, priv->rmii_ref_clk_sel_val);
-		if (ret) {
-			phydev_err(phydev,
-				   "failed to set rmii reference clock\n");
-			return ret;
-		}
-	}
-
-	if (priv->led_mode >= 0)
-		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
-
-	return 0;
+	return kszphy_config_reset(phydev);
 }
 
 static int ksz8041_config_init(struct phy_device *phydev)
@@ -611,6 +619,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
 	if ((regval & 0xFF) == 0xFF) {
 		phy_init_hw(phydev);
 		phydev->link = 0;
+		if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+			phydev->drv->config_intr(phydev);
 	}
 
 	return 0;
@@ -700,8 +710,14 @@ static int kszphy_suspend(struct phy_device *phydev)
 
 static int kszphy_resume(struct phy_device *phydev)
 {
+	int ret;
+
 	genphy_resume(phydev);
 
+	ret = kszphy_config_reset(phydev);
+	if (ret)
+		return ret;
+
 	/* Enable PHY Interrupts */
 	if (phy_interrupt_is_valid(phydev)) {
 		phydev->interrupts = PHY_INTERRUPT_ENABLED;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 82ab8fb..eebb0e1 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -54,6 +54,8 @@ static const char *phy_speed_to_str(int speed)
 		return "5Gbps";
 	case SPEED_10000:
 		return "10Gbps";
+	case SPEED_14000:
+		return "14Gbps";
 	case SPEED_20000:
 		return "20Gbps";
 	case SPEED_25000:
@@ -241,7 +243,7 @@ static const struct phy_setting settings[] = {
  * phy_lookup_setting - lookup a PHY setting
  * @speed: speed to match
  * @duplex: duplex to match
- * @feature: allowed link modes
+ * @features: allowed link modes
  * @exact: an exact match is required
  *
  * Search the settings array for a setting that matches the speed and
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 1da31dc..74b9072 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -629,7 +629,7 @@ static void sl_uninit(struct net_device *dev)
 static void sl_free_netdev(struct net_device *dev)
 {
 	int i = dev->base_addr;
-	free_netdev(dev);
+
 	slip_devs[i] = NULL;
 }
 
@@ -651,7 +651,8 @@ static const struct net_device_ops sl_netdev_ops = {
 static void sl_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &sl_netdev_ops;
-	dev->destructor		= sl_free_netdev;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= sl_free_netdev;
 
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
@@ -1369,8 +1370,6 @@ static void __exit slip_exit(void)
 		if (sl->tty) {
 			printk(KERN_ERR "%s: tty discipline still running\n",
 			       dev->name);
-			/* Intentionally leak the control block. */
-			dev->destructor = NULL;
 		}
 
 		unregister_netdev(dev);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6c5d5ef..fba8c13 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1643,7 +1643,6 @@ static void team_destructor(struct net_device *dev)
 	struct team *team = netdev_priv(dev);
 
 	free_percpu(team->pcpu_stats);
-	free_netdev(dev);
 }
 
 static int team_open(struct net_device *dev)
@@ -2079,7 +2078,8 @@ static void team_setup(struct net_device *dev)
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
-	dev->destructor	= team_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = team_destructor;
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->priv_flags |= IFF_TEAM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index bbd707b..9ee7d42 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1560,7 +1560,6 @@ static void tun_free_netdev(struct net_device *dev)
 	free_percpu(tun->pcpu_stats);
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
-	free_netdev(dev);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -1571,7 +1570,8 @@ static void tun_setup(struct net_device *dev)
 	tun->group = INVALID_GID;
 
 	dev->ethtool_ops = &tun_ethtool_ops;
-	dev->destructor = tun_free_netdev;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = tun_free_netdev;
 	/* We prefer our own queue length */
 	dev->tx_queue_len = TUN_READQ_SIZE;
 }
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 51cf600..4037ab2 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1722,6 +1722,18 @@ static const struct driver_info lenovo_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info belkin_info = {
+	.description = "Belkin USB Ethernet Adapter",
+	.bind	= ax88179_bind,
+	.unbind = ax88179_unbind,
+	.status = ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset	= ax88179_reset,
+	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
 	/* ASIX AX88179 10/100/1000 */
@@ -1751,6 +1763,10 @@ static const struct usb_device_id products[] = {
 	/* Lenovo OneLinkDock Gigabit LAN */
 	USB_DEVICE(0x17ef, 0x304b),
 	.driver_info = (unsigned long)&lenovo_info,
+}, {
+	/* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+	USB_DEVICE(0x050d, 0x0128),
+	.driver_info = (unsigned long)&belkin_info,
 },
 	{ },
 };
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index eb52de8..c7a350b 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -298,7 +298,7 @@ static void usbpn_setup(struct net_device *dev)
 	dev->addr_len		= 1;
 	dev->tx_queue_len	= 3;
 
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index f3ae88f..8ab281b 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -310,13 +310,6 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 		return -ENODEV;
 	}
 
-	/* Some devices don't initialise properly. In particular
-	 * the packet filter is not reset. There are devices that
-	 * don't do reset all the way. So the packet filter should
-	 * be set to a sane initial value.
-	 */
-	usbnet_cdc_update_filter(dev);
-
 	return 0;
 
 bad_desc:
@@ -325,6 +318,30 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind);
 
+
+/* like usbnet_generic_cdc_bind() but handles filter initialization
+ * correctly
+ */
+int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	int rv;
+
+	rv = usbnet_generic_cdc_bind(dev, intf);
+	if (rv < 0)
+		goto bail_out;
+
+	/* Some devices don't initialise properly. In particular
+	 * the packet filter is not reset. There are devices that
+	 * don't do reset all the way. So the packet filter should
+	 * be set to a sane initial value.
+	 */
+	usbnet_cdc_update_filter(dev);
+
+bail_out:
+	return rv;
+}
+EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind);
+
 void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct cdc_state		*info = (void *) &dev->data;
@@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data)
 			< sizeof(struct cdc_state)));
 
-	status = usbnet_generic_cdc_bind(dev, intf);
+	status = usbnet_ether_cdc_bind(dev, intf);
 	if (status < 0)
 		return status;
 
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8f923a1..32a22f4e 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -123,7 +123,7 @@ static void qmimux_setup(struct net_device *dev)
 	dev->addr_len        = 0;
 	dev->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 	dev->netdev_ops      = &qmimux_netdev_ops;
-	dev->destructor      = free_netdev;
+	dev->needs_free_netdev = true;
 }
 
 static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
@@ -1192,6 +1192,8 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1199, 0x9056, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
+	{QMI_FIXED_INTF(0x1199, 0x9063, 8)},	/* Sierra Wireless EM7305 */
+	{QMI_FIXED_INTF(0x1199, 0x9063, 10)},	/* Sierra Wireless EM7305 */
 	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
 	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
@@ -1206,6 +1208,8 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},	/* Telit ME910 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)},	/* Telit LE920, LE920A4 */
+	{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)},	/* Telewell TW-3G HSPA+ */
+	{QMI_FIXED_INTF(0x1c9e, 0x9803, 4)},	/* Telewell TW-3G HSPA+ */
 	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
 	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index ddc62cb..1a419a4 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -4368,6 +4368,8 @@ static u8 rtl_get_version(struct usb_interface *intf)
 		break;
 	}
 
+	dev_dbg(&intf->dev, "Detected version 0x%04x\n", version);
+
 	return version;
 }
 
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 765400b..2dfca96 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -681,7 +681,7 @@ static int smsc95xx_set_features(struct net_device *netdev,
 	if (ret < 0)
 		return ret;
 
-	if (features & NETIF_F_HW_CSUM)
+	if (features & NETIF_F_IP_CSUM)
 		read_buf |= Tx_COE_EN_;
 	else
 		read_buf &= ~Tx_COE_EN_;
@@ -1279,12 +1279,19 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	spin_lock_init(&pdata->mac_cr_lock);
 
+	/* LAN95xx devices do not alter a computed checksum of 0 to 0xffff.
+	 * Per RFC 2460, an IPv6 UDP checksum that calculates to zero must be
+	 * changed to 0xffff; per RFC 768, an IPv4 UDP checksum that computes
+	 * to zero is transmitted as all ones, since a transmitted checksum of
+	 * zero means the transmitter generated no checksum. Hence, enable
+	 * csum offload only for ipv4 packets.
+	 */
 	if (DEFAULT_TX_CSUM_ENABLE)
-		dev->net->features |= NETIF_F_HW_CSUM;
+		dev->net->features |= NETIF_F_IP_CSUM;
 	if (DEFAULT_RX_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_RXCSUM;
 
-	dev->net->hw_features = NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+	dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
 
 	smsc95xx_init_mac_address(dev);
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 38f0f03..364fa9d 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -222,7 +222,6 @@ static int veth_dev_init(struct net_device *dev)
 static void veth_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->vstats);
-	free_netdev(dev);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -317,7 +316,8 @@ static void veth_setup(struct net_device *dev)
 			       NETIF_F_HW_VLAN_STAG_TX |
 			       NETIF_F_HW_VLAN_CTAG_RX |
 			       NETIF_F_HW_VLAN_STAG_RX);
-	dev->destructor = veth_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = veth_dev_free;
 	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->hw_features = VETH_FEATURES;
@@ -383,7 +383,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		tbp = tb;
 	}
 
-	if (tbp[IFLA_IFNAME]) {
+	if (ifmp && tbp[IFLA_IFNAME]) {
 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
 		name_assign_type = NET_NAME_USER;
 	} else {
@@ -402,7 +402,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		return PTR_ERR(peer);
 	}
 
-	if (tbp[IFLA_ADDRESS] == NULL)
+	if (!ifmp || !tbp[IFLA_ADDRESS])
 		eth_hw_addr_random(peer);
 
 	if (ifmp && (dev->ifindex != 0))
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9320d96..143d8a9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -869,7 +869,7 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
 	unsigned int len;
 
 	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
-				rq->min_buf_len - hdr_len, PAGE_SIZE - hdr_len);
+				rq->min_buf_len, PAGE_SIZE - hdr_len);
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1797,6 +1797,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
 	flush_work(&vi->config_work);
 
 	netif_device_detach(vi->dev);
+	netif_tx_disable(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
 	if (netif_running(vi->dev)) {
@@ -1989,6 +1990,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
 	.ndo_xdp		= virtnet_xdp,
+	.ndo_features_check	= passthru_features_check,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -2143,7 +2145,8 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
 	unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
 	unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
 
-	return max(min_buf_len, hdr_len);
+	return max(max(min_buf_len, hdr_len) - hdr_len,
+		   (unsigned int)GOOD_PACKET_LEN);
 }
 
 static int virtnet_find_vqs(struct virtnet_info *vi)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index db88249..022c0b5 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -36,12 +36,14 @@
 #include <net/addrconf.h>
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
+#include <net/netns/generic.h>
 
 #define DRV_NAME	"vrf"
 #define DRV_VERSION	"1.0"
 
 #define FIB_RULE_PREF  1000       /* default preference for FIB rules */
-static bool add_fib_rules = true;
+
+static unsigned int vrf_net_id;
 
 struct net_vrf {
 	struct rtable __rcu	*rth;
@@ -1348,7 +1350,7 @@ static void vrf_setup(struct net_device *dev)
 	dev->netdev_ops = &vrf_netdev_ops;
 	dev->l3mdev_ops = &vrf_l3mdev_ops;
 	dev->ethtool_ops = &vrf_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	/* Fill in device structure with ethernet-generic values. */
 	eth_hw_addr_random(dev);
@@ -1394,6 +1396,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 		       struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net_vrf *vrf = netdev_priv(dev);
+	bool *add_fib_rules;
+	struct net *net;
 	int err;
 
 	if (!data || !data[IFLA_VRF_TABLE])
@@ -1409,13 +1413,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 	if (err)
 		goto out;
 
-	if (add_fib_rules) {
+	net = dev_net(dev);
+	add_fib_rules = net_generic(net, vrf_net_id);
+	if (*add_fib_rules) {
 		err = vrf_add_fib_rules(dev);
 		if (err) {
 			unregister_netdevice(dev);
 			goto out;
 		}
-		add_fib_rules = false;
+		*add_fib_rules = false;
 	}
 
 out:
@@ -1498,16 +1504,38 @@ static struct notifier_block vrf_notifier_block __read_mostly = {
 	.notifier_call = vrf_device_event,
 };
 
+/* Initialize per network namespace state */
+static int __net_init vrf_netns_init(struct net *net)
+{
+	bool *add_fib_rules = net_generic(net, vrf_net_id);
+
+	*add_fib_rules = true;
+
+	return 0;
+}
+
+static struct pernet_operations vrf_net_ops __net_initdata = {
+	.init = vrf_netns_init,
+	.id   = &vrf_net_id,
+	.size = sizeof(bool),
+};
+
 static int __init vrf_init_module(void)
 {
 	int rc;
 
 	register_netdevice_notifier(&vrf_notifier_block);
 
-	rc = rtnl_link_register(&vrf_link_ops);
+	rc = register_pernet_subsys(&vrf_net_ops);
 	if (rc < 0)
 		goto error;
 
+	rc = rtnl_link_register(&vrf_link_ops);
+	if (rc < 0) {
+		unregister_pernet_subsys(&vrf_net_ops);
+		goto error;
+	}
+
 	return 0;
 
 error:
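
The vrf conversion above replaces the global add_fib_rules flag with per-network-namespace state looked up through net_generic(). A condensed sketch of that pattern, including the module-exit unregistration the hunk does not show (foo_* names are illustrative):

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static unsigned int foo_net_id;

struct foo_net {
	bool add_fib_rules;
};

static int __net_init foo_netns_init(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	fn->add_fib_rules = true;
	return 0;
}

static struct pernet_operations foo_net_ops = {
	.init = foo_netns_init,
	.id   = &foo_net_id,
	.size = sizeof(struct foo_net),
};

static int __init foo_init(void)
{
	/* Allocates foo_net_id and runs foo_netns_init for every netns. */
	return register_pernet_subsys(&foo_net_ops);
}

static void __exit foo_exit(void)
{
	unregister_pernet_subsys(&foo_net_ops);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");
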
diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c
index 7f0136f..c28bdce 100644
--- a/drivers/net/vsockmon.c
+++ b/drivers/net/vsockmon.c
@@ -135,7 +135,7 @@ static void vsockmon_setup(struct net_device *dev)
 
 	dev->netdev_ops	= &vsockmon_ops;
 	dev->ethtool_ops = &vsockmon_ethtool_ops;
-	dev->destructor	= free_netdev;
+	dev->needs_free_netdev = true;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			NETIF_F_HIGHDMA | NETIF_F_LLTX;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 328b471..5fa798a 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -59,6 +59,8 @@ static const u8 all_zeros_mac[ETH_ALEN + 2];
 
 static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
+static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
+
 /* per-network namespace private data for this module */
 struct vxlan_net {
 	struct list_head  vxlan_list;
@@ -740,6 +742,22 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
 	call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
+static void vxlan_dst_free(struct rcu_head *head)
+{
+	struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+	dst_cache_destroy(&rd->dst_cache);
+	kfree(rd);
+}
+
+static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+				  struct vxlan_rdst *rd)
+{
+	list_del_rcu(&rd->list);
+	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
+	call_rcu(&rd->rcu, vxlan_dst_free);
+}
+
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
 			   __be32 *vni, u32 *ifindex)
@@ -864,9 +882,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 	 * otherwise destroy the fdb entry
 	 */
 	if (rd && !list_is_singular(&f->remotes)) {
-		list_del_rcu(&rd->list);
-		vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
-		kfree_rcu(rd, rcu);
+		vxlan_fdb_dst_destroy(vxlan, f, rd);
 		goto out;
 	}
 
@@ -1067,6 +1083,8 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
 	rcu_assign_pointer(vxlan->vn4_sock, NULL);
 	synchronize_net();
 
+	vxlan_vs_del_dev(vxlan);
+
 	if (__vxlan_sock_release_prep(sock4)) {
 		udp_tunnel_sock_release(sock4->sock);
 		kfree(sock4);
@@ -2342,6 +2360,15 @@ static void vxlan_cleanup(unsigned long arg)
 	mod_timer(&vxlan->age_timer, next_timer);
 }
 
+static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
+{
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+
+	spin_lock(&vn->sock_lock);
+	hlist_del_init_rcu(&vxlan->hlist);
+	spin_unlock(&vn->sock_lock);
+}
+
 static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
 {
 	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
@@ -2584,7 +2611,7 @@ static void vxlan_setup(struct net_device *dev)
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
 	dev->features	|= NETIF_F_LLTX;
@@ -3286,15 +3313,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 
 	vxlan_flush(vxlan, true);
 
-	spin_lock(&vn->sock_lock);
-	if (!hlist_unhashed(&vxlan->hlist))
-		hlist_del_rcu(&vxlan->hlist);
-	spin_unlock(&vn->sock_lock);
-
 	gro_cells_destroy(&vxlan->gro_cells);
 	list_del(&vxlan->next);
 	unregister_netdevice_queue(dev, head);
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 65ee2a6..a0d76f7 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -475,7 +475,7 @@ static void dlci_setup(struct net_device *dev)
 	dev->flags		= 0;
 	dev->header_ops		= &dlci_header_ops;
 	dev->netdev_ops		= &dlci_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 
 	dlp->receive		= dlci_receive;
 
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index eb91528..78596e4 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1106,7 +1106,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 		return -EIO;
 	}
 
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	*get_dev_p(pvc, type) = dev;
 	if (!used) {
 		state(hdlc)->dce_changed = 1;
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 9df9ed6..63f7490 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -306,7 +306,7 @@ static const struct net_device_ops lapbeth_netdev_ops = {
 static void lapbeth_setup(struct net_device *dev)
 {
 	dev->netdev_ops	     = &lapbeth_netdev_ops;
-	dev->destructor	     = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->type            = ARPHRD_X25;
 	dev->hard_header_len = 3;
 	dev->mtu             = 1000;
diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index 91ee542..b90c77e 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c
@@ -1287,7 +1287,7 @@ void init_netdev(struct net_device *dev)
 	struct ath6kl *ar = ath6kl_priv(dev);
 
 	dev->netdev_ops = &ath6kl_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->watchdog_timeo = ATH6KL_TX_TIMEOUT;
 
 	dev->needed_headroom = ETH_HLEN;
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index d5e993d..517a315 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1271,6 +1271,8 @@ static int wcn36xx_remove(struct platform_device *pdev)
 	qcom_smem_state_put(wcn->tx_enable_state);
 	qcom_smem_state_put(wcn->tx_rings_empty_state);
 
+	rpmsg_destroy_ept(wcn->smd_channel);
+
 	iounmap(wcn->dxe_base);
 	iounmap(wcn->ccu_base);
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index cd1d673..617199c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -5225,7 +5225,6 @@ void brcmf_cfg80211_free_netdev(struct net_device *ndev)
 
 	if (vif)
 		brcmf_free_vif(vif);
-	free_netdev(ndev);
 }
 
 static bool brcmf_is_linkup(const struct brcmf_event_msg *e)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index a3d8236..511d190 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -624,7 +624,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx,
 		if (!ndev)
 			return ERR_PTR(-ENOMEM);
 
-		ndev->destructor = brcmf_cfg80211_free_netdev;
+		ndev->needs_free_netdev = true;
+		ndev->priv_destructor = brcmf_cfg80211_free_netdev;
 		ifp = netdev_priv(ndev);
 		ifp->ndev = ndev;
 		/* store mapping ifidx to bsscfgidx */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index c7c1e99..d231042 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -442,7 +442,7 @@ struct brcmf_fw {
 	const char *nvram_name;
 	u16 domain_nr;
 	u16 bus_nr;
-	void (*done)(struct device *dev, const struct firmware *fw,
+	void (*done)(struct device *dev, int err, const struct firmware *fw,
 		     void *nvram_image, u32 nvram_len);
 };
 
@@ -477,52 +477,51 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
 	if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
 		goto fail;
 
-	fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length);
+	fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length);
 	kfree(fwctx);
 	return;
 
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
 	release_firmware(fwctx->code);
-	device_release_driver(fwctx->dev);
+	fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0);
 	kfree(fwctx);
 }
 
 static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
 {
 	struct brcmf_fw *fwctx = ctx;
-	int ret;
+	int ret = 0;
 
 	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
-	if (!fw)
+	if (!fw) {
+		ret = -ENOENT;
 		goto fail;
-
-	/* only requested code so done here */
-	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) {
-		fwctx->done(fwctx->dev, fw, NULL, 0);
-		kfree(fwctx);
-		return;
 	}
+	/* only requested code so done here */
+	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM))
+		goto done;
+
 	fwctx->code = fw;
 	ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
 				      fwctx->dev, GFP_KERNEL, fwctx,
 				      brcmf_fw_request_nvram_done);
 
-	if (!ret)
-		return;
-
-	brcmf_fw_request_nvram_done(NULL, fwctx);
+	/* pass NULL to nvram callback for bcm47xx fallback */
+	if (ret)
+		brcmf_fw_request_nvram_done(NULL, fwctx);
 	return;
 
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
-	device_release_driver(fwctx->dev);
+done:
+	fwctx->done(fwctx->dev, ret, fw, NULL, 0);
 	kfree(fwctx);
 }
 
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev,
+				void (*fw_cb)(struct device *dev, int err,
 					      const struct firmware *fw,
 					      void *nvram_image, u32 nvram_len),
 				u16 domain_nr, u16 bus_nr)
@@ -555,7 +554,7 @@ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
 			   const char *code, const char *nvram,
-			   void (*fw_cb)(struct device *dev,
+			   void (*fw_cb)(struct device *dev, int err,
 					 const struct firmware *fw,
 					 void *nvram_image, u32 nvram_len))
 {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index d3c9f0d..8fa4b7e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -73,13 +73,13 @@ void brcmf_fw_nvram_free(void *nvram);
  */
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev,
+				void (*fw_cb)(struct device *dev, int err,
 					      const struct firmware *fw,
 					      void *nvram_image, u32 nvram_len),
 				u16 domain_nr, u16 bus_nr);
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
 			   const char *code, const char *nvram,
-			   void (*fw_cb)(struct device *dev,
+			   void (*fw_cb)(struct device *dev, int err,
 					 const struct firmware *fw,
 					 void *nvram_image, u32 nvram_len));
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index 72373e5..f59642b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -2145,7 +2145,7 @@ void brcmf_fws_add_interface(struct brcmf_if *ifp)
 	struct brcmf_fws_info *fws = drvr_to_fws(ifp->drvr);
 	struct brcmf_fws_mac_descriptor *entry;
 
-	if (!ifp->ndev || fws->fcmode == BRCMF_FWS_FCMODE_NONE)
+	if (!ifp->ndev || !brcmf_fws_queue_skbs(fws))
 		return;
 
 	entry = &fws->desc.iface[ifp->ifidx];
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index f36b96d..f878706 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1650,16 +1650,23 @@ static const struct brcmf_buscore_ops brcmf_pcie_buscore_ops = {
 	.write32 = brcmf_pcie_buscore_write32,
 };
 
-static void brcmf_pcie_setup(struct device *dev, const struct firmware *fw,
+static void brcmf_pcie_setup(struct device *dev, int ret,
+			     const struct firmware *fw,
 			     void *nvram, u32 nvram_len)
 {
-	struct brcmf_bus *bus = dev_get_drvdata(dev);
-	struct brcmf_pciedev *pcie_bus_dev = bus->bus_priv.pcie;
-	struct brcmf_pciedev_info *devinfo = pcie_bus_dev->devinfo;
+	struct brcmf_bus *bus;
+	struct brcmf_pciedev *pcie_bus_dev;
+	struct brcmf_pciedev_info *devinfo;
 	struct brcmf_commonring **flowrings;
-	int ret;
 	u32 i;
 
+	/* check firmware loading result */
+	if (ret)
+		goto fail;
+
+	bus = dev_get_drvdata(dev);
+	pcie_bus_dev = bus->bus_priv.pcie;
+	devinfo = pcie_bus_dev->devinfo;
 	brcmf_pcie_attach(devinfo);
 
 	/* Some of the firmwares have the size of the memory of the device
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index fc64b89..5653d6d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -3422,7 +3422,7 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
 		/* otherwise, set txglomalign */
 		value = sdiodev->settings->bus.sdio.sd_sgentry_align;
 		/* SDIO ADMA requires at least 32 bit alignment */
-		value = max_t(u32, value, 4);
+		value = max_t(u32, value, ALIGNMENT);
 		err = brcmf_iovar_data_set(dev, "bus:txglomalign", &value,
 					   sizeof(u32));
 	}
@@ -3982,21 +3982,26 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
 	.get_memdump = brcmf_sdio_bus_get_memdump,
 };
 
-static void brcmf_sdio_firmware_callback(struct device *dev,
+static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 					 const struct firmware *code,
 					 void *nvram, u32 nvram_len)
 {
-	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-	struct brcmf_sdio *bus = sdiodev->bus;
-	int err = 0;
+	struct brcmf_bus *bus_if;
+	struct brcmf_sdio_dev *sdiodev;
+	struct brcmf_sdio *bus;
 	u8 saveclk;
 
-	brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev));
+	brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err);
+	bus_if = dev_get_drvdata(dev);
+	sdiodev = bus_if->bus_priv.sdio;
+	if (err)
+		goto fail;
 
 	if (!bus_if->drvr)
 		return;
 
+	bus = sdiodev->bus;
+
 	/* try to download image and nvram to the dongle */
 	bus->alp_only = true;
 	err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4083,6 +4088,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev,
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
 	device_release_driver(dev);
+	device_release_driver(&sdiodev->func[2]->dev);
 }
 
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index e4d545f..0eea48e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1159,17 +1159,18 @@ static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
 	return ret;
 }
 
-static void brcmf_usb_probe_phase2(struct device *dev,
+static void brcmf_usb_probe_phase2(struct device *dev, int ret,
 				   const struct firmware *fw,
 				   void *nvram, u32 nvlen)
 {
 	struct brcmf_bus *bus = dev_get_drvdata(dev);
-	struct brcmf_usbdev_info *devinfo;
-	int ret;
+	struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo;
+
+	if (ret)
+		goto error;
 
 	brcmf_dbg(USB, "Start fw downloading\n");
 
-	devinfo = bus->bus_priv.usb->devinfo;
 	ret = check_file(fw->data);
 	if (ret < 0) {
 		brcmf_err("invalid firmware\n");
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 1b7e125..6a13303 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -3066,7 +3066,7 @@ static int airo_thread(void *data) {
 		if (ai->jobs) {
 			locked = down_interruptible(&ai->sem);
 		} else {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&ai->thr_wait, &wait);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
index 3b3e076..45e2efc 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
@@ -79,8 +79,8 @@
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN	17
 #define IWL7265_UCODE_API_MIN	17
-#define IWL7265D_UCODE_API_MIN	17
-#define IWL3168_UCODE_API_MIN	20
+#define IWL7265D_UCODE_API_MIN	22
+#define IWL3168_UCODE_API_MIN	22
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION		0x0a1d
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index b9718c0..8913771 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
@@ -74,8 +74,8 @@
 #define IWL8265_UCODE_API_MAX	30
 
 /* Lowest firmware API version supported */
-#define IWL8000_UCODE_API_MIN	17
-#define IWL8265_UCODE_API_MIN	20
+#define IWL8000_UCODE_API_MIN	22
+#define IWL8265_UCODE_API_MIN	22
 
 /* NVM versions */
 #define IWL8000_NVM_VERSION		0x0a1d
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 306bc96..77efbb7 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -370,6 +370,7 @@
 #define MON_DMARB_RD_DATA_ADDR		(0xa03c5c)
 
 #define DBGC_IN_SAMPLE			(0xa03c00)
+#define DBGC_OUT_CTRL			(0xa03c0c)
 
 /* enable the ID buf for read */
 #define WFPM_PS_CTL_CLR			0xA0300C
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
index 1b7d265..a10c6aa 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
@@ -307,6 +307,11 @@ enum {
 /* Bit 1-3: LQ command color. Used to match responses to LQ commands */
 #define LQ_FLAG_COLOR_POS               1
 #define LQ_FLAG_COLOR_MSK               (7 << LQ_FLAG_COLOR_POS)
+#define LQ_FLAG_COLOR_GET(_f)		(((_f) & LQ_FLAG_COLOR_MSK) >>\
+					 LQ_FLAG_COLOR_POS)
+#define LQ_FLAGS_COLOR_INC(_c)		((((_c) + 1) << LQ_FLAG_COLOR_POS) &\
+					 LQ_FLAG_COLOR_MSK)
+#define LQ_FLAG_COLOR_SET(_f, _c)	((_c) | ((_f) & ~LQ_FLAG_COLOR_MSK))
 
 /* Bit 4-5: Tx RTS BW Signalling
  * (0) No RTS BW signalling
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
index 81b9891..1360ebf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
@@ -519,8 +519,11 @@ struct agg_tx_status {
  * bit-7 invalid rate indication
  */
 #define TX_RES_INIT_RATE_INDEX_MSK 0x0f
+#define TX_RES_RATE_TABLE_COLOR_POS 4
 #define TX_RES_RATE_TABLE_COLOR_MSK 0x70
 #define TX_RES_INV_RATE_INDEX_MSK 0x80
+#define TX_RES_RATE_TABLE_COL_GET(_f) (((_f) & TX_RES_RATE_TABLE_COLOR_MSK) >>\
+				       TX_RES_RATE_TABLE_COLOR_POS)
 
 #define IWL_MVM_TX_RES_GET_TID(_ra_tid) ((_ra_tid) & 0x0f)
 #define IWL_MVM_TX_RES_GET_RA(_ra_tid) ((_ra_tid) >> 4)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
index 7b86a4f..c8712e6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
@@ -1002,14 +1002,6 @@ int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
 	return 0;
 }
 
-static inline void iwl_mvm_restart_early_start(struct iwl_mvm *mvm)
-{
-	if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000)
-		iwl_clear_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
-	else
-		iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 1);
-}
-
 int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
 {
 	u8 *ptr;
@@ -1023,10 +1015,8 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
 	/* EARLY START - firmware's configuration is hard coded */
 	if ((!mvm->fw->dbg_conf_tlv[conf_id] ||
 	     !mvm->fw->dbg_conf_tlv[conf_id]->num_of_hcmds) &&
-	    conf_id == FW_DBG_START_FROM_ALIVE) {
-		iwl_mvm_restart_early_start(mvm);
+	    conf_id == FW_DBG_START_FROM_ALIVE)
 		return 0;
-	}
 
 	if (!mvm->fw->dbg_conf_tlv[conf_id])
 		return -EINVAL;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 0f1831b..fd2fc46 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -1040,7 +1040,7 @@ static int iwl_mvm_mac_ctxt_send_beacon(struct iwl_mvm *mvm,
 		struct iwl_mac_beacon_cmd_v6 beacon_cmd_v6;
 		struct iwl_mac_beacon_cmd_v7 beacon_cmd;
 	} u = {};
-	struct iwl_mac_beacon_cmd beacon_cmd;
+	struct iwl_mac_beacon_cmd beacon_cmd = {};
 	struct ieee80211_tx_info *info;
 	u32 beacon_skb_len;
 	u32 rate, tx_flags;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 4e74a6b..52f8d7a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1730,8 +1730,11 @@ int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, u8 minq, u8 maxq);
  */
 static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm)
 {
+	u32 cmd_queue = iwl_mvm_is_dqa_supported(mvm) ? IWL_MVM_DQA_CMD_QUEUE :
+		IWL_MVM_CMD_QUEUE;
+
 	return ((BIT(mvm->cfg->base_params->num_of_queues) - 1) &
-		~BIT(IWL_MVM_CMD_QUEUE));
+		~BIT(cmd_queue));
 }
 
 static inline
@@ -1753,6 +1756,7 @@ static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
 	if (!iwl_mvm_has_new_tx_api(mvm))
 		iwl_free_fw_paging(mvm);
 	mvm->ucode_loaded = false;
+	mvm->fw_dbg_conf = FW_DBG_INVALID;
 	iwl_trans_stop_device(mvm->trans);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 9ffff6e..3da5ec4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1149,22 +1149,38 @@ static void iwl_mvm_fw_error_dump_wk(struct work_struct *work)
 
 	mutex_lock(&mvm->mutex);
 
-	/* stop recording */
 	if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+		/* stop recording */
 		iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
+
+		iwl_mvm_fw_error_dump(mvm);
+
+		/* start recording again if the firmware is not crashed */
+		if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
+		    mvm->fw->dbg_dest_tlv)
+			iwl_clear_bits_prph(mvm->trans,
+					    MON_BUFF_SAMPLE_CTL, 0x100);
 	} else {
+		u32 in_sample = iwl_read_prph(mvm->trans, DBGC_IN_SAMPLE);
+		u32 out_ctrl = iwl_read_prph(mvm->trans, DBGC_OUT_CTRL);
+
+		/* stop recording */
 		iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 0);
-		/* wait before we collect the data till the DBGC stop */
 		udelay(100);
+		iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, 0);
+		/* wait until the DBGC has stopped before we collect the data */
+		udelay(500);
+
+		iwl_mvm_fw_error_dump(mvm);
+
+		/* start recording again if the firmware has not crashed */
+		if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
+		    mvm->fw->dbg_dest_tlv) {
+			iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, in_sample);
+			iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, out_ctrl);
+		}
 	}
 
-	iwl_mvm_fw_error_dump(mvm);
-
-	/* start recording again if the firmware is not crashed */
-	WARN_ON_ONCE((!test_bit(STATUS_FW_ERROR, &mvm->trans->status)) &&
-		     mvm->fw->dbg_dest_tlv &&
-		     iwl_mvm_start_fw_dbg_conf(mvm, mvm->fw_dbg_conf));
-
 	mutex_unlock(&mvm->mutex);
 
 	iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
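Editor's note on the dump path above: for devices newer than the 7000 family it now snapshots DBGC_IN_SAMPLE and DBGC_OUT_CTRL, stops recording with short delays so the DBGC can drain, collects the dump, and only then restores the saved values, provided the firmware has not crashed. A rough standalone sketch of that save/stop/dump/restore pattern, with stubbed register accessors in place of iwl_read_prph()/iwl_write_prph() and the dbg_dest_tlv check omitted:

#include <stdbool.h>
#include <stdint.h>

enum { DBGC_IN_SAMPLE, DBGC_OUT_CTRL, NUM_REGS };

static uint32_t regs[NUM_REGS];	/* fake register file for the sketch */

static uint32_t read_prph(int r)              { return regs[r]; }
static void     write_prph(int r, uint32_t v) { regs[r] = v; }
static void     udelay(unsigned int us)       { (void)us; /* busy-wait elided */ }
static void     collect_fw_dump(void)         { /* dump collection elided */ }

static void fw_error_dump(bool fw_crashed)
{
	/* remember the debug-collection setup before stopping it */
	uint32_t in_sample = read_prph(DBGC_IN_SAMPLE);
	uint32_t out_ctrl  = read_prph(DBGC_OUT_CTRL);

	/* stop recording and give the DBGC time to stop */
	write_prph(DBGC_IN_SAMPLE, 0);
	udelay(100);
	write_prph(DBGC_OUT_CTRL, 0);
	udelay(500);

	collect_fw_dump();

	/* restart recording only if the firmware is still alive */
	if (!fw_crashed) {
		write_prph(DBGC_IN_SAMPLE, in_sample);
		write_prph(DBGC_OUT_CTRL, out_ctrl);
	}
}

int main(void)
{
	regs[DBGC_IN_SAMPLE] = 1;
	regs[DBGC_OUT_CTRL]  = 1;
	fw_error_dump(false);
	return 0;
}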
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 7788eef..aa785cf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2,7 +2,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -1083,34 +1083,6 @@ static void rs_get_lower_rate_down_column(struct iwl_lq_sta *lq_sta,
 		rs_get_lower_rate_in_column(lq_sta, rate);
 }
 
-/* Check if both rates are identical
- * allow_ant_mismatch enables matching a SISO rate on ANT_A or ANT_B
- * with a rate indicating STBC/BFER and ANT_AB.
- */
-static inline bool rs_rate_equal(struct rs_rate *a,
-				 struct rs_rate *b,
-				 bool allow_ant_mismatch)
-
-{
-	bool ant_match = (a->ant == b->ant) && (a->stbc == b->stbc) &&
-		(a->bfer == b->bfer);
-
-	if (allow_ant_mismatch) {
-		if (a->stbc || a->bfer) {
-			WARN_ONCE(a->ant != ANT_AB, "stbc %d bfer %d ant %d",
-				  a->stbc, a->bfer, a->ant);
-			ant_match |= (b->ant == ANT_A || b->ant == ANT_B);
-		} else if (b->stbc || b->bfer) {
-			WARN_ONCE(b->ant != ANT_AB, "stbc %d bfer %d ant %d",
-				  b->stbc, b->bfer, b->ant);
-			ant_match |= (a->ant == ANT_A || a->ant == ANT_B);
-		}
-	}
-
-	return (a->type == b->type) && (a->bw == b->bw) && (a->sgi == b->sgi) &&
-		(a->ldpc == b->ldpc) && (a->index == b->index) && ant_match;
-}
-
 /* Check if both rates share the same column */
 static inline bool rs_rate_column_match(struct rs_rate *a,
 					struct rs_rate *b)
@@ -1182,12 +1154,12 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 	u32 lq_hwrate;
 	struct rs_rate lq_rate, tx_resp_rate;
 	struct iwl_scale_tbl_info *curr_tbl, *other_tbl, *tmp_tbl;
-	u8 reduced_txp = (uintptr_t)info->status.status_driver_data[0];
+	u32 tlc_info = (uintptr_t)info->status.status_driver_data[0];
+	u8 reduced_txp = tlc_info & RS_DRV_DATA_TXP_MSK;
+	u8 lq_color = RS_DRV_DATA_LQ_COLOR_GET(tlc_info);
 	u32 tx_resp_hwrate = (uintptr_t)info->status.status_driver_data[1];
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	struct iwl_lq_sta *lq_sta = &mvmsta->lq_sta;
-	bool allow_ant_mismatch = fw_has_api(&mvm->fw->ucode_capa,
-					     IWL_UCODE_TLV_API_LQ_SS_PARAMS);
 
 	/* Treat uninitialized rate scaling data same as non-existing. */
 	if (!lq_sta) {
@@ -1262,10 +1234,10 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 	rs_rate_from_ucode_rate(lq_hwrate, info->band, &lq_rate);
 
 	/* Here we actually compare this rate to the latest LQ command */
-	if (!rs_rate_equal(&tx_resp_rate, &lq_rate, allow_ant_mismatch)) {
+	if (lq_color != LQ_FLAG_COLOR_GET(table->flags)) {
 		IWL_DEBUG_RATE(mvm,
-			       "initial tx resp rate 0x%x does not match 0x%x\n",
-			       tx_resp_hwrate, lq_hwrate);
+			       "tx resp color 0x%x does not match 0x%x\n",
+			       lq_color, LQ_FLAG_COLOR_GET(table->flags));
 
 		/*
 		 * Since rates mis-match, the last LQ command may have failed.
@@ -3326,6 +3298,7 @@ static void rs_build_rates_table(struct iwl_mvm *mvm,
 	u8 valid_tx_ant = 0;
 	struct iwl_lq_cmd *lq_cmd = &lq_sta->lq;
 	bool toggle_ant = false;
+	u32 color;
 
 	memcpy(&rate, initial_rate, sizeof(rate));
 
@@ -3380,6 +3353,9 @@ static void rs_build_rates_table(struct iwl_mvm *mvm,
 				 num_rates, num_retries, valid_tx_ant,
 				 toggle_ant);
 
+	/* update the color of the LQ command (as a counter at bits 1-3) */
+	color = LQ_FLAGS_COLOR_INC(LQ_FLAG_COLOR_GET(lq_cmd->flags));
+	lq_cmd->flags = LQ_FLAG_COLOR_SET(lq_cmd->flags, color);
 }
 
 struct rs_bfer_active_iter_data {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
index ee207f2..3abde1c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
@@ -2,6 +2,7 @@
  *
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -357,6 +358,20 @@ struct iwl_lq_sta {
 	} pers;
 };
 
+/* ieee80211_tx_info's status_driver_data[0] packs the LQ color and reduced txp.
+ * Note that this is part of the iwlmvm <-> mac80211 interface.
+ * bits 0-7: reduced tx power
+ * bits 8-10: LQ command's color
+ */
+#define RS_DRV_DATA_TXP_MSK 0xff
+#define RS_DRV_DATA_LQ_COLOR_POS 8
+#define RS_DRV_DATA_LQ_COLOR_MSK (7 << RS_DRV_DATA_LQ_COLOR_POS)
+#define RS_DRV_DATA_LQ_COLOR_GET(_f) (((_f) & RS_DRV_DATA_LQ_COLOR_MSK) >>\
+				      RS_DRV_DATA_LQ_COLOR_POS)
+#define RS_DRV_DATA_PACK(_c, _p) ((void *)(uintptr_t)\
+				  (((uintptr_t)_p) |\
+				   ((_c) << RS_DRV_DATA_LQ_COLOR_POS)))
+
 /* Initialize station's rate scaling information after adding station */
 void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 			  enum nl80211_band band, bool init);
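Editor's note on the RS_DRV_DATA_* macros added above: they pack the reduced TX power into bits 0-7 and the LQ command color into bits 8-10 of status_driver_data[0]. A small self-contained sketch of the same pack/unpack scheme, using a bare uintptr_t value rather than the void * cookie the driver stores:

#include <assert.h>
#include <stdint.h>

#define TXP_MSK		0xff
#define LQ_COLOR_POS	8
#define LQ_COLOR_MSK	(7 << LQ_COLOR_POS)

static uintptr_t pack(uint8_t lq_color, uint8_t reduced_txp)
{
	return (uintptr_t)reduced_txp |
	       ((uintptr_t)(lq_color & 7) << LQ_COLOR_POS);
}

int main(void)
{
	uintptr_t v = pack(5, 0x12);

	assert((v & TXP_MSK) == 0x12);				/* reduced tx power */
	assert(((v & LQ_COLOR_MSK) >> LQ_COLOR_POS) == 5);	/* LQ color */
	return 0;
}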
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index f5c786d..614d678 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2120,7 +2120,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 	if (!iwl_mvm_is_dqa_supported(mvm))
 		return 0;
 
-	if (WARN_ON(vif->type != NL80211_IFTYPE_AP))
+	if (WARN_ON(vif->type != NL80211_IFTYPE_AP &&
+		    vif->type != NL80211_IFTYPE_ADHOC))
 		return -ENOTSUPP;
 
 	/*
@@ -2155,6 +2156,16 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 		mvmvif->cab_queue = queue;
 	} else if (!fw_has_api(&mvm->fw->ucode_capa,
 			       IWL_UCODE_TLV_API_STA_TYPE)) {
+		/*
+		 * In IBSS, ieee80211_check_queues() sets the cab_queue to be
+		 * invalid, so make sure we use the queue we want.
+		 * Note that this is done here as we want to avoid making DQA
+		 * changes in the mac80211 layer.
+		 */
+		if (vif->type == NL80211_IFTYPE_ADHOC) {
+			vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
+			mvmvif->cab_queue = vif->cab_queue;
+		}
 		iwl_mvm_enable_txq(mvm, vif->cab_queue, vif->cab_queue, 0,
 				   &cfg, timeout);
 	}
@@ -3321,18 +3332,15 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
 
 	/* Get the station from the mvm local station table */
 	mvm_sta = iwl_mvm_get_key_sta(mvm, vif, sta);
-	if (!mvm_sta) {
-		IWL_ERR(mvm, "Failed to find station\n");
-		return -EINVAL;
-	}
-	sta_id = mvm_sta->sta_id;
+	if (mvm_sta)
+		sta_id = mvm_sta->sta_id;
 
 	IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
 		      keyconf->keyidx, sta_id);
 
-	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
-	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
-	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
+	if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+			keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+			keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256))
 		return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true);
 
 	if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 2716cb5..ad62b67 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -313,6 +313,7 @@ enum iwl_mvm_agg_state {
  *	This is basically (last acked packet++).
  * @rate_n_flags: Rate at which Tx was attempted. Holds the data between the
  *	Tx response (TX_CMD), and the block ack notification (COMPRESSED_BA).
+ * @lq_color: the color of the LQ command as it appears in the tx response.
  * @amsdu_in_ampdu_allowed: true if A-MSDU in A-MPDU is allowed.
  * @state: state of the BA agreement establishment / tear down.
  * @txq_id: Tx queue used by the BA session / DQA
@@ -331,6 +332,7 @@ struct iwl_mvm_tid_data {
 	u16 next_reclaimed;
 	/* The rest is Tx AGG related */
 	u32 rate_n_flags;
+	u8 lq_color;
 	bool amsdu_in_ampdu_allowed;
 	enum iwl_mvm_agg_state state;
 	u16 txq_id;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index f9cbd19..506d581 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -790,11 +790,13 @@ static int iwl_mvm_tcool_set_cur_state(struct thermal_cooling_device *cdev,
 	struct iwl_mvm *mvm = (struct iwl_mvm *)(cdev->devdata);
 	int ret;
 
-	if (!mvm->ucode_loaded || !(mvm->cur_ucode == IWL_UCODE_REGULAR))
-		return -EIO;
-
 	mutex_lock(&mvm->mutex);
 
+	if (!mvm->ucode_loaded || !(mvm->cur_ucode == IWL_UCODE_REGULAR)) {
+		ret = -EIO;
+		goto unlock;
+	}
+
 	if (new_state >= ARRAY_SIZE(iwl_mvm_cdev_budgets)) {
 		ret = -EINVAL;
 		goto unlock;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index bcaceb64..f21901c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1323,6 +1323,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 	struct iwl_mvm_sta *mvmsta;
 	struct sk_buff_head skbs;
 	u8 skb_freed = 0;
+	u8 lq_color;
 	u16 next_reclaimed, seq_ctl;
 	bool is_ndp = false;
 
@@ -1405,8 +1406,9 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 		info->status.tx_time =
 			le16_to_cpu(tx_resp->wireless_media_time);
 		BUILD_BUG_ON(ARRAY_SIZE(info->status.status_driver_data) < 1);
+		lq_color = TX_RES_RATE_TABLE_COL_GET(tx_resp->tlc_info);
 		info->status.status_driver_data[0] =
-				(void *)(uintptr_t)tx_resp->reduced_tpc;
+			RS_DRV_DATA_PACK(lq_color, tx_resp->reduced_tpc);
 
 		ieee80211_tx_status(mvm->hw, skb);
 	}
@@ -1638,6 +1640,9 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
 			le32_to_cpu(tx_resp->initial_rate);
 		mvmsta->tid_data[tid].tx_time =
 			le16_to_cpu(tx_resp->wireless_media_time);
+		mvmsta->tid_data[tid].lq_color =
+			(tx_resp->tlc_info & TX_RES_RATE_TABLE_COLOR_MSK) >>
+			TX_RES_RATE_TABLE_COLOR_POS;
 	}
 
 	rcu_read_unlock();
@@ -1707,6 +1712,11 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
 	iwl_mvm_check_ratid_empty(mvm, sta, tid);
 
 	freed = 0;
+
+	/* pack the lq color from tid_data along with the reduced txp */
+	ba_info->status.status_driver_data[0] =
+		RS_DRV_DATA_PACK(tid_data->lq_color,
+				 ba_info->status.status_driver_data[0]);
 	ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate;
 
 	skb_queue_walk(&reclaimed_skbs, skb) {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 70acf85..93cbc7a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -2803,7 +2803,8 @@ static struct iwl_trans_dump_data
 #ifdef CONFIG_PM_SLEEP
 static int iwl_trans_pcie_suspend(struct iwl_trans *trans)
 {
-	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3 &&
+	    (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3))
 		return iwl_pci_fw_enter_d0i3(trans);
 
 	return 0;
@@ -2811,7 +2812,8 @@ static int iwl_trans_pcie_suspend(struct iwl_trans *trans)
 
 static void iwl_trans_pcie_resume(struct iwl_trans *trans)
 {
-	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3)
+	if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3 &&
+	    (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3))
 		iwl_pci_fw_exit_d0i3(trans);
 }
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 9fb46a6f..9c9bfbb 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -906,7 +906,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 
 	if (WARN_ON(iwl_rx_packet_payload_len(hcmd.resp_pkt) != sizeof(*rsp))) {
 		ret = -EINVAL;
-		goto error;
+		goto error_free_resp;
 	}
 
 	rsp = (void *)hcmd.resp_pkt->data;
@@ -915,13 +915,13 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 	if (qid > ARRAY_SIZE(trans_pcie->txq)) {
 		WARN_ONCE(1, "queue index %d unsupported", qid);
 		ret = -EIO;
-		goto error;
+		goto error_free_resp;
 	}
 
 	if (test_and_set_bit(qid, trans_pcie->queue_used)) {
 		WARN_ONCE(1, "queue %d already used", qid);
 		ret = -EIO;
-		goto error;
+		goto error_free_resp;
 	}
 
 	txq->id = qid;
@@ -934,8 +934,11 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
 			   (txq->write_ptr) | (qid << 16));
 	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);
 
+	iwl_free_resp(&hcmd);
 	return qid;
 
+error_free_resp:
+	iwl_free_resp(&hcmd);
 error:
 	iwl_pcie_gen2_txq_free_memory(trans, txq);
 	return ret;
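Editor's note on the error_free_resp label above: the host-command response buffer is now released on every exit path once the command has completed, and the success path frees it as well. A generic sketch of the layered-goto cleanup shape (stand-in helpers, not the driver's API):

#include <stdlib.h>

struct resp { int qid; };

static struct resp *send_cmd(void)
{
	struct resp *r = malloc(sizeof(*r));

	if (r)
		r->qid = 3;	/* pretend the firmware assigned queue 3 */
	return r;
}

static void free_resp(struct resp *r)		{ free(r); }
static int  validate(const struct resp *r)	{ return r->qid > 31 ? -1 : 0; }
static void free_queue_memory(void)		{ /* elided */ }

static int dyn_txq_alloc(void)
{
	struct resp *r;
	int ret;

	r = send_cmd();
	if (!r) {
		ret = -1;
		goto error;		/* no response to free yet */
	}

	if (validate(r)) {
		ret = -1;
		goto error_free_resp;	/* response must be released too */
	}

	ret = r->qid;
	free_resp(r);			/* success path frees it as well */
	return ret;

error_free_resp:
	free_resp(r);
error:
	free_queue_memory();
	return ret;
}

int main(void)
{
	return dyn_txq_alloc() < 0;
}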
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index b2c6b06..ff153ce 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -2544,7 +2544,7 @@ static int prism2_ioctl_priv_prism2_param(struct net_device *dev,
 			ret = -EINVAL;
 		}
 		if (local->iw_mode == IW_MODE_MASTER) {
-			wait_queue_t __wait;
+			wait_queue_entry_t __wait;
 			init_waitqueue_entry(&__wait, current);
 			add_wait_queue(&local->hostscan_wq, &__wait);
 			set_current_state(TASK_INTERRUPTIBLE);
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 544fc09..1372b20 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -73,7 +73,7 @@ struct net_device * hostap_add_interface(struct local_info *local,
 	dev->mem_end = mdev->mem_end;
 
 	hostap_setup_dev(dev, local, type);
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 
 	sprintf(dev->name, "%s%s", prefix, name);
 	if (!rtnl_locked)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 002b25c..c854a55 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2861,7 +2861,7 @@ static const struct net_device_ops hwsim_netdev_ops = {
 static void hwsim_mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &hwsim_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211_RADIOTAP;
diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c
index e350020..dde065d 100644
--- a/drivers/net/wireless/marvell/libertas/main.c
+++ b/drivers/net/wireless/marvell/libertas/main.c
@@ -453,7 +453,7 @@ static int lbs_thread(void *data)
 {
 	struct net_device *dev = data;
 	struct lbs_private *priv = dev->ml_priv;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	lbs_deb_enter(LBS_DEB_THREAD);
 
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index dd87b9f..39b6b5e 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1280,7 +1280,7 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
 			      struct net_device *dev)
 {
 	dev->netdev_ops = &mwifiex_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	/* Initialize private structure */
 	priv->current_key_index = 0;
 	priv->media_connected = false;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 530586b..5b1d2e8 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned long   remaining_credit;
 	struct timer_list credit_timeout;
 	u64 credit_window_start;
+	bool rate_limited;
 
 	/* Statistics */
 	struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 8397f6c..e322a86 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
-		xenvif_napi_schedule_or_enable_events(queue);
+		/* If the queue is rate-limited, it shall be
+		 * rescheduled in the timer callback.
+		 */
+		if (likely(!queue->rate_limited))
+			xenvif_napi_schedule_or_enable_events(queue);
 	}
 
 	return work_done;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 602d408..5042ff8 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
 		max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
 	queue->remaining_credit = min(max_credit, max_burst);
+	queue->rate_limited = false;
 }
 
 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		msecs_to_jiffies(queue->credit_usec / 1000);
 
 	/* Timer could already be pending in rare cases. */
-	if (timer_pending(&queue->credit_timeout))
+	if (timer_pending(&queue->credit_timeout)) {
+		queue->rate_limited = true;
 		return true;
+	}
 
 	/* Passed the point where we can replenish credit? */
 	if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
 		mod_timer(&queue->credit_timeout,
 			  next_credit);
 		queue->credit_window_start = next_credit;
+		queue->rate_limited = true;
 
 		return true;
 	}
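Editor's note on the rate_limited flag added above: it records that a queue ran out of TX credit and armed the credit timer, so xenvif_poll() skips rescheduling NAPI and leaves that to the timer path, which refills credit and clears the flag in tx_add_credit(). A hedged sketch of that control flow with stand-in helpers for the NAPI and timer plumbing:

#include <stdbool.h>

struct queue {
	unsigned long remaining_credit;
	bool timer_pending;
	bool rate_limited;
};

/* stand-ins for the real xenvif/NAPI helpers */
static void arm_credit_timer(struct queue *q)        { q->timer_pending = true; }
static void napi_schedule_or_enable(struct queue *q) { (void)q; }

static bool tx_credit_exceeded(struct queue *q, unsigned int size)
{
	if (size <= q->remaining_credit)
		return false;

	/* out of credit: remember it so poll() defers to the timer */
	q->rate_limited = true;
	arm_credit_timer(q);
	return true;
}

static void tx_add_credit(struct queue *q, unsigned long credit)
{
	q->remaining_credit = credit;
	q->rate_limited = false;	/* the timer refilled us */
}

static void poll_done(struct queue *q)
{
	/* a rate-limited queue is rescheduled by the timer callback */
	if (!q->rate_limited)
		napi_schedule_or_enable(q);
}

int main(void)
{
	struct queue q = { .remaining_credit = 100 };

	if (tx_credit_exceeded(&q, 1500))
		poll_done(&q);		/* does not reschedule */
	tx_add_credit(&q, 100000);	/* timer callback path */
	poll_done(&q);			/* reschedules again */
	return 0;
}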
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index c002384..7b3b6fd 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -2878,7 +2878,7 @@ static const struct intel_ntb_reg skx_reg = {
 	.link_is_up		= xeon_link_is_up,
 	.db_ioread		= skx_db_ioread,
 	.db_iowrite		= skx_db_iowrite,
-	.db_size		= sizeof(u64),
+	.db_size		= sizeof(u32),
 	.ntb_ctl		= SKX_NTBCNTL_OFFSET,
 	.mw_bar			= {2, 4},
 };
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 02ca45f..10e5bf4 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -177,14 +177,12 @@ struct ntb_transport_qp {
 	u64 rx_err_ver;
 	u64 rx_memcpy;
 	u64 rx_async;
-	u64 dma_rx_prep_err;
 	u64 tx_bytes;
 	u64 tx_pkts;
 	u64 tx_ring_full;
 	u64 tx_err_no_buf;
 	u64 tx_memcpy;
 	u64 tx_async;
-	u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -254,8 +252,6 @@ enum {
 #define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES	100
 #define NTB_LINK_DOWN_TIMEOUT	10
-#define DMA_RETRIES		20
-#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -516,12 +512,6 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "free tx - \t%u\n",
 			       ntb_transport_tx_free_entry(qp));
-	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "DMA tx prep err - \t%llu\n",
-			       qp->dma_tx_prep_err);
-	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "DMA rx prep err - \t%llu\n",
-			       qp->dma_rx_prep_err);
 
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "\n");
@@ -623,7 +613,7 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
 	if (!mw->virt_addr)
 		return -ENOMEM;
 
-	if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+	if (mw_num < qp_count % mw_count)
 		num_qps_mw = qp_count / mw_count + 1;
 	else
 		num_qps_mw = qp_count / mw_count;
@@ -768,8 +758,6 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
 	qp->tx_err_no_buf = 0;
 	qp->tx_memcpy = 0;
 	qp->tx_async = 0;
-	qp->dma_tx_prep_err = 0;
-	qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1000,7 +988,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
 	qp->event_handler = NULL;
 	ntb_qp_link_down_reset(qp);
 
-	if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+	if (mw_num < qp_count % mw_count)
 		num_qps_mw = qp_count / mw_count + 1;
 	else
 		num_qps_mw = qp_count / mw_count;
@@ -1128,8 +1116,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	qp_count = ilog2(qp_bitmap);
 	if (max_num_clients && max_num_clients < qp_count)
 		qp_count = max_num_clients;
-	else if (mw_count < qp_count)
-		qp_count = mw_count;
+	else if (nt->mw_count < qp_count)
+		qp_count = nt->mw_count;
 
 	qp_bitmap &= BIT_ULL(qp_count) - 1;
 
@@ -1317,7 +1305,6 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	void *buf = entry->buf;
-	int retries = 0;
 
 	len = entry->len;
 	device = chan->device;
@@ -1346,22 +1333,11 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 
 	unmap->from_cnt = 1;
 
-	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan,
-						     unmap->addr[1],
-						     unmap->addr[0], len,
-						     DMA_PREP_INTERRUPT);
-		if (txd)
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(DMA_OUT_RESOURCE_TO);
-	}
-
-	if (!txd) {
-		qp->dma_rx_prep_err++;
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
 		goto err_get_unmap;
-	}
 
 	txd->callback_result = ntb_rx_copy_callback;
 	txd->callback_param = entry;
@@ -1606,7 +1582,6 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 	struct dmaengine_unmap_data *unmap;
 	dma_addr_t dest;
 	dma_cookie_t cookie;
-	int retries = 0;
 
 	device = chan->device;
 	dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
@@ -1628,21 +1603,10 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 
 	unmap->to_cnt = 1;
 
-	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan, dest,
-						     unmap->addr[0], len,
-						     DMA_PREP_INTERRUPT);
-		if (txd)
-			break;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(DMA_OUT_RESOURCE_TO);
-	}
-
-	if (!txd) {
-		qp->dma_tx_prep_err++;
+	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
+	if (!txd)
 		goto err_get_unmap;
-	}
 
 	txd->callback_result = ntb_tx_copy_callback;
 	txd->callback_param = entry;
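Editor's note on the two "mw_num < qp_count % mw_count" hunks above: they change how queue pairs are spread across memory windows, so that every window gets qp_count / mw_count queue pairs and the first qp_count % mw_count windows carry one extra. A quick standalone check of that distribution:

#include <stdio.h>

static unsigned int qps_for_mw(unsigned int qp_count, unsigned int mw_count,
			       unsigned int mw_num)
{
	/* the first (qp_count % mw_count) windows carry one extra queue pair */
	if (mw_num < qp_count % mw_count)
		return qp_count / mw_count + 1;
	return qp_count / mw_count;
}

int main(void)
{
	unsigned int mw, total = 0;

	/* e.g. 7 queue pairs over 3 memory windows -> 3, 2, 2 */
	for (mw = 0; mw < 3; mw++) {
		printf("mw %u: %u qps\n", mw, qps_for_mw(7, 3, mw));
		total += qps_for_mw(7, 3, mw);
	}
	printf("total: %u\n", total);	/* 7 */
	return 0;
}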
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 434e1d4..5cab283 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -90,11 +90,11 @@ MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
 
 static unsigned int seg_order = 19; /* 512K */
 module_param(seg_order, uint, 0644);
-MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
 
 static unsigned int run_order = 32; /* 4G */
 module_param(run_order, uint, 0644);
-MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
 
 static bool use_dma; /* default to 0 */
 module_param(use_dma, bool, 0644);
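Editor's note on the MODULE_PARM_DESC fixes above: both parameters are exponents of two (2^n bytes), not n^2. With the defaults shown in the surrounding code, that works out to:

	1UL  << seg_order = 1UL  << 19 = 524288 bytes      (512K per segment)
	1ULL << run_order = 1ULL << 32 = 4294967296 bytes  (4G of total data)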
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a..f12d23c 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 					"io error in %s sector %lld, len %d,\n",
 					(rw == READ) ? "READ" : "WRITE",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 
 	blk_queue_make_request(q, nd_blk_make_request);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b..b6ba061 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	 * another kernel subsystem, and we just pass it through.
 	 */
 	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		goto out;
 	}
 
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 					(op_is_write(bio_op(bio))) ? "WRITE" :
 					"READ",
 					(unsigned long long) iter.bi_sector, len);
-			bio->bi_error = err;
+			bio->bi_status = errno_to_blk_status(err);
 			break;
 		}
 	}
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
 	blk_queue_make_request(btt->btt_queue, btt_make_request);
 	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
 	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
-	blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
 	btt->btt_queue->queuedata = btt;
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index ae00dc0..4c989bb 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -222,13 +222,6 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 	return dev;
 }
 
-static bool uuid_is_null(u8 *uuid)
-{
-	static const u8 null_uuid[16];
-
-	return (memcmp(uuid, null_uuid, 16) == 0);
-}
-
 /**
  * nd_btt_arena_is_valid - check if the metadata layout is valid
  * @nd_btt:	device with BTT geometry and backing device info
@@ -249,7 +242,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 	if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
 		return false;
 
-	if (!uuid_is_null(super->parent_uuid))
+	if (!guid_is_null((guid_t *)&super->parent_uuid))
 		if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
 			return false;
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d46..6b577af 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
 	return to_nd_region(to_dev(pmem)->parent);
 }
 
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
-		unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+		phys_addr_t offset, unsigned int len)
 {
 	struct device *dev = to_dev(pmem);
 	sector_t sector;
 	long cleared;
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 
 	sector = (offset - pmem->data_offset) / 512;
 
 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
 	if (cleared < len)
-		rc = -EIO;
+		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
 		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 	kunmap_atomic(mem);
 }
 
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
 		void *pmem_addr, unsigned int len)
 {
 	int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
 	rc = memcpy_mcsafe(mem + off, pmem_addr, len);
 	kunmap_atomic(mem);
 	if (rc)
-		return -EIO;
-	return 0;
+		return BLK_STS_IOERR;
+	return BLK_STS_OK;
 }
 
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, bool is_write,
 			sector_t sector)
 {
-	int rc = 0;
+	blk_status_t rc = BLK_STS_OK;
 	bool bad_pmem = false;
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
 	void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 	if (!is_write) {
 		if (unlikely(bad_pmem))
-			rc = -EIO;
+			rc = BLK_STS_IOERR;
 		else {
 			rc = read_pmem(page, off, pmem_addr, len);
 			flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
-	int rc = 0;
+	blk_status_t rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 				bvec.bv_offset, op_is_write(bio_op(bio)),
 				iter.bi_sector);
 		if (rc) {
-			bio->bi_error = rc;
+			bio->bi_status = rc;
 			break;
 		}
 	}
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, bool is_write)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
-	int rc;
+	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
 
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	if (rc == 0)
 		page_endio(page, is_write, 0);
 
-	return rc;
+	return blk_status_to_errno(rc);
 }
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
-	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
 	queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
 	q->queuedata = pmem;
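Editor's note on the pmem hunks above: the internal helpers now return blk_status_t instead of -errno, and the value is converted back to an errno only at the rw_page() boundary via blk_status_to_errno(). A loose sketch of that layering with a stand-in status enum (not the real blk_status_t values):

#include <stdio.h>

/* stand-in status type mirroring the idea behind blk_status_t */
typedef enum { STS_OK = 0, STS_IOERR, STS_NOSPC } blk_sts;

static int sts_to_errno(blk_sts s)
{
	switch (s) {
	case STS_OK:    return 0;
	case STS_NOSPC: return -28;	/* -ENOSPC */
	default:        return -5;	/* -EIO */
	}
}

/* internal I/O helpers keep the richer status type... */
static blk_sts do_bvec(int bad_block)
{
	return bad_block ? STS_IOERR : STS_OK;
}

/* ...and only the rw_page()-style entry point converts to an errno */
static int rw_page(int bad_block)
{
	return sts_to_errno(do_bvec(bad_block));
}

int main(void)
{
	printf("%d %d\n", rw_page(0), rw_page(1));	/* prints: 0 -5 */
	return 0;
}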
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a6..46d6cb1 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called nvme.
 
-config BLK_DEV_NVME_SCSI
-	bool "SCSI emulation for NVMe device nodes"
-	depends on NVME_CORE
-	---help---
-	  This adds support for the SG_IO ioctl on the NVMe character
-	  and block devices nodes, as well as a translation for a small
-	  number of selected SCSI commands to NVMe commands to the NVMe
-	  driver.  If you don't know what this means you probably want
-	  to say N here, unless you run a distro that abuses the SCSI
-	  emulation to provide stable device names for mount by id, like
-	  some OpenSuSE and SLES versions.
-
 config NVME_FABRICS
 	tristate
 
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d94..cc0aacb 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@
 obj-$(CONFIG_NVME_FC)			+= nvme-fc.o
 
 nvme-core-y				:= core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)	+= scsi.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
 
 nvme-y					+= pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d5e0906..d70df1d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
 #include <linux/pm_qos.h>
-#include <scsi/sg.h>
 #include <asm/unaligned.h>
 
 #include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 EXPORT_SYMBOL_GPL(nvme_io_timeout);
 
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
@@ -56,7 +55,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -65,34 +64,53 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
+	if (!queue_work(nvme_wq, &ctrl->reset_work))
+		return -EBUSY;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+	int ret;
+
+	ret = nvme_reset_ctrl(ctrl);
+	if (!ret)
+		flush_work(&ctrl->reset_work);
+	return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
 {
 	switch (nvme_req(req)->status & 0x7ff) {
 	case NVME_SC_SUCCESS:
-		return 0;
+		return BLK_STS_OK;
 	case NVME_SC_CAP_EXCEEDED:
-		return -ENOSPC;
-	default:
-		return -EIO;
-
-	/*
-	 * XXX: these errors are a nasty side-band protocol to
-	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
-	 * anywhere..
-	 */
-	case NVME_SC_CMD_SEQ_ERROR:
-		return -EILSEQ;
+		return BLK_STS_NOSPC;
 	case NVME_SC_ONCS_NOT_SUPPORTED:
-		return -EOPNOTSUPP;
+		return BLK_STS_NOTSUPP;
 	case NVME_SC_WRITE_FAULT:
 	case NVME_SC_READ_ERROR:
 	case NVME_SC_UNWRITTEN_BLOCK:
-		return -ENODATA;
+		return BLK_STS_MEDIUM;
+	default:
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		switch (old_state) {
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_LIVE:
-		case NVME_CTRL_RECONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+
+	c.directive.opcode = nvme_admin_directive_send;
+	c.directive.nsid = cpu_to_le32(0xffffffff);
+	c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+	c.directive.dtype = NVME_DIR_IDENTIFY;
+	c.directive.tdtype = NVME_DIR_STREAMS;
+	c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+	return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+				  struct streams_directive_params *s, u32 nsid)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	memset(s, 0, sizeof(*s));
+
+	c.directive.opcode = nvme_admin_directive_recv;
+	c.directive.nsid = cpu_to_le32(nsid);
+	c.directive.numd = sizeof(*s);
+	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+	c.directive.dtype = NVME_DIR_STREAMS;
+
+	return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+		return 0;
+	if (!streams)
+		return 0;
+
+	ret = nvme_enable_streams(ctrl);
+	if (ret)
+		return ret;
+
+	ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+	if (ret)
+		return ret;
+
+	ctrl->nssa = le16_to_cpu(s.nssa);
+	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+		dev_info(ctrl->device, "too few streams (%u) available\n",
+					ctrl->nssa);
+		nvme_disable_streams(ctrl);
+		return 0;
+	}
+
+	ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+	return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+				     struct request *req, u16 *control,
+				     u32 *dsmgmt)
+{
+	enum rw_hint streamid = req->write_hint;
+
+	if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+		streamid = 0;
+	else {
+		streamid--;
+		if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+			return;
+
+		*control |= NVME_RW_DTYPE_STREAMS;
+		*dsmgmt |= streamid << 16;
+	}
+
+	if (streamid < ARRAY_SIZE(req->q->write_hints))
+		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
 static inline void nvme_setup_flush(struct nvme_ns *ns,
 		struct nvme_command *cmnd)
 {
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
 	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
 	if (!range)
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 
 	__rq_for_each_bio(bio, req) {
 		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
 	if (WARN_ON_ONCE(n != segments)) {
 		kfree(range);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	req->special_vec.bv_len = sizeof(*range) * segments;
 	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-		struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+		struct request *req, struct nvme_command *cmnd)
 {
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	u16 control = 0;
 	u32 dsmgmt = 0;
 
+	/*
+	 * If formatted with metadata, require that the block layer provide a buffer
+	 * unless this namespace is formatted such that the metadata can be
+	 * stripped/generated by the controller with PRACT=1.
+	 */
+	if (ns && ns->ms &&
+	    (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+	    !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+		return BLK_STS_NOTSUPP;
+
 	if (req->cmd_flags & REQ_FUA)
 		control |= NVME_RW_FUA;
 	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
+	if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
 	if (ns->ms) {
 		switch (ns->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+	return 0;
 }
 
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd)
 {
-	int ret = BLK_MQ_RQ_QUEUE_OK;
+	blk_status_t ret = BLK_STS_OK;
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		break;
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
-		nvme_setup_rw(ns, req, cmd);
+		ret = nvme_setup_rw(ns, req, cmd);
 		break;
 	default:
 		WARN_ON_ONCE(1);
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 
 	cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 			result, timeout);
 }
 
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvme_ctrl *ctrl = rq->end_io_data;
 
 	blk_mq_free_request(rq);
 
-	if (error) {
+	if (status) {
 		dev_err(ctrl->device,
-			"failed nvme_keep_alive_end_io error=%d\n", error);
+			"failed nvme_keep_alive_end_io error=%d\n",
+				status);
 		return;
 	}
 
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	if (nvme_keep_alive(ctrl)) {
 		/* allocation failure, reset the controller */
 		dev_err(ctrl->device, "keep-alive failed\n");
-		ctrl->ops->reset_ctrl(ctrl);
+		nvme_reset_ctrl(ctrl);
 		return;
 	}
 }
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
 	struct nvme_command c = { };
 	int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 	return error;
 }
 
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+	struct nvme_command c = { };
+	int status;
+	void *data;
+	int pos;
+	int len;
+
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+				      NVME_IDENTIFY_DATA_SIZE);
+	if (status)
+		goto free_data;
+
+	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+		struct nvme_ns_id_desc *cur = data + pos;
+
+		if (cur->nidl == 0)
+			break;
+
+		switch (cur->nidt) {
+		case NVME_NIDT_EUI64:
+			if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_EUI64_LEN;
+			memcpy(ns->eui, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_NGUID:
+			if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_NGUID_LEN;
+			memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+			break;
+		case NVME_NIDT_UUID:
+			if (cur->nidl != NVME_NIDT_UUID_LEN) {
+				dev_warn(ns->ctrl->device,
+					 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+					 cur->nidl);
+				goto free_data;
+			}
+			len = NVME_NIDT_UUID_LEN;
+			uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+			break;
+		default:
+			/* Skip unknown types */
+			len = cur->nidl;
+			break;
+		}
+
+		len += sizeof(*cur);
+	}
+free_data:
+	kfree(data);
+	return status;
+}
+
 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
 {
 	struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
 	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
 
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 	return error;
 }
 
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-		      void *buffer, size_t buflen, u32 *result)
-{
-	struct nvme_command c;
-	union nvme_result res;
-	int ret;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(nsid);
-	c.features.fid = cpu_to_le32(fid);
-
-	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
-			NVME_QID_ANY, 0, 0);
-	if (ret >= 0 && result)
-		*result = le32_to_cpu(res.u32);
-	return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 		      void *buffer, size_t buflen, u32 *result)
 {
 	struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 	return ret;
 }
 
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
-	struct nvme_command c = { };
-	int error;
-
-	c.common.opcode = nvme_admin_get_log_page,
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-	c.common.cdw10[0] = cpu_to_le32(
-			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-			 NVME_LOG_SMART),
-
-	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-	if (!*log)
-		return -ENOMEM;
-
-	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-			sizeof(struct nvme_smart_log));
-	if (error)
-		kfree(*log);
-	return error;
-}
-
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
 	u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 	 * access to the admin queue, as that might be only way to fix them up.
 	 */
 	if (status > 0) {
-		dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+		dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
 		*count = 0;
 	} else {
 		nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
-	case SG_GET_VERSION_NUM:
-		return nvme_sg_get_version_num((void __user *)arg);
-	case SG_IO:
-		return nvme_sg_io(ns, (void __user *)arg);
-#endif
 	default:
 #ifdef CONFIG_NVM
 		if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
-	switch (cmd) {
-	case SG_IO:
-		return -ENOIOCTLCMD;
-	}
 	return nvme_ioctl(bdev, mode, cmd, arg);
 }
 #else
@@ -925,6 +1077,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+	struct nvme_ns *ns = disk->private_data;
+	u16 old_ms = ns->ms;
+	u8 pi_type = 0;
+
+	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+	/* PI implementation requires metadata size equal to the t10 pi tuple size */
+	if (ns->ms == sizeof(struct t10_pi_tuple))
+		pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+
+	if (blk_get_integrity(disk) &&
+	    (ns->pi_type != pi_type || ns->ms != old_ms ||
+	     bs != queue_logical_block_size(disk->queue) ||
+	     (ns->ms && ns->ext)))
+		blk_integrity_unregister(disk);
+
+	ns->pi_type = pi_type;
+}
+
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 	struct blk_integrity integrity;
@@ -951,11 +1126,21 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 	blk_queue_max_integrity_segments(ns->queue, 1);
 }
 #else
+static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
+		u16 bs)
+{
+}
 static void nvme_init_integrity(struct nvme_ns *ns)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+	u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+	blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
 static void nvme_config_discard(struct nvme_ns *ns)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -964,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	ns->queue->limits.discard_alignment = logical_block_size;
-	ns->queue->limits.discard_granularity = logical_block_size;
+	if (ctrl->nr_streams && ns->sws && ns->sgs) {
+		unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+		ns->queue->limits.discard_alignment = sz;
+		ns->queue->limits.discard_granularity = sz;
+	} else {
+		ns->queue->limits.discard_alignment = logical_block_size;
+		ns->queue->limits.discard_granularity = logical_block_size;
+	}
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -989,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 	if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
 		memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
 	if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
-		memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+		memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+	if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+		 /* Don't treat an error as fatal, as we potentially
+		  * already have an NGUID or EUI-64
+		  */
+		if (nvme_identify_ns_descs(ns, ns->ns_id))
+			dev_warn(ns->ctrl->device,
+				 "%s: Identify Descriptors failed\n", __func__);
+	}
 
 	return 0;
 }
@@ -997,37 +1197,26 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
-	u8 lbaf, pi_type;
-	u16 old_ms;
-	unsigned short bs;
-
-	old_ms = ns->ms;
-	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
-	ns->lba_shift = id->lbaf[lbaf].ds;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
-	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	u16 bs;
 
 	/*
 	 * If identify namespace failed, use default 512 byte block size so
 	 * block layer can use before failing read/write for 0 capacity.
 	 */
+	ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
 	if (ns->lba_shift == 0)
 		ns->lba_shift = 9;
 	bs = 1 << ns->lba_shift;
-	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
-	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
-					id->dps & NVME_NS_DPS_PI_MASK : 0;
+	ns->noiob = le16_to_cpu(id->noiob);
 
 	blk_mq_freeze_queue(disk->queue);
-	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
-				ns->ms != old_ms ||
-				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && ns->ext)))
-		blk_integrity_unregister(disk);
 
-	ns->pi_type = pi_type;
+	if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+		nvme_prep_integrity(disk, id, bs);
 	blk_queue_logical_block_size(ns->queue, bs);
-
+	if (ns->noiob)
+		nvme_set_chunk_size(ns);
 	if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
 		nvme_init_integrity(ns);
 	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1035,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+	if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 }
@@ -1271,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
-	unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+	unsigned long timeout = jiffies + (shutdown_timeout * HZ);
 	u32 csts;
 	int ret;
 
@@ -1330,7 +1519,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * transitioning between power states.  Therefore, when running
 	 * in any given state, we will enter the next lower-power
 	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
 	 * the requested maximum latency.
 	 *
 	 * We will not autonomously enter any non-operational state for
@@ -1360,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	if (!table)
 		return;
 
-	if (ctrl->ps_max_latency_us == 0) {
+	if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
 		apste = 0;
 		dev_dbg(ctrl->device, "APST disabled\n");
@@ -1375,7 +1564,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 		 * lowest-power state, not the number of states.
 		 */
 		for (state = (int)ctrl->npss; state >= 0; state--) {
-			u64 total_latency_us, transition_ms;
+			u64 total_latency_us, exit_latency_us, transition_ms;
 
 			if (target)
 				table->entries[state] = target;
@@ -1396,12 +1585,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 			      NVME_PS_FLAGS_NON_OP_STATE))
 				continue;
 
-			total_latency_us =
-				(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-				+ le32_to_cpu(ctrl->psd[state].exit_lat);
-			if (total_latency_us > ctrl->ps_max_latency_us)
+			exit_latency_us =
+				(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+			if (exit_latency_us > ctrl->ps_max_latency_us)
 				continue;
 
+			total_latency_us =
+				exit_latency_us +
+				le32_to_cpu(ctrl->psd[state].entry_lat);
+
 			/*
 			 * This state is good.  Use it as the APST idle
 			 * target for higher power states.
@@ -1513,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
 		string_matches(id->fr, q->fr, sizeof(id->fr));
 }
 
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t nqnlen;
+	int off;
+
+	nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+	if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+		strcpy(ctrl->subnqn, id->subnqn);
+		return;
+	}
+
+	if (ctrl->vs >= NVME_VS(1, 2, 1))
+		dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+	/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+	off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+			"nqn.2014.08.org.nvmexpress:%4x%4x",
+			le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+	memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+	off += sizeof(id->sn);
+	memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+	off += sizeof(id->mn);
+	memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -1524,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	u64 cap;
 	int ret, page_shift;
 	u32 max_hw_sectors;
-	u8 prev_apsta;
+	bool prev_apst_enabled;
 
 	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
 	if (ret) {
@@ -1548,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		return -EIO;
 	}
 
+	nvme_init_subnqn(ctrl, id);
+
 	if (!ctrl->identified) {
 		/*
 		 * Check for quirks.  Quirk can depend on firmware version,
@@ -1567,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	}
 
 	if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
-		dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+		dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
 		ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
 	}
 
@@ -1592,20 +1811,21 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->kas = le16_to_cpu(id->kas);
 
 	ctrl->npss = id->npss;
-	prev_apsta = ctrl->apsta;
+	ctrl->apsta = id->apsta;
+	prev_apst_enabled = ctrl->apst_enabled;
 	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
 		if (force_apst && id->apsta) {
-			dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
-			ctrl->apsta = 1;
+			dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+			ctrl->apst_enabled = true;
 		} else {
-			ctrl->apsta = 0;
+			ctrl->apst_enabled = false;
 		}
 	} else {
-		ctrl->apsta = id->apsta;
+		ctrl->apst_enabled = id->apsta;
 	}
 	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
-	if (ctrl->ops->is_fabrics) {
+	if (ctrl->ops->flags & NVME_F_FABRICS) {
 		ctrl->icdoff = le16_to_cpu(id->icdoff);
 		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
 		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
@@ -1619,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 			ret = -EINVAL;
 
 		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
-			dev_err(ctrl->dev,
+			dev_err(ctrl->device,
 				"keep-alive support is mandatory for fabrics\n");
 			ret = -EINVAL;
 		}
 	} else {
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
+		ctrl->hmpre = le32_to_cpu(id->hmpre);
+		ctrl->hmmin = le32_to_cpu(id->hmmin);
 	}
 
 	kfree(id);
 
-	if (ctrl->apsta && !prev_apsta)
+	if (ctrl->apst_enabled && !prev_apst_enabled)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
-	else if (!ctrl->apsta && prev_apsta)
+	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
 	nvme_configure_apst(ctrl);
+	nvme_configure_directives(ctrl);
 
 	ctrl->identified = true;
 
@@ -1720,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		return nvme_dev_user_cmd(ctrl, argp);
 	case NVME_IOCTL_RESET:
 		dev_warn(ctrl->device, "resetting controller\n");
-		return ctrl->ops->reset_ctrl(ctrl);
+		return nvme_reset_ctrl_sync(ctrl);
 	case NVME_IOCTL_SUBSYS_RESET:
 		return nvme_reset_subsystem(ctrl);
 	case NVME_IOCTL_RESCAN:
@@ -1746,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 	int ret;
 
-	ret = ctrl->ops->reset_ctrl(ctrl);
+	ret = nvme_reset_ctrl_sync(ctrl);
 	if (ret < 0)
 		return ret;
 	return count;
@@ -1772,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 	int serial_len = sizeof(ctrl->serial);
 	int model_len = sizeof(ctrl->model);
 
-	if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
-		return sprintf(buf, "eui.%16phN\n", ns->uuid);
+	if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+		return sprintf(buf, "eui.%16phN\n", ns->nguid);
 
 	if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
 		return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1788,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
 
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+	return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
 								char *buf)
 {
 	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-	return sprintf(buf, "%pU\n", ns->uuid);
+
+	/* For backward compatibility expose the NGUID to userspace if
+	 * we have no UUID set
+	 */
+	if (uuid_is_null(&ns->uuid)) {
+		printk_ratelimited(KERN_WARNING
+				   "No UUID available providing old NGUID\n");
+		return sprintf(buf, "%pU\n", ns->nguid);
+	}
+	return sprintf(buf, "%pU\n", &ns->uuid);
 }
 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
 
@@ -1815,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
 static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
+	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
 	NULL,
@@ -1827,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
 	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
 
 	if (a == &dev_attr_uuid.attr) {
-		if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+		if (uuid_is_null(&ns->uuid) ||
+		    !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+			return 0;
+	}
+	if (a == &dev_attr_nguid.attr) {
+		if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
 			return 0;
 	}
 	if (a == &dev_attr_eui.attr) {
@@ -1916,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
 {
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n",
-			ctrl->ops->get_subsysnqn(ctrl));
+	return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
 }
 static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
 
@@ -1946,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
 	NULL
 };
 
-#define CHECK_ATTR(ctrl, a, name)		\
-	if ((a) == &dev_attr_##name.attr &&	\
-	    !(ctrl)->ops->get_##name)		\
-		return 0
-
 static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-	if (a == &dev_attr_delete_controller.attr) {
-		if (!ctrl->ops->delete_ctrl)
-			return 0;
-	}
-
-	CHECK_ATTR(ctrl, a, subsysnqn);
-	CHECK_ATTR(ctrl, a, address);
+	if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+		return 0;
+	if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+		return 0;
 
 	return a->mode;
 }
@@ -2004,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	return ret;
 }
 
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	struct streams_directive_params s;
+	int ret;
+
+	if (!ctrl->nr_streams)
+		return 0;
+
+	ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+	if (ret)
+		return ret;
+
+	ns->sws = le32_to_cpu(s.sws);
+	ns->sgs = le16_to_cpu(s.sgs);
+
+	if (ns->sws) {
+		unsigned int bs = 1 << ns->lba_shift;
+
+		blk_queue_io_min(ns->queue, bs * ns->sws);
+		if (ns->sgs)
+			blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+	}
+
+	return 0;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
@@ -2033,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	nvme_set_queue_limits(ctrl, ns->queue);
+	nvme_setup_streams_ns(ctrl, ns);
 
 	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
@@ -2041,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	if (nvme_nvm_ns_supported(ns, id) &&
 				nvme_nvm_register(ns, disk_name, node)) {
-		dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+		dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
 		goto out_free_id;
 	}
 
@@ -2098,7 +2362,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		if (ns->ndev)
 			nvme_nvm_unregister_sysfs(ns);
 		del_gendisk(ns->disk);
-		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
 
@@ -2217,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 	 * removal.
 	 */
 	if (ctrl->state == NVME_CTRL_LIVE)
-		schedule_work(&ctrl->scan_work);
+		queue_work(nvme_wq, &ctrl->scan_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
@@ -2272,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		/*FALLTHRU*/
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
-		schedule_work(&ctrl->async_event_work);
+		queue_work(nvme_wq, &ctrl->async_event_work);
 		break;
 	default:
 		break;
@@ -2295,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
 	ctrl->event_limit = NVME_NR_AERS;
-	schedule_work(&ctrl->async_event_work);
+	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
@@ -2427,6 +2690,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly unquiesce queues to avoid blocking dispatch */
+	blk_mq_unquiesce_queue(ctrl->admin_q);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2436,8 +2706,19 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 			continue;
 		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
-		blk_mq_abort_requeue_list(ns->queue);
-		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		/* Forcibly unquiesce queues to avoid blocking dispatch */
+		blk_mq_unquiesce_queue(ns->queue);
+
+		/*
+		 * Forcibly start all queues to avoid having stuck requests.
+		 * Note that we must ensure the queues are not stopped
+		 * when the final removal happens.
+		 */
+		blk_mq_start_hw_queues(ns->queue);
+
+		/* draining requests in requeue list */
+		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2507,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_unquiesce_queue(ns->queue);
 		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
@@ -2518,10 +2799,15 @@ int __init nvme_core_init(void)
 {
 	int result;
 
+	nvme_wq = alloc_workqueue("nvme-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvme_wq)
+		return -ENOMEM;
+
 	result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
 							&nvme_dev_fops);
 	if (result < 0)
-		return result;
+		goto destroy_wq;
 	else if (result > 0)
 		nvme_char_major = result;
 
@@ -2533,8 +2819,10 @@ int __init nvme_core_init(void)
 
 	return 0;
 
- unregister_chrdev:
+unregister_chrdev:
 	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+	destroy_workqueue(nvme_wq);
 	return result;
 }
 
@@ -2542,6 +2830,7 @@ void nvme_core_exit(void)
 {
 	class_destroy(nvme_class);
 	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+	destroy_workqueue(nvme_wq);
 }
 
 MODULE_LICENSE("GPL");
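
A side note on the nvme_init_subnqn() hunk in this file: when the Identify
data carries no usable SUBNQN, the driver synthesizes one from the PCI vendor
ID, subsystem vendor ID, serial number and model number, per the Figure 254
recipe quoted in the hunk.  The userspace sketch below mirrors that
construction; it is an illustration only, the vid/ssvid/sn/mn values are
invented, and NVMF_NQN_SIZE (223) is taken from the fabrics definitions.

/*
 * Illustrative userspace sketch -- not the kernel function itself.
 * Mirrors the fallback NQN layout built by nvme_init_subnqn() above.
 */
#include <stdio.h>
#include <string.h>

#define NVMF_NQN_SIZE	223	/* matches the fabrics definition */

static void build_fake_nqn(char *nqn, unsigned short vid, unsigned short ssvid,
			   const char *sn /* 20 bytes */,
			   const char *mn /* 40 bytes */)
{
	int off;

	/* prefix plus the PCI vendor and subsystem vendor IDs in hex */
	off = snprintf(nqn, NVMF_NQN_SIZE,
		       "nqn.2014.08.org.nvmexpress:%4x%4x", vid, ssvid);

	/* serial and model numbers are copied verbatim, space padded */
	memcpy(nqn + off, sn, 20);
	off += 20;
	memcpy(nqn + off, mn, 40);
	off += 40;

	/* zero the remainder so the string is always NUL terminated */
	memset(nqn + off, 0, NVMF_NQN_SIZE - off);
}

int main(void)
{
	/* invented identifiers, padded to the fixed Identify field widths */
	const char sn[] = "S1234567890123456789";			/* 20 chars */
	const char mn[] = "Example NVMe Controller Model 0000000040";	/* 40 chars */
	char nqn[NVMF_NQN_SIZE];

	build_fake_nqn(nqn, 0x8086, 0x8086, sn, mn);
	printf("%s\n", nqn);
	return 0;
}

Compiled with any C compiler, this prints the NQN such a controller will then
report through the subsysnqn attribute, which now reads ctrl->subnqn rather
than a fabrics-only callback.
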
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 990e6fb..2e582a2 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
 	kref_init(&host->ref);
 	memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-	uuid_be_gen(&host->id);
 
 	list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
 		return NULL;
 
 	kref_init(&host->ref);
-	uuid_be_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
 		"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 EXPORT_SYMBOL_GPL(nvmf_get_address);
 
 /**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl:	Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
-	return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
  * nvmf_reg_read32() -  NVMe Fabrics "Property Get" API function.
  * @ctrl:	Host NVMe controller instance maintaining the admin
  *		queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 			}
 		}
 		break;
+
+	case NVME_SC_CONNECT_INVALID_HOST:
+		dev_err(ctrl->device,
+			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
+			data->subsysnqn, data->hostnqn);
+		break;
+
+	case NVME_SC_CONNECT_CTRL_BUSY:
+		dev_err(ctrl->device,
+			"Connect command failed: controller is busy or not available\n");
+		break;
+
+	case NVME_SC_CONNECT_FORMAT:
+		dev_err(ctrl->device,
+			"Connect incompatible format: %d",
+			cmd->connect.recfmt);
+		break;
+
 	default:
 		dev_err(ctrl->device,
 			"Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
-
-	/*
-	 * fabrics spec sets a minimum of depth 32 for admin queue,
-	 * so set the queue with this depth always until
-	 * justification otherwise.
-	 */
-	cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+	cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 
 	/*
 	 * Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -395,7 +395,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(0xffff);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -454,7 +454,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+	uuid_copy(&data->hostid, &ctrl->opts->host->id);
 	data->cntlid = cpu_to_le16(ctrl->cntlid);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
 {
 	if (ctrl->opts->max_reconnects != -1 &&
-	    ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
 		return true;
 
 	return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
+	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
 	{ NVMF_OPT_ERR,			NULL			}
 };
 
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	int token, ret = 0;
 	size_t nqnlen  = 0;
 	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+	uuid_t hostid;
 
 	/* Set defaults */
 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	if (!options)
 		return -ENOMEM;
 
+	uuid_gen(&hostid);
+
 	while ((p = strsep(&o, ",\n")) != NULL) {
 		if (!*p)
 			continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->host_traddr = p;
 			break;
+		case NVMF_OPT_HOST_ID:
+			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			if (uuid_parse(p, &hostid)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			break;
 		default:
 			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
 				p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->host = nvmf_default_host;
 	}
 
+	uuid_copy(&opts->host->id, &hostid);
+
 out:
 	if (!opts->discovery_nqn && !opts->kato)
 		opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 
 #define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+				 NVMF_OPT_HOST_ID)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
 		goto out_unlock;
 	}
 
+	if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+		dev_warn(ctrl->device,
+			"controller returned incorrect NQN: \"%s\".\n",
+			ctrl->subnqn);
+		mutex_unlock(&nvmf_transports_mutex);
+		ctrl->ops->delete_ctrl(ctrl);
+		return ERR_PTR(-EINVAL);
+	}
+
 	mutex_unlock(&nvmf_transports_mutex);
 	return ctrl;
 
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index f5a9c1f..bf33663 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -36,7 +36,7 @@ struct nvmf_host {
 	struct kref		ref;
 	struct list_head	list;
 	char			nqn[NVMF_NQN_SIZE];
-	uuid_be			id;
+	uuid_t			id;
 };
 
 /**
@@ -56,6 +56,7 @@ enum {
 	NVMF_OPT_RECONNECT_DELAY = 1 << 9,
 	NVMF_OPT_HOST_TRADDR	= 1 << 10,
 	NVMF_OPT_CTRL_LOSS_TMO	= 1 << 11,
+	NVMF_OPT_HOST_ID	= 1 << 12,
 };
 
 /**
@@ -80,7 +81,6 @@ enum {
  * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
  * @kato:	Keep-alive timeout.
  * @host:	Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
  * @max_reconnects: maximum number of allowed reconnect attempts before removing
  *              the controller, (-1) means reconnect forever, zero means remove
  *              immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
 	bool			discovery_nqn;
 	unsigned int		kato;
 	struct nvmf_host	*host;
-	int			nr_reconnects;
 	int			max_reconnects;
 };
 
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
 int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index dca7165..ed87214 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
  */
 #define NVME_FC_NR_AEN_COMMANDS	1
 #define NVME_FC_AQ_BLKMQ_DEPTH	\
-	(NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
 #define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
 
 enum nvme_fc_queue_flags {
@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {
 
 #define NVMEFC_QUEUE_DELAY	3		/* ms units */
 
-#define NVME_FC_MAX_CONNECT_ATTEMPTS	1
-
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
 	struct device		*dev;
@@ -163,14 +161,12 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	tag_set;
 
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 	struct delayed_work	connect_work;
-	int			reconnect_delay;
-	int			connect_attempts;
 
 	struct kref		ref;
 	u32			flags;
 	u32			iocnt;
+	wait_queue_head_t	ioabort_wait;
 
 	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];
 
@@ -218,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
 static DEFINE_IDA(nvme_fc_local_port_cnt);
 static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
-static struct workqueue_struct *nvme_fc_wq;
 
 
 
@@ -882,8 +877,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
 	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
 	/* Linux supports only Dynamic controllers */
 	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
-	memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id,
-		min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be)));
+	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
 	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
 		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
 	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
@@ -1143,6 +1137,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1245,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
+		if (ctrl->flags & FCCTRL_TERMIO) {
+			if (!--ctrl->iocnt)
+				wake_up(&ctrl->ioabort_wait);
+		}
 	}
 	if (op->flags & FCOP_FLAGS_RELEASED)
 		complete_rq = true;
@@ -1269,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_command *sqe = &op->cmd_iu.sqe;
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
 	/*
 	 * WARNING:
@@ -1298,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * fabricate a CQE, the following fields will not be set as they
 	 * are not referenced:
 	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
+	 *
+	 * A transport-detected failure or error of an individual i/o,
+	 * unrelated to the nvme completion status, can cause the
+	 * initiator and target sides to get out of sync on SQ
+	 * head/tail (aka the allowed outstanding io count).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
 	 */
 
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1363,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		goto done;
 	}
 
+	terminate_assoc = false;
+
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1370,19 +1377,23 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
 	}
 
 	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
 	if (!complete_rq) {
 		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-			status = cpu_to_le16(NVME_SC_ABORT_REQ);
+			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 			if (blk_queue_dying(rq->q))
-				status |= cpu_to_le16(NVME_SC_DNR);
+				status |= cpu_to_le16(NVME_SC_DNR << 1);
 		}
 		nvme_end_request(rq, status, result);
 	} else
 		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
@@ -1439,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 {
 	struct nvme_fc_ctrl *ctrl = set->driver_data;
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
-	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
-		unsigned int hctx_idx, unsigned int numa_node)
-{
-	struct nvme_fc_ctrl *ctrl = set->driver_data;
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_queue *queue = &ctrl->queues[0];
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 
 	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
 }
@@ -1748,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 static void
 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
+	/* only proceed if in LIVE state - e.g. on first error */
+	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+		return;
+
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: transport association error detected: %s\n",
 		ctrl->cnum, errmsg);
-	dev_info(ctrl->ctrl.device,
+	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
-	/* stop the queues on error, cleanup is in reset thread */
-	if (ctrl->queue_count > 1)
-		nvme_stop_queues(&ctrl->ctrl);
-
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1765,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 		return;
 	}
 
-	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-		dev_err(ctrl->ctrl.device,
-			"NVME-FC{%d}: error_recovery: Failed to schedule "
-			"reset work\n", ctrl->cnum);
+	nvme_reset_ctrl(&ctrl->ctrl);
 }
 
 static enum blk_eh_timer_return
@@ -1877,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
  * level FC exchange resource that is also outstanding. This must be
  * considered in all cleanup operations.
  */
-static int
+static blk_status_t
 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	struct nvme_fc_fcp_op *op, u32 data_len,
 	enum nvmefc_fcp_datadir	io_dir)
@@ -1892,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * the target device is present
 	 */
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	if (!nvme_fc_ctrl_get(ctrl))
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 
 	/* format the FC-NVME CMD IU and fcp_req */
 	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1943,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret < 0) {
 			nvme_cleanup_cmd(op->rq);
 			nvme_fc_ctrl_put(ctrl);
-			return (ret == -ENOMEM || ret == -EAGAIN) ?
-				BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+			if (ret == -ENOMEM || ret == -EAGAIN)
+				return BLK_STS_RESOURCE;
+			return BLK_STS_IOERR;
 		}
 	}
 
@@ -1961,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
-		if (op->rq) {			/* normal request */
+		if (op->rq)			/* normal request */
 			nvme_fc_unmap_data(ctrl, op->rq, op);
-			nvme_cleanup_cmd(op->rq);
-		}
 		/* else - aen. no cleanup needed */
 
 		nvme_fc_ctrl_put(ctrl);
 
 		if (ret != -EBUSY)
-			return BLK_MQ_RQ_QUEUE_ERROR;
+			return BLK_STS_IOERR;
 
 		if (op->rq) {
 			blk_mq_stop_hw_queues(op->rq->q);
 			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
 		}
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
-static int
+static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
 {
@@ -1995,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *sqe = &cmdiu->sqe;
 	enum nvmefc_fcp_datadir	io_dir;
 	u32 data_len;
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
@@ -2050,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	struct nvme_fc_fcp_op *aen_op;
 	unsigned long flags;
 	bool terminating = false;
-	int ret;
+	blk_status_t ret;
 
 	if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
 		return;
@@ -2082,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
 	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
 			FCOP_FLAGS_COMPLETE);
 
-	nvme_cleanup_cmd(rq);
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
 	nvme_fc_ctrl_put(ctrl);
@@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (!opts->nr_io_queues)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
@@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ctrl->queue_count == 1)
 		return 0;
 
-	dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
-			opts->nr_io_queues);
-
 	nvme_fc_init_io_queues(ctrl);
 
 	ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	int ret;
 	bool changed;
 
-	ctrl->connect_attempts++;
+	++ctrl->ctrl.nr_reconnects;
 
 	/*
 	 * Create the admin queue
@@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	ctrl->connect_attempts = 0;
-
-	kref_get(&ctrl->ctrl.kref);
+	ctrl->ctrl.nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
@@ -2491,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	/* wait for all io that had to be aborted */
 	spin_lock_irqsave(&ctrl->lock, flags);
-	while (ctrl->iocnt) {
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		msleep(1000);
-		spin_lock_irqsave(&ctrl->lock, flags);
-	}
+	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
 	ctrl->flags &= ~FCCTRL_TERMIO;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
@@ -2525,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 	struct nvme_fc_ctrl *ctrl =
 		container_of(work, struct nvme_fc_ctrl, delete_work);
 
-	cancel_work_sync(&ctrl->reset_work);
+	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 
 	/*
@@ -2536,26 +2520,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	/*
 	 * tear down the controller
-	 * This will result in the last reference on the nvme ctrl to
-	 * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
-	 * From there, the transport will tear down it's logical queues and
-	 * association.
+	 * After the last reference on the nvme ctrl is removed,
+	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
+	 * invoked. From there, the transport will tear down its
+	 * logical queues and association.
 	 */
 	nvme_uninit_ctrl(&ctrl->ctrl);
 
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
+static bool
+__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
+{
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+		return true;
+
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
+		return true;
+
+	return false;
+}
+
 static int
 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
 {
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
-		return -EBUSY;
-
-	return 0;
+	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
 }
 
 /*
@@ -2573,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 	ret = __nvme_fc_del_ctrl(ctrl);
 
 	if (!ret)
-		flush_workqueue(nvme_fc_wq);
+		flush_workqueue(nvme_wq);
 
 	nvme_put_ctrl(&ctrl->ctrl);
 
@@ -2581,83 +2571,62 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
 }
 
 static void
+nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
+{
+	/* If we are resetting/deleting then do nothing */
+	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
+		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
+			ctrl->ctrl.state == NVME_CTRL_LIVE);
+		return;
+	}
+
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
+		ctrl->cnum, status);
+
+	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+		dev_info(ctrl->ctrl.device,
+			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
+			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
+		queue_delayed_work(nvme_wq, &ctrl->connect_work,
+				ctrl->ctrl.opts->reconnect_delay * HZ);
+	} else {
+		dev_warn(ctrl->ctrl.device,
+				"NVME-FC{%d}: Max reconnect attempts (%d) "
+				"reached. Removing controller\n",
+				ctrl->cnum, ctrl->ctrl.nr_reconnects);
+		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
+	}
+}
+
+static void
 nvme_fc_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_fc_ctrl *ctrl =
-			container_of(work, struct nvme_fc_ctrl, reset_work);
+		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
 	int ret;
 
 	/* will block while waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
 	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
-	} else
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
 }
 
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
-	dev_warn(ctrl->ctrl.device,
-		"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
-}
-
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.name			= "fc",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_fc_reset_nvme_ctrl,
 	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
 	.submit_async_event	= nvme_fc_submit_async_event,
 	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 	.get_address		= nvmf_get_address,
 };
 
@@ -2671,34 +2640,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 				struct nvme_fc_ctrl, connect_work);
 
 	ret = nvme_fc_create_association(ctrl);
-	if (ret) {
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
-			ctrl->cnum, ret);
-		if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached. Removing controller\n",
-				ctrl->cnum, ctrl->connect_attempts);
-
-			if (!nvme_change_ctrl_state(&ctrl->ctrl,
-				NVME_CTRL_DELETING)) {
-				dev_err(ctrl->ctrl.device,
-					"NVME-FC{%d}: failed to change state "
-					"to DELETING\n", ctrl->cnum);
-				return;
-			}
-
-			WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
-			return;
-		}
-
-		dev_warn(ctrl->ctrl.device,
-			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
-			ctrl->cnum, ctrl->reconnect_delay);
-		queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
-				ctrl->reconnect_delay * HZ);
-	} else
+	if (ret)
+		nvme_fc_reconnect_or_delete(ctrl, ret);
+	else
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: controller reconnect complete\n",
 			ctrl->cnum);
@@ -2708,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
 	.queue_rq	= nvme_fc_queue_rq,
 	.complete	= nvme_fc_complete_rq,
-	.init_request	= nvme_fc_init_admin_request,
+	.init_request	= nvme_fc_init_request,
 	.exit_request	= nvme_fc_exit_request,
 	.reinit_request	= nvme_fc_reinit_request,
 	.init_hctx	= nvme_fc_init_admin_hctx,
@@ -2753,9 +2697,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	kref_init(&ctrl->ref);
 
 	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
-	ctrl->reconnect_delay = opts->reconnect_delay;
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -2821,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		nvme_uninit_ctrl(&ctrl->ctrl);
 		nvme_put_ctrl(&ctrl->ctrl);
 
+		/* Remove core ctrl ref. */
+		nvme_put_ctrl(&ctrl->ctrl);
+
 		/* as we're past the point where we transition to the ref
 		 * counting teardown path, if we return a bad pointer here,
 		 * the calling routine, thinking it's prior to the
@@ -2835,6 +2781,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		return ERR_PTR(ret);
 	}
 
+	kref_get(&ctrl->ctrl.kref);
+
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
 		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
@@ -2971,26 +2919,13 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 static struct nvmf_transport_ops nvme_fc_transport = {
 	.name		= "fc",
 	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
-	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY,
+	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
 	.create_ctrl	= nvme_fc_create_ctrl,
 };
 
 static int __init nvme_fc_init_module(void)
 {
-	int ret;
-
-	nvme_fc_wq = create_workqueue("nvme_fc_wq");
-	if (!nvme_fc_wq)
-		return -ENOMEM;
-
-	ret = nvmf_register_transport(&nvme_fc_transport);
-	if (ret)
-		goto err;
-
-	return 0;
-err:
-	destroy_workqueue(nvme_fc_wq);
-	return ret;
+	return nvmf_register_transport(&nvme_fc_transport);
 }
 
 static void __exit nvme_fc_exit_module(void)
@@ -3001,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
 
 	nvmf_unregister_transport(&nvme_fc_transport);
 
-	destroy_workqueue(nvme_fc_wq);
-
 	ida_destroy(&nvme_fc_local_port_cnt);
 	ida_destroy(&nvme_fc_ctrl_cnt);
 }
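
The wait_event_lock_irq() change in nvme_fc_delete_association() above replaces
a one-second msleep() polling loop with the usual "sleep until a lock-protected
counter drops to zero" pattern: each completion now decrements ctrl->iocnt
under ctrl->lock and wakes ctrl->ioabort_wait when it hits zero.  A userspace
pthreads analogue of the same idea (all names invented for the illustration,
four fake in-flight aborts):

/*
 * Userspace analogy (pthreads), not kernel code: the waiter blocks until an
 * in-flight counter protected by a lock drops to zero, completers wake it.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ioabort_wait = PTHREAD_COND_INITIALIZER;
static int iocnt = 4;			/* pretend four aborted I/Os are in flight */

static void *complete_io(void *arg)
{
	(void)arg;
	usleep(100 * 1000);		/* simulate the LLDD finishing the abort */

	pthread_mutex_lock(&lock);
	if (--iocnt == 0)		/* kernel side: if (!--ctrl->iocnt) wake_up(...) */
		pthread_cond_signal(&ioabort_wait);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&tid[i], NULL, complete_io, NULL);

	/* kernel side: wait_event_lock_irq(ioabort_wait, iocnt == 0, lock) */
	pthread_mutex_lock(&lock);
	while (iocnt != 0)
		pthread_cond_wait(&ioabort_wait, &lock);
	pthread_mutex_unlock(&lock);

	printf("all aborted I/Os completed\n");

	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

Compared with the old loop, the waiter is woken exactly when the condition
becomes true instead of at the next one-second poll, and it cannot miss the
final decrement because the check runs under the same lock.
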
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5df78e..be85413 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
 }
 
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
 					rqd->bio->bi_iter.bi_sector));
 }
 
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
 
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
 		kfree(cmd);
-		return -ENOMEM;
+		return PTR_ERR(rq);
 	}
 	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
 
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.max_phys_sect		= 64,
 };
 
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
-	struct completion *waiting = rq->end_io_data;
-
-	complete(waiting);
-}
-
 static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 				struct nvme_ns *ns,
 				struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
 
 	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-	rq->end_io_data = &wait;
 
 	if (ppa_buf && ppa_len) {
 		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
 	}
 
 submit:
-	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
-	wait_for_completion_io(&wait);
+	blk_execute_rq(q, NULL, rq, 0);
 
 	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
 		ret = -EINTR;
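
Two small things happen in this file: the private end_io/completion pair is
dropped in favour of the synchronous blk_execute_rq(), and the submission path
now returns PTR_ERR(rq) instead of a blanket -ENOMEM, so whatever errno
nvme_alloc_request() encoded into the error pointer is propagated.  The sketch
below shows the ERR_PTR/PTR_ERR convention behind that second change in a
self-contained form; the allocator and its errno here are stand-ins.

/* Simplified userspace rendering of the kernel's ERR_PTR/IS_ERR/PTR_ERR idiom. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define PTR_ERR(ptr)	((long)(intptr_t)(ptr))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

/* stand-in for nvme_alloc_request(): fails with a caller-chosen errno */
static void *alloc_request(int fail_with)
{
	if (fail_with)
		return ERR_PTR(-fail_with);
	return (void *)(uintptr_t)0x1000;	/* fake "request", never dereferenced */
}

int main(void)
{
	void *rq = alloc_request(EWOULDBLOCK);

	if (IS_ERR(rq)) {
		/* the fix above: return PTR_ERR(rq), not a hard-coded -ENOMEM */
		printf("allocation failed: %ld\n", PTR_ERR(rq));
		return 1;
	}
	printf("allocation succeeded\n");
	return 0;
}
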
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 29c708c..d70ff0f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
 extern unsigned char admin_timeout;
 #define ADMIN_TIMEOUT	(admin_timeout * HZ)
 
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT	(shutdown_timeout * HZ)
-
 #define NVME_DEFAULT_KATO	5
 #define NVME_KATO_GRACE		10
 
+extern struct workqueue_struct *nvme_wq;
+
 enum {
 	NVME_NS_LBA		= 0,
 	NVME_NS_LIGHTNVM	= 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
 	struct device *device;	/* char device */
 	struct list_head node;
 	struct ida ns_ida;
+	struct work_struct reset_work;
 
 	struct opal_dev *opal_dev;
 
@@ -138,6 +138,7 @@ struct nvme_ctrl {
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
+	char subnqn[NVMF_NQN_SIZE];
 	u16 cntlid;
 
 	u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
 	u16 oncs;
 	u16 vid;
 	u16 oacs;
+	u16 nssa;
+	u16 nr_streams;
 	atomic_t abort_limit;
 	u8 event_limit;
 	u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
+	bool apst_enabled;
+
+	u32 hmpre;
+	u32 hmmin;
 
 	/* Fabrics only */
 	u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
 	u32 iorcsz;
 	u16 icdoff;
 	u16 maxcmd;
+	int nr_reconnects;
 	struct nvmf_ctrl_options *opts;
 };
 
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
 struct nvme_ns {
 	struct list_head list;
 
@@ -189,14 +194,18 @@ struct nvme_ns {
 	int instance;
 
 	u8 eui[8];
-	u8 uuid[16];
+	u8 nguid[16];
+	uuid_t uuid;
 
 	unsigned ns_id;
 	int lba_shift;
 	u16 ms;
+	u16 sgs;
+	u32 sws;
 	bool ext;
 	u8 pi_type;
 	unsigned long flags;
+	u16 noiob;
 
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
@@ -208,15 +217,15 @@ struct nvme_ns {
 struct nvme_ctrl_ops {
 	const char *name;
 	struct module *module;
-	bool is_fabrics;
+	unsigned int flags;
+#define NVME_F_FABRICS			(1 << 0)
+#define NVME_F_METADATA_SUPPORTED	(1 << 1)
 	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
-	int (*reset_ctrl)(struct nvme_ctrl *ctrl);
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
 	int (*delete_ctrl)(struct nvme_ctrl *ctrl);
-	const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
@@ -294,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
@@ -308,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void __user *ubuffer, unsigned bufflen,
 		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
 		u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
-		struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-		      void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
-		      void *buffer, size_t buflen, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 
 #ifdef CONFIG_NVM
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
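
The nvme_ctrl_ops hunk above swaps the single bool is_fabrics for a flags
bitmask (NVME_F_FABRICS, NVME_F_METADATA_SUPPORTED), so capabilities become bit
tests rather than extra bools or NULL-callback probes like the removed
get_subsysnqn.  A compact sketch of that pattern, with struct and flag names
shortened for the example and the two sample ops tables invented:

#include <stdio.h>

struct ctrl_ops {
	const char	*name;
	unsigned int	flags;
#define F_FABRICS		(1 << 0)
#define F_METADATA_SUPPORTED	(1 << 1)
};

/* sample ops tables; which real transport sets which flag is immaterial here */
static const struct ctrl_ops fabric_ops = { .name = "fabric", .flags = F_FABRICS };
static const struct ctrl_ops local_ops  = { .name = "local",  .flags = F_METADATA_SUPPORTED };

static void describe(const struct ctrl_ops *ops)
{
	printf("%s: fabrics=%d metadata=%d\n", ops->name,
	       !!(ops->flags & F_FABRICS),
	       !!(ops->flags & F_METADATA_SUPPORTED));
}

int main(void)
{
	describe(&fabric_ops);
	describe(&local_ops);
	return 0;
}

Adding a capability later is then a new bit plus a test at the call site, with
no ops-structure churn for every transport.
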
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4c2ff2b..33c3b9d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
 #include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/t10-pi.h>
 #include <linux/timer.h>
 #include <linux/types.h>
@@ -49,7 +36,6 @@
 #include "nvme.h"
 
 #define NVME_Q_DEPTH		1024
-#define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
 struct nvme_dev;
 struct nvme_queue;
 
-static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
@@ -92,9 +80,8 @@ struct nvme_dev {
 	int q_depth;
 	u32 db_stride;
 	void __iomem *bar;
-	struct work_struct reset_work;
+	unsigned long bar_mapped_size;
 	struct work_struct remove_work;
-	struct timer_list watchdog_timer;
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
+
+	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
 	u32 *dbbuf_eis;
 	dma_addr_t dbbuf_eis_dma_addr;
+
+	/* host memory buffer support: */
+	u64 host_mem_size;
+	u32 nr_host_mem_descs;
+	struct nvme_host_mem_buf_desc *host_mem_descs;
+	void **host_mem_desc_bufs;
 };
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -263,7 +258,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
 	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
 
 	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
-		dev_warn(dev->dev, "unable to set dbbuf\n");
+		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
 		/* Free memory and continue on */
 		nvme_dbbuf_dma_free(dev);
 	}
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
 	nvmeq->tags = NULL;
 }
 
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
-		struct request *req, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	struct nvme_dev *dev = set->driver_data;
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = dev->queues[0];
-
-	BUG_ON(!nvmeq);
-	iod->nvmeq = nvmeq;
-	return 0;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int hctx_idx)
 {
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
 {
 	struct nvme_dev *dev = set->driver_data;
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+	int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+	struct nvme_queue *nvmeq = dev->queues[queue_idx];
 
 	BUG_ON(!nvmeq);
 	iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
 	return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
 	int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
 		iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
 		if (!iod->sg)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+			return BLK_STS_RESOURCE;
 	} else {
 		iod->sg = iod->inline_sg;
 	}
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->nents = 0;
 	iod->length = size;
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
 	enum dma_data_direction dma_dir = rq_data_dir(req) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-	int ret = BLK_MQ_RQ_QUEUE_ERROR;
+	blk_status_t ret = BLK_STS_IOERR;
 
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
 				DMA_ATTR_NO_WARN))
 		goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	if (!nvme_setup_prps(dev, req))
 		goto out_unmap;
 
-	ret = BLK_MQ_RQ_QUEUE_ERROR;
+	ret = BLK_STS_IOERR;
 	if (blk_integrity_rq(req)) {
 		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
 			goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
 	cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
 	if (blk_integrity_rq(req))
 		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_unmap:
 	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_command cmnd;
-	int ret = BLK_MQ_RQ_QUEUE_OK;
-
-	/*
-	 * If formated with metadata, require the block layer provide a buffer
-	 * unless this namespace is formated such that the metadata can be
-	 * stripped/generated by the controller with PRACT=1.
-	 */
-	if (ns && ns->ms && !blk_integrity_rq(req)) {
-		if (!(ns->pi_type && ns->ms == 8) &&
-		    !blk_rq_is_passthrough(req)) {
-			blk_mq_end_request(req, -EFAULT);
-			return BLK_MQ_RQ_QUEUE_OK;
-		}
-	}
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, req, &cmnd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	ret = nvme_init_iod(req, dev);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		goto out_free_cmd;
 
-	if (blk_rq_nr_phys_segments(req))
+	if (blk_rq_nr_phys_segments(req)) {
 		ret = nvme_map_data(dev, req, &cmnd);
-
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
-		goto out_cleanup_iod;
+		if (ret)
+			goto out_cleanup_iod;
+	}
 
 	blk_mq_start_request(req);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_MQ_RQ_QUEUE_ERROR;
+		ret = BLK_STS_IOERR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out_cleanup_iod;
 	}
 	__nvme_submit_cmd(nvmeq, &cmnd);
 	nvme_process_cq(nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 out_cleanup_iod:
 	nvme_free_iod(dev, req);
 out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
 	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
 }
 
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 {
-	u16 head, phase;
+	u16 head = nvmeq->cq_head;
 
-	head = nvmeq->cq_head;
-	phase = nvmeq->cq_phase;
-
-	while (nvme_cqe_valid(nvmeq, head, phase)) {
-		struct nvme_completion cqe = nvmeq->cqes[head];
-		struct request *req;
-
-		if (++head == nvmeq->q_depth) {
-			head = 0;
-			phase = !phase;
-		}
-
-		if (tag && *tag == cqe.command_id)
-			*tag = -1;
-
-		if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-			dev_warn(nvmeq->dev->ctrl.device,
-				"invalid id %d completed on queue %d\n",
-				cqe.command_id, le16_to_cpu(cqe.sq_id));
-			continue;
-		}
-
-		/*
-		 * AEN requests are special as they don't time out and can
-		 * survive any kind of queue freeze and often don't respond to
-		 * aborts.  We don't even bother to allocate a struct request
-		 * for them but rather special case them here.
-		 */
-		if (unlikely(nvmeq->qid == 0 &&
-				cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
-			nvme_complete_async_event(&nvmeq->dev->ctrl,
-					cqe.status, &cqe.result);
-			continue;
-		}
-
-		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-		nvme_end_request(req, cqe.status, cqe.result);
-	}
-
-	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
-		return;
-
-	if (likely(nvmeq->cq_vector >= 0))
+	if (likely(nvmeq->cq_vector >= 0)) {
 		if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
 						      nvmeq->dbbuf_cq_ei))
 			writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
-	nvmeq->cq_head = head;
-	nvmeq->cq_phase = phase;
+	}
+}
 
-	nvmeq->cqe_seen = 1;
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+		struct nvme_completion *cqe)
+{
+	struct request *req;
+
+	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+		dev_warn(nvmeq->dev->ctrl.device,
+			"invalid id %d completed on queue %d\n",
+			cqe->command_id, le16_to_cpu(cqe->sq_id));
+		return;
+	}
+
+	/*
+	 * AEN requests are special as they don't time out and can
+	 * survive any kind of queue freeze and often don't respond to
+	 * aborts.  We don't even bother to allocate a struct request
+	 * for them but rather special case them here.
+	 */
+	if (unlikely(nvmeq->qid == 0 &&
+			cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+		nvme_complete_async_event(&nvmeq->dev->ctrl,
+				cqe->status, &cqe->result);
+		return;
+	}
+
+	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+	nvme_end_request(req, cqe->status, cqe->result);
+}
+
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+		struct nvme_completion *cqe)
+{
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+		*cqe = nvmeq->cqes[nvmeq->cq_head];
+
+		if (++nvmeq->cq_head == nvmeq->q_depth) {
+			nvmeq->cq_head = 0;
+			nvmeq->cq_phase = !nvmeq->cq_phase;
+		}
+		return true;
+	}
+	return false;
 }
 
 static void nvme_process_cq(struct nvme_queue *nvmeq)
 {
-	__nvme_process_cq(nvmeq, NULL);
+	struct nvme_completion cqe;
+	int consumed = 0;
+
+	while (nvme_read_cqe(nvmeq, &cqe)) {
+		nvme_handle_cqe(nvmeq, &cqe);
+		consumed++;
+	}
+
+	if (consumed) {
+		nvme_ring_cq_doorbell(nvmeq);
+		nvmeq->cqe_seen = 1;
+	}
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
-		spin_lock_irq(&nvmeq->q_lock);
-		__nvme_process_cq(nvmeq, &tag);
-		spin_unlock_irq(&nvmeq->q_lock);
+	struct nvme_completion cqe;
+	int found = 0, consumed = 0;
 
-		if (tag == -1)
-			return 1;
-	}
+	if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return 0;
 
-	return 0;
+	spin_lock_irq(&nvmeq->q_lock);
+	while (nvme_read_cqe(nvmeq, &cqe)) {
+		nvme_handle_cqe(nvmeq, &cqe);
+		consumed++;
+
+		if (tag == cqe.command_id) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (consumed)
+		nvme_ring_cq_doorbell(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return found;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
 	blk_mq_free_request(req);
 }
 
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+	/* If true, indicates loss of adapter communication, possibly by a
+	 * NVMe Subsystem reset.
+	 */
+	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+	/* If there is a reset ongoing, we shouldn't reset again. */
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
+		return false;
+
+	/* We shouldn't reset unless the controller is on fatal error state
+	 * _or_ if we lost the communication with it.
+	 */
+	if (!(csts & NVME_CSTS_CFS) && !nssro)
+		return false;
+
+	/* If PCI error recovery process is happening, we cannot reset or
+	 * the recovery mechanism will surely fail.
+	 */
+	if (pci_channel_offline(to_pci_dev(dev->dev)))
+		return false;
+
+	return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+	/* Read a config register to help see what died. */
+	u16 pci_status;
+	int result;
+
+	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+				      &pci_status);
+	if (result == PCIBIOS_SUCCESSFUL)
+		dev_warn(dev->ctrl.device,
+			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+			 csts, pci_status);
+	else
+		dev_warn(dev->ctrl.device,
+			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+			 csts, result);
+}
+
 static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
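nvme_should_reset() above consults only two status bits. As a sketch of the decision (the bit positions follow the NVMe spec's CSTS layout; the kernel's authoritative definitions are NVME_CSTS_CFS and NVME_CSTS_NSSRO in include/linux/nvme.h, and the helper name below is hypothetical):

	static bool csts_looks_dead(u32 csts, bool subsystem_reset_supported)
	{
		bool fatal = csts & (1 << 1);		/* CSTS.CFS: controller fatal status */
		bool nssro = subsystem_reset_supported &&
			     (csts & (1 << 4));		/* CSTS.NSSRO: subsystem reset occurred */

		return fatal || nssro;
	}

A reset is requested only when one of these is set, no reset is already in flight, and PCI error recovery is not running.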
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
+	u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+	/*
+	 * Reset immediately if the controller has failed.
+	 */
+	if (nvme_should_reset(dev, csts)) {
+		nvme_warn_reset(dev, csts);
+		nvme_dev_disable(dev, false);
+		nvme_reset_ctrl(&dev->ctrl);
+		return BLK_EH_HANDLED;
+	}
 
 	/*
 	 * Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 			 "I/O %d QID %d timeout, reset controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
-		nvme_reset(dev);
+		nvme_reset_ctrl(&dev->ctrl);
 
 		/*
 		 * Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
 	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx      = nvme_admin_exit_hctx,
-	.init_request	= nvme_admin_init_request,
+	.init_request	= nvme_init_request,
 	.timeout	= nvme_timeout,
 };
 
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 	return 0;
 }
 
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+	return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (size <= dev->bar_mapped_size)
+		return 0;
+	if (size > pci_resource_len(pdev, 0))
+		return -ENOMEM;
+	if (dev->bar)
+		iounmap(dev->bar);
+	dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+	if (!dev->bar) {
+		dev->bar_mapped_size = 0;
+		return -ENOMEM;
+	}
+	dev->bar_mapped_size = size;
+	dev->dbs = dev->bar + NVME_REG_DBS;
+
+	return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
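The doorbell BAR sizing above is plain arithmetic: every queue pair owns one submission-queue tail and one completion-queue head doorbell, 4 bytes each, scaled by the controller's doorbell stride, starting at NVME_REG_DBS (offset 0x1000). A hypothetical helper spelling out the same formula, for illustration only:

	static unsigned long example_db_bar_size(unsigned nr_io_queues,
						 unsigned db_stride)
	{
		return 0x1000 + ((nr_io_queues + 1) * 8 * db_stride);
	}

With 32 I/O queues and a stride of 1 this is 0x1000 + 33 * 8 = 4360 bytes, well inside the initial NVME_REG_DBS + 4096 mapping made by nvme_dev_map() below, so nvme_remap_bar() only does real work for very large queue counts or a wide CAP.DSTRD.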
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
+	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+	if (result < 0)
+		return result;
+
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
 						NVME_CAP_NSSRC(cap) : 0;
 
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
-	/* If true, indicates loss of adapter communication, possibly by a
-	 * NVMe Subsystem reset.
-	 */
-	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
-	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
-		return false;
-
-	/* We shouldn't reset unless the controller is on fatal error state
-	 * _or_ if we lost the communication with it.
-	 */
-	if (!(csts & NVME_CSTS_CFS) && !nssro)
-		return false;
-
-	/* If PCI error recovery process is happening, we cannot reset or
-	 * the recovery mechanism will surely fail.
-	 */
-	if (pci_channel_offline(to_pci_dev(dev->dev)))
-		return false;
-
-	return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
-	/* Read a config register to help see what died. */
-	u16 pci_status;
-	int result;
-
-	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
-				      &pci_status);
-	if (result == PCIBIOS_SUCCESSFUL)
-		dev_warn(dev->dev,
-			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
-			 csts, pci_status);
-	else
-		dev_warn(dev->dev,
-			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
-			 csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
-	struct nvme_dev *dev = (struct nvme_dev *)data;
-	u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-	/* Skip controllers under certain specific conditions. */
-	if (nvme_should_reset(dev, csts)) {
-		if (!nvme_reset(dev))
-			nvme_warn_reset(dev, csts);
-		return;
-	}
-
-	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
 	unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	}
 }
 
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 {
-	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+	size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+	struct nvme_command c;
+	u64 dma_addr;
+	int ret;
+
+	dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+			DMA_TO_DEVICE);
+	if (dma_mapping_error(dev->dev, dma_addr))
+		return -ENOMEM;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode	= nvme_admin_set_features;
+	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+	c.features.dword11	= cpu_to_le32(bits);
+	c.features.dword12	= cpu_to_le32(dev->host_mem_size >>
+					      ilog2(dev->ctrl.page_size));
+	c.features.dword13	= cpu_to_le32(lower_32_bits(dma_addr));
+	c.features.dword14	= cpu_to_le32(upper_32_bits(dma_addr));
+	c.features.dword15	= cpu_to_le32(dev->nr_host_mem_descs);
+
+	ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+	if (ret) {
+		dev_warn(dev->ctrl.device,
+			 "failed to set host mem (err %d, flags %#x).\n",
+			 ret, bits);
+	}
+	dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+	return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->nr_host_mem_descs; i++) {
+		struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+		size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+		dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+				le64_to_cpu(desc->addr));
+	}
+
+	kfree(dev->host_mem_desc_bufs);
+	dev->host_mem_desc_bufs = NULL;
+	kfree(dev->host_mem_descs);
+	dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+{
+	struct nvme_host_mem_buf_desc *descs;
+	u32 chunk_size, max_entries;
+	int i = 0;
+	void **bufs;
+	u64 size, tmp;
+
+	/* start big and work our way down */
+	chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+	tmp = (preferred + chunk_size - 1);
+	do_div(tmp, chunk_size);
+	max_entries = tmp;
+	descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+	if (!descs)
+		goto out;
+
+	bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+	if (!bufs)
+		goto out_free_descs;
+
+	for (size = 0; size < preferred; size += chunk_size) {
+		u32 len = min_t(u64, chunk_size, preferred - size);
+		dma_addr_t dma_addr;
+
+		bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+				DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+		if (!bufs[i])
+			break;
+
+		descs[i].addr = cpu_to_le64(dma_addr);
+		descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+		i++;
+	}
+
+	if (!size || (min && size < min)) {
+		dev_warn(dev->ctrl.device,
+			"failed to allocate host memory buffer.\n");
+		goto out_free_bufs;
+	}
+
+	dev_info(dev->ctrl.device,
+		"allocated %lld MiB host memory buffer.\n",
+		size >> ilog2(SZ_1M));
+	dev->nr_host_mem_descs = i;
+	dev->host_mem_size = size;
+	dev->host_mem_descs = descs;
+	dev->host_mem_desc_bufs = bufs;
+	return 0;
+
+out_free_bufs:
+	while (--i >= 0) {
+		size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+		dma_free_coherent(dev->dev, size, bufs[i],
+				le64_to_cpu(descs[i].addr));
+	}
+
+	kfree(bufs);
+out_free_descs:
+	kfree(descs);
+out:
+	/* try a smaller chunk size if we failed early */
+	if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+		chunk_size /= 2;
+		goto retry;
+	}
+	dev->host_mem_descs = NULL;
+	return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+	u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+	u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+	u64 min = (u64)dev->ctrl.hmmin * 4096;
+	u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+	preferred = min(preferred, max);
+	if (min > max) {
+		dev_warn(dev->ctrl.device,
+			"min host memory (%lld MiB) above limit (%d MiB).\n",
+			min >> ilog2(SZ_1M), max_host_mem_size_mb);
+		nvme_free_host_mem(dev);
+		return;
+	}
+
+	/*
+	 * If we already have a buffer allocated check if we can reuse it.
+	 */
+	if (dev->host_mem_descs) {
+		if (dev->host_mem_size >= min)
+			enable_bits |= NVME_HOST_MEM_RETURN;
+		else
+			nvme_free_host_mem(dev);
+	}
+
+	if (!dev->host_mem_descs) {
+		if (nvme_alloc_host_mem(dev, min, preferred))
+			return;
+	}
+
+	if (nvme_set_host_mem(dev, enable_bits))
+		nvme_free_host_mem(dev);
 }
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int result, nr_io_queues, size;
+	int result, nr_io_queues;
+	unsigned long size;
 
 	nr_io_queues = num_online_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
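The host-memory-buffer sizing in this hunk follows straight from Identify Controller: HMPRE and HMMIN are reported in 4 KiB units, hence the multiplications by 4096. A worked example with hypothetical controller values:

	/* Illustrative only: a controller reporting HMPRE = 0x8000 and
	 * HMMIN = 0x400 asks for
	 *     preferred = 0x8000 * 4096 = 128 MiB
	 *     min       =  0x400 * 4096 =   4 MiB
	 * nvme_setup_host_mem() clamps "preferred" to the
	 * max_host_mem_size_mb module parameter, nvme_alloc_host_mem() then
	 * fills the descriptor list chunk by chunk, halving the chunk size
	 * and retrying whenever it cannot reach at least "min" bytes, and
	 * nvme_set_host_mem() finally hands the list to the controller via
	 * Set Features (Host Memory Buffer). */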
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 			nvme_release_cmb(dev);
 	}
 
-	size = db_bar_size(dev, nr_io_queues);
-	if (size > 8192) {
-		iounmap(dev->bar);
-		do {
-			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
-			if (dev->bar)
-				break;
-			if (!--nr_io_queues)
-				return -ENOMEM;
-			size = db_bar_size(dev, nr_io_queues);
-		} while (1);
-		dev->dbs = dev->bar + 4096;
-		adminq->q_db = dev->dbs;
-	}
+	do {
+		size = db_bar_size(dev, nr_io_queues);
+		result = nvme_remap_bar(dev, size);
+		if (!result)
+			break;
+		if (!--nr_io_queues)
+			return -ENOMEM;
+	} while (1);
+	adminq->q_db = dev->dbs;
 
 	/* Deregister the admin queue's interrupt */
 	pci_free_irq(pdev, 0, adminq);
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return nvme_create_io_queues(dev);
 }
 
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
 	complete(&nvmeq->dev->ioq_wait);
 }
 
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1740,8 +1905,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	 */
 	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
 		dev->q_depth = 2;
-		dev_warn(dev->dev, "detected Apple NVMe controller, set "
-			"queue depth=%u to work around controller resets\n",
+		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
+			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
 	}
 
@@ -1759,7 +1924,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		if (dev->cmbsz) {
 			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 						    &dev_attr_cmb.attr, NULL))
-				dev_warn(dev->dev,
+				dev_warn(dev->ctrl.device,
 					 "failed to add sysfs attribute for CMB\n");
 		}
 	}
@@ -1799,13 +1964,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	bool dead = true;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	del_timer_sync(&dev->watchdog_timer);
-
 	mutex_lock(&dev->shutdown_lock);
 	if (pci_is_enabled(pdev)) {
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-		if (dev->ctrl.state == NVME_CTRL_LIVE)
+		if (dev->ctrl.state == NVME_CTRL_LIVE ||
+		    dev->ctrl.state == NVME_CTRL_RESETTING)
 			nvme_start_freeze(&dev->ctrl);
 		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
 			pdev->error_state  != pci_channel_io_normal);
@@ -1815,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * Give the controller a chance to complete all entered requests if
 	 * doing a safe shutdown.
 	 */
-	if (!dead && shutdown)
-		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+	if (!dead) {
+		if (shutdown)
+			nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+		/*
+		 * If the controller is still alive tell it to stop using the
+		 * host memory buffer.  In theory the shutdown / reset should
+	 * make sure that it doesn't access the host memory anymore,
+	 * but I'd rather be safe than sorry.
+		 */
+		if (dev->host_mem_descs)
+			nvme_set_host_mem(dev, 0);
+
+	}
 	nvme_stop_queues(&dev->ctrl);
 
 	queues = dev->online_queues - 1;
@@ -1899,11 +2075,12 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
 static void nvme_reset_work(struct work_struct *work)
 {
-	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+	struct nvme_dev *dev =
+		container_of(work, struct nvme_dev, ctrl.reset_work);
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
 		goto out;
 
 	/*
@@ -1913,9 +2090,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
 	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
@@ -1951,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
 				 "unable to allocate dma for dbbuf\n");
 	}
 
+	if (dev->ctrl.hmpre)
+		nvme_setup_host_mem(dev);
+
 	result = nvme_setup_io_queues(dev);
 	if (result)
 		goto out;
@@ -1964,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->online_queues > 1)
 		nvme_queue_async_events(&dev->ctrl);
 
-	mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
 	 * any working I/O queue.
@@ -2005,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-static int nvme_reset(struct nvme_dev *dev)
-{
-	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
-		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
-	if (!queue_work(nvme_workq, &dev->reset_work))
-		return -EBUSY;
-	return 0;
-}
-
 static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
 	*val = readl(to_nvme_dev(ctrl)->bar + off);
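The driver-local nvme_reset() disappears because reset scheduling now lives in the core. The nvme_reset_ctrl() used throughout this patch is defined in core.c; roughly (a sketch of what the call sites here rely on, not a verbatim quote):

	int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
	{
		if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
			return -EBUSY;
		if (!queue_work(nvme_wq, &ctrl->reset_work))
			return -EBUSY;
		return 0;
	}

This is also why nvme_reset_work() now WARNs unless the state is already NVME_CTRL_RESETTING, and why nvme_probe() moves the controller into that state before queueing the work.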
@@ -2034,23 +2198,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	return 0;
 }
 
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
-	struct nvme_dev *dev = to_nvme_dev(ctrl);
-	int ret = nvme_reset(dev);
-
-	if (!ret)
-		flush_work(&dev->reset_work);
-	return ret;
-}
-
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.name			= "pcie",
 	.module			= THIS_MODULE,
+	.flags			= NVME_F_METADATA_SUPPORTED,
 	.reg_read32		= nvme_pci_reg_read32,
 	.reg_write32		= nvme_pci_reg_write32,
 	.reg_read64		= nvme_pci_reg_read64,
-	.reset_ctrl		= nvme_pci_reset_ctrl,
 	.free_ctrl		= nvme_pci_free_ctrl,
 	.submit_async_event	= nvme_pci_submit_async_event,
 };
@@ -2062,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (pci_request_mem_regions(pdev, "nvme"))
 		return -ENODEV;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
+	if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
 		goto release;
 
 	return 0;
@@ -2117,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto free;
 
-	INIT_WORK(&dev->reset_work, nvme_reset_work);
+	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
-	setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
-		(unsigned long)dev);
 	mutex_init(&dev->shutdown_lock);
 	init_completion(&dev->ioq_wait);
 
@@ -2135,9 +2286,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
-	queue_work(nvme_workq, &dev->reset_work);
+	queue_work(nvme_wq, &dev->ctrl.reset_work);
 	return 0;
 
  release_pools:
@@ -2158,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 	if (prepare)
 		nvme_dev_disable(dev, false);
 	else
-		nvme_reset(dev);
+		nvme_reset_ctrl(&dev->ctrl);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2178,6 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->ctrl.reset_work);
 	pci_set_drvdata(pdev, NULL);
 
 	if (!pci_device_is_present(pdev)) {
@@ -2185,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
 		nvme_dev_disable(dev, false);
 	}
 
-	flush_work(&dev->reset_work);
+	flush_work(&dev->ctrl.reset_work);
 	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, true);
+	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
 	nvme_release_prp_pools(dev);
@@ -2228,7 +2382,7 @@ static int nvme_resume(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-	nvme_reset(ndev);
+	nvme_reset_ctrl(&ndev->ctrl);
 	return 0;
 }
 #endif
@@ -2267,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
 	dev_info(dev->ctrl.device, "restart after slot reset\n");
 	pci_restore_state(pdev);
-	nvme_reset(dev);
+	nvme_reset_ctrl(&dev->ctrl);
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -2293,6 +2447,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0a54),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
 				NVME_QUIRK_DEALLOCATE_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
+		.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
@@ -2321,22 +2477,12 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-	int result;
-
-	nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
-	if (!nvme_workq)
-		return -ENOMEM;
-
-	result = pci_register_driver(&nvme_driver);
-	if (result)
-		destroy_workqueue(nvme_workq);
-	return result;
+	return pci_register_driver(&nvme_driver);
 }
 
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
-	destroy_workqueue(nvme_workq);
 	_nvme_check_size();
 }
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dd1c6de..6d4119d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
  */
 #define NVME_RDMA_NR_AEN_COMMANDS      1
 #define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-	(NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
 
 struct nvme_rdma_device {
 	struct ib_device       *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
 };
 
 enum nvme_rdma_queue_flags {
-	NVME_RDMA_Q_CONNECTED = (1 << 0),
-	NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
-	NVME_RDMA_Q_DELETING = (1 << 2),
-	NVME_RDMA_Q_LIVE = (1 << 3),
+	NVME_RDMA_Q_LIVE		= 0,
+	NVME_RDMA_Q_DELETING		= 1,
 };
 
 struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
 };
 
 struct nvme_rdma_ctrl {
-	/* read and written in the hot path */
-	spinlock_t		lock;
-
 	/* read only in the hot path */
 	struct nvme_rdma_queue	*queues;
 	u32			queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
 	/* other member variables */
 	struct blk_mq_tag_set	tag_set;
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 	struct work_struct	err_work;
 
 	struct nvme_rdma_qe	async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(nvme_rdma_ctrl_list);
 static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
 
-static struct workqueue_struct *nvme_rdma_wq;
-
 /*
  * Disabling this option makes small I/O go faster, but is fundamentally
  * unsafe.  With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
 	return ret;
 }
 
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
-		struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+		struct request *rq, unsigned int hctx_idx)
 {
+	struct nvme_rdma_ctrl *ctrl = set->driver_data;
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
 			DMA_TO_DEVICE);
 }
 
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+		struct request *rq, unsigned int hctx_idx,
+		unsigned int numa_node)
 {
-	return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx)
-{
-	return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
-		struct request *rq, unsigned int queue_idx)
-{
+	struct nvme_rdma_ctrl *ctrl = set->driver_data;
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
 	return -ENOMEM;
 }
 
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
-		struct request *rq, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
-
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 		unsigned int hctx_idx)
 {
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	struct nvme_rdma_device *dev;
 	struct ib_device *ibdev;
 
-	if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
-		return;
-
 	dev = queue->device;
 	ibdev = dev->dev;
 	rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct ib_device *ibdev = dev->dev;
+	struct ib_device *ibdev;
 	const int send_wr_factor = 3;			/* MR, SEND, INV */
 	const int cq_factor = send_wr_factor + 1;	/* + RECV */
 	int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
 	int ret;
 
-	queue->device = dev;
+	queue->device = nvme_rdma_find_get_device(queue->cm_id);
+	if (!queue->device) {
+		dev_err(queue->cm_id->device->dev.parent,
+			"no client data found!\n");
+		return -ECONNREFUSED;
+	}
+	ibdev = queue->device->dev;
 
 	/*
 	 * The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 
 
 	/* +1 for ib_stop_cq */
-	queue->ib_cq = ib_alloc_cq(dev->dev, queue,
-				cq_factor * queue->queue_size + 1, comp_vector,
-				IB_POLL_SOFTIRQ);
+	queue->ib_cq = ib_alloc_cq(ibdev, queue,
+				cq_factor * queue->queue_size + 1,
+				comp_vector, IB_POLL_SOFTIRQ);
 	if (IS_ERR(queue->ib_cq)) {
 		ret = PTR_ERR(queue->ib_cq);
-		goto out;
+		goto out_put_dev;
 	}
 
 	ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 		ret = -ENOMEM;
 		goto out_destroy_qp;
 	}
-	set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
 	return 0;
 
@@ -532,7 +503,8 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 	ib_destroy_qp(queue->qp);
 out_destroy_ib_cq:
 	ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+	nvme_rdma_dev_put(queue->device);
 	return ret;
 }
 
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 	}
 
 	clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
-	set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
 
 	return 0;
 
 out_destroy_cm_id:
-	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 	return ret;
 }
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 	if (nvmf_should_reconnect(&ctrl->ctrl)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
-		queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
 				ctrl->ctrl.opts->reconnect_delay * HZ);
 	} else {
 		dev_info(ctrl->ctrl.device, "Removing controller...\n");
-		queue_work(nvme_rdma_wq, &ctrl->delete_work);
+		queue_work(nvme_wq, &ctrl->delete_work);
 	}
 }
 
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
-	++ctrl->ctrl.opts->nr_reconnects;
+	++ctrl->ctrl.nr_reconnects;
 
 	if (ctrl->queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
@@ -749,40 +719,37 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (ret)
 		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
-	ctrl->ctrl.opts->nr_reconnects = 0;
+	ctrl->ctrl.nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -791,11 +758,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
-			ctrl->ctrl.opts->nr_reconnects);
+			ctrl->ctrl.nr_reconnects);
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
@@ -807,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
 
-	for (i = 0; i < ctrl->queue_count; i++) {
-		clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+	for (i = 0; i < ctrl->queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
-	}
 
 	if (ctrl->queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
@@ -823,6 +786,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * The queues are no longer live, so restart them so that incoming
+	 * I/O fails fast instead of being blocked.
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
 
@@ -831,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
 		return;
 
-	queue_work(nvme_rdma_wq, &ctrl->err_work);
+	queue_work(nvme_wq, &ctrl->err_work);
 }
 
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1038,6 +1008,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
+static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+{
+	int sig_limit;
+
+	/*
+	 * We signal completion once every queue_size / 2 sends and also
+	 * handle the degenerate case of a device with queue_depth=1, where
+	 * we would need to signal every message.
+	 */
+	sig_limit = max(queue->queue_size / 2, 1);
+	return (++queue->sig_count % sig_limit) == 0;
+}
+
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
 		struct ib_send_wr *first, bool flush)
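The cadence chosen by nvme_rdma_queue_sig_limit() replaces the hard-coded "% 32" heuristic removed below; for illustration:

	/* queue_size == 128: sig_limit = max(64, 1) = 64, so IB_SEND_SIGNALED
	 * is set on every 64th post;
	 * queue_size == 1:   sig_limit = max(0, 1)  = 1, so every send is
	 * signalled, which keeps a queue_depth=1 device from exhausting its
	 * send queue while waiting for an unsignalled completion. */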
@@ -1065,9 +1048,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * Would have been way too obvious to handle this in hardware or
 	 * at least the RDMA stack..
 	 *
-	 * This messy and racy code sniplet is copy and pasted from the iSER
-	 * initiator, and the magic '32' comes from there as well.
-	 *
 	 * Always signal the flushes. The magic request used for the flush
 	 * sequencer is not allocated in our driver's tagset and it's
 	 * triggered to be freed by blk_cleanup_queue(). So we need to
@@ -1075,7 +1055,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	 * embedded in request's payload, is not freed when __ib_process_cq()
 	 * calls wr_cqe->done().
 	 */
-	if ((++queue->sig_count % 32) == 0 || flush)
+	if (nvme_rdma_queue_sig_limit(queue) || flush)
 		wr.send_flags |= IB_SEND_SIGNALED;
 
 	if (first)
@@ -1266,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
 
 static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev;
 	int ret;
 
-	dev = nvme_rdma_find_get_device(queue->cm_id);
-	if (!dev) {
-		dev_err(queue->cm_id->device->dev.parent,
-			"no client data found!\n");
-		return -ECONNREFUSED;
-	}
-
-	ret = nvme_rdma_create_queue_ib(queue, dev);
-	if (ret) {
-		nvme_rdma_dev_put(dev);
-		goto out;
-	}
+	ret = nvme_rdma_create_queue_ib(queue);
+	if (ret)
+		return ret;
 
 	ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
 	if (ret) {
@@ -1294,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 
 out_destroy_queue:
 	nvme_rdma_destroy_queue_ib(queue);
-out:
 	return ret;
 }
 
@@ -1322,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 	 * specified by the Fabrics standard.
 	 */
 	if (priv.qid == 0) {
-		priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
-		priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+		priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+		priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 	} else {
 		/*
 		 * current interpretation of the fabrics spec
@@ -1371,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		complete(&queue->cm_done);
 		return 0;
 	case RDMA_CM_EVENT_REJECTED:
+		nvme_rdma_destroy_queue_ib(queue);
 		cm_error = nvme_rdma_conn_rejected(queue, ev);
 		break;
-	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
+		nvme_rdma_destroy_queue_ib(queue);
+	case RDMA_CM_EVENT_ADDR_ERROR:
 		dev_dbg(queue->ctrl->ctrl.device,
 			"CM error event %d\n", ev->event);
 		cm_error = -ECONNRESET;
@@ -1423,22 +1394,32 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
-		struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
 		struct nvme_command *cmd = nvme_req(rq)->cmd;
 
 		if (!blk_rq_is_passthrough(rq) ||
 		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * The reconnecting state means transport disruption,
+			 * which can take a long time and might even fail
+			 * permanently, so we can't let incoming I/O be
+			 * requeued forever.  Fail it fast to give the upper
+			 * layers a chance to fail over.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return BLK_STS_IOERR;
+			return BLK_STS_RESOURCE; /* try again later */
+		}
 	}
 
-	return true;
+	return 0;
 }
 
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1449,27 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command *c = sqe->data;
 	bool flush = false;
 	struct ib_device *dev;
-	int ret;
+	blk_status_t ret;
+	int err;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		return ret;
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	ret = nvme_setup_cmd(ns, rq, c);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	blk_mq_start_request(rq);
 
-	ret = nvme_rdma_map_data(queue, rq, c);
-	if (ret < 0) {
+	err = nvme_rdma_map_data(queue, rq, c);
+	if (err < 0) {
 		dev_err(queue->ctrl->ctrl.device,
-			     "Failed to map data (%d)\n", ret);
+			     "Failed to map data (%d)\n", err);
 		nvme_cleanup_cmd(rq);
 		goto err;
 	}
@@ -1479,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (req_op(rq) == REQ_OP_FLUSH)
 		flush = true;
-	ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
-	if (ret) {
+	if (err) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 err:
-	return (ret == -ENOMEM || ret == -EAGAIN) ?
-		BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+	if (err == -ENOMEM || err == -EAGAIN)
+		return BLK_STS_RESOURCE;
+	return BLK_STS_IOERR;
 }
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1499,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	struct ib_wc wc;
 	int found = 0;
 
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	while (ib_poll_cq(cq, 1, &wc) > 0) {
 		struct ib_cqe *cqe = wc.wr_cqe;
 
@@ -1536,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
-	.init_request	= nvme_rdma_init_admin_request,
-	.exit_request	= nvme_rdma_exit_admin_request,
+	.init_request	= nvme_rdma_init_request,
+	.exit_request	= nvme_rdma_exit_request,
 	.reinit_request	= nvme_rdma_reinit_request,
 	.init_hctx	= nvme_rdma_init_admin_hctx,
 	.timeout	= nvme_rdma_timeout,
@@ -1547,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 {
 	int error;
 
-	error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+	error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (error)
 		return error;
 
@@ -1648,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 		nvme_rdma_free_io_queues(ctrl);
 	}
 
-	if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1685,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
 
-	if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
 		return -EBUSY;
 
 	return 0;
@@ -1719,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_rdma_ctrl *ctrl = container_of(work,
-					struct nvme_rdma_ctrl, reset_work);
+	struct nvme_rdma_ctrl *ctrl =
+		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
 	int ret;
 	bool changed;
 
@@ -1761,36 +1744,19 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 del_dead_ctrl:
 	/* Deleting this dead controller... */
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
+	WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.name			= "rdma",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_rdma_reset_ctrl,
 	.free_ctrl		= nvme_rdma_free_ctrl,
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_del_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 	.get_address		= nvmf_get_address,
 };
 
@@ -1895,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
-	spin_lock_init(&ctrl->lock);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
 	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1915,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	/* sanity check icdoff */
 	if (ctrl->ctrl.icdoff) {
 		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
 	/* sanity check keyed sgls */
 	if (!(ctrl->ctrl.sgls & (1 << 20))) {
 		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
@@ -2009,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	flush_workqueue(nvme_rdma_wq);
+	flush_workqueue(nvme_wq);
 }
 
 static struct ib_client nvme_rdma_ib_client = {
@@ -2022,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
 {
 	int ret;
 
-	nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
-	if (!nvme_rdma_wq)
-		return -ENOMEM;
-
 	ret = ib_register_client(&nvme_rdma_ib_client);
 	if (ret)
-		goto err_destroy_wq;
+		return ret;
 
 	ret = nvmf_register_transport(&nvme_rdma_transport);
 	if (ret)
@@ -2038,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
 
 err_unreg_client:
 	ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
-	destroy_workqueue(nvme_rdma_wq);
 	return ret;
 }
 
@@ -2047,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
 {
 	nvmf_unregister_transport(&nvme_rdma_transport);
 	ib_unregister_client(&nvme_rdma_ib_client);
-	destroy_workqueue(nvme_rdma_wq);
 }
 
 module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e..0000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534;	/* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES				0x00
-#define VPD_SERIAL_NUMBER				0x80
-#define VPD_DEVICE_IDENTIFIERS				0x83
-#define VPD_EXTENDED_INQUIRY				0x86
-#define VPD_BLOCK_LIMITS				0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS			0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN			4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN			8
-#define FORMAT_UNIT_PROT_INT_OFFSET			3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET		0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK		0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA				0x70
-#define DESC_FORMAT_SENSE_DATA				0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH			10
-#define LUN_ENTRY_SIZE					8
-#define LUN_DATA_HEADER_SIZE				8
-#define ALL_LUNS_RETURNED				0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED			0x01
-#define RESTRICTED_LUNS_RETURNED			0x00
-#define DOWNLOAD_SAVE_ACTIVATE				0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE			0x0E
-#define ACTIVATE_DEFERRED_MICROCODE			0x0F
-#define FORMAT_UNIT_IMMED_MASK				0x2
-#define FORMAT_UNIT_IMMED_OFFSET			1
-#define KELVIN_TEMP_FACTOR				273
-#define FIXED_FMT_SENSE_DATA_SIZE			18
-#define DESC_FMT_SENSE_DATA_SIZE			8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE			0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE			0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE			0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE			0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE			0x86
-#define INQ_BDEV_LIMITS_PAGE				0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE			0xB1
-#define INQ_SERIAL_NUMBER_LENGTH			0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES			6
-#define VERSION_SPC_4					0x06
-#define ACA_UNSUPPORTED					0
-#define STANDARD_INQUIRY_LENGTH				36
-#define ADDITIONAL_STD_INQ_LENGTH			31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH		0x3C
-#define RESERVED_FIELD					0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP				0x1C
-#define MODE_PAGE_CACHING				0x08
-#define MODE_PAGE_CONTROL				0x0A
-#define MODE_PAGE_POWER_CONDITION			0x1A
-#define MODE_PAGE_RETURN_ALL				0x3F
-#define MODE_PAGE_BLK_DES_LEN				0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN			0x10
-#define MODE_PAGE_CACHING_LEN				0x14
-#define MODE_PAGE_CONTROL_LEN				0x0C
-#define MODE_PAGE_POW_CND_LEN				0x28
-#define MODE_PAGE_INF_EXC_LEN				0x0C
-#define MODE_PAGE_ALL_LEN				0x54
-#define MODE_SENSE6_MPH_SIZE				4
-#define MODE_SENSE_PAGE_CONTROL_MASK			0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET			2
-#define MODE_SENSE_PAGE_CODE_MASK			0x3F
-#define MODE_SENSE_LLBAA_MASK				0x10
-#define MODE_SENSE_LLBAA_SHIFT				4
-#define MODE_SENSE_DBD_MASK				8
-#define MODE_SENSE_DBD_SHIFT				3
-#define MODE_SENSE10_MPH_SIZE				8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK		0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK			0x1
-#define MODE_SELECT_6_BD_OFFSET				3
-#define MODE_SELECT_10_BD_OFFSET			6
-#define MODE_SELECT_10_LLBAA_OFFSET			4
-#define MODE_SELECT_10_LLBAA_MASK			1
-#define MODE_SELECT_6_MPH_SIZE				4
-#define MODE_SELECT_10_MPH_SIZE				8
-#define CACHING_MODE_PAGE_WCE_MASK			0x04
-#define MODE_SENSE_BLK_DESC_ENABLED			0
-#define MODE_SENSE_BLK_DESC_COUNT			1
-#define MODE_SELECT_PAGE_CODE_MASK			0x3F
-#define SHORT_DESC_BLOCK				8
-#define LONG_DESC_BLOCK					16
-#define MODE_PAGE_POW_CND_LEN_FIELD			0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD			0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD			0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD			0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES			0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE		0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH		0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE		0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE			0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED			0
-#define LOG_SENSE_CDB_PC_MASK				0xC0
-#define LOG_SENSE_CDB_PC_SHIFT				6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES		1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK			0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH			0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH			0xC
-#define REMAINING_TEMP_PAGE_LENGTH			0xC
-#define LOG_TEMP_PAGE_LENGTH				0x10
-#define LOG_TEMP_UNKNOWN				0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH			0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE				8
-#define READ_CAP_16_RESP_SIZE				32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS					4
-#define NVME_MAX_FIRMWARE_SLOT				7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET			8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE				0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT		0x03
-#define SCSI_ASC_LUN_NOT_READY				0x04
-#define SCSI_ASC_WARNING				0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED		0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED		0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED		0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR			0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY		0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID		0x20
-#define SCSI_ASC_ILLEGAL_COMMAND			0x20
-#define SCSI_ASC_ILLEGAL_BLOCK				0x21
-#define SCSI_ASC_INVALID_CDB				0x24
-#define SCSI_ASC_INVALID_LUN				0x25
-#define SCSI_ASC_INVALID_PARAMETER			0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED			0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE		0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE			0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED			0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED		0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED		0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED		0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS			0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED			0x08
-#define SCSI_ASCQ_INVALID_LUN_ID			0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
-	return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
-   Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
-	u8 fua;
-	u8 prot_info;
-	u64 lba;
-	u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
-								unsigned long n)
-{
-	int i;
-	void *index = from;
-	size_t remaining = n;
-	size_t xfer_len;
-
-	if (hdr->iovec_count > 0) {
-		struct sg_iovec sgl;
-
-		for (i = 0; i < hdr->iovec_count; i++) {
-			if (copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec)))
-				return -EFAULT;
-			xfer_len = min(remaining, sgl.iov_len);
-			if (copy_to_user(sgl.iov_base, index, xfer_len))
-				return -EFAULT;
-
-			index += xfer_len;
-			remaining -= xfer_len;
-			if (remaining == 0)
-				break;
-		}
-		return 0;
-	}
-
-	if (copy_to_user(hdr->dxferp, from, n))
-		return -EFAULT;
-	return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
-								unsigned long n)
-{
-	int i;
-	void *index = to;
-	size_t remaining = n;
-	size_t xfer_len;
-
-	if (hdr->iovec_count > 0) {
-		struct sg_iovec sgl;
-
-		for (i = 0; i < hdr->iovec_count; i++) {
-			if (copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec)))
-				return -EFAULT;
-			xfer_len = min(remaining, sgl.iov_len);
-			if (copy_from_user(index, sgl.iov_base, xfer_len))
-				return -EFAULT;
-			index += xfer_len;
-			remaining -= xfer_len;
-			if (remaining == 0)
-				break;
-		}
-		return 0;
-	}
-
-	if (copy_from_user(to, hdr->dxferp, n))
-		return -EFAULT;
-	return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
-				 u8 asc, u8 ascq)
-{
-	u8 xfer_len;
-	u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
-	if (scsi_status_is_good(status)) {
-		hdr->status = SAM_STAT_GOOD;
-		hdr->masked_status = GOOD;
-		hdr->host_status = DID_OK;
-		hdr->driver_status = DRIVER_OK;
-		hdr->sb_len_wr = 0;
-	} else {
-		hdr->status = status;
-		hdr->masked_status = status >> 1;
-		hdr->host_status = DID_OK;
-		hdr->driver_status = DRIVER_OK;
-
-		memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
-		resp[0] = DESC_FORMAT_SENSE_DATA;
-		resp[1] = sense_key;
-		resp[2] = asc;
-		resp[3] = ascq;
-
-		xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
-		hdr->sb_len_wr = xfer_len;
-		if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it.  In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
-	u8 status, sense_key, asc, ascq;
-	int res;
-
-	/* For non-nvme (Linux) errors, simply return the error code */
-	if (nvme_sc < 0)
-		return nvme_sc;
-
-	/* Mask DNR, More, and reserved fields */
-	switch (nvme_sc & 0x7FF) {
-	/* Generic Command Status */
-	case NVME_SC_SUCCESS:
-		status = SAM_STAT_GOOD;
-		sense_key = NO_SENSE;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_OPCODE:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ILLEGAL_COMMAND;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_FIELD:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_INVALID_CDB;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_DATA_XFER_ERROR:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_POWER_LOSS:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_WARNING;
-		ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
-		break;
-	case NVME_SC_INTERNAL:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = HARDWARE_ERROR;
-		asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ABORT_REQ:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ABORT_QUEUE:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_FUSED_FAIL:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_FUSED_MISSING:
-		status = SAM_STAT_TASK_ABORTED;
-		sense_key = ABORTED_COMMAND;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_INVALID_NS:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-		ascq = SCSI_ASCQ_INVALID_LUN_ID;
-		break;
-	case NVME_SC_LBA_RANGE:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ILLEGAL_BLOCK;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_CAP_EXCEEDED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_NS_NOT_READY:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = NOT_READY;
-		asc = SCSI_ASC_LUN_NOT_READY;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-
-	/* Command Specific Status */
-	case NVME_SC_INVALID_FORMAT:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
-		ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
-		break;
-	case NVME_SC_BAD_ATTRIBUTES:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_INVALID_CDB;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-
-	/* Media Errors */
-	case NVME_SC_WRITE_FAULT:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_READ_ERROR:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_GUARD_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
-		break;
-	case NVME_SC_APPTAG_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
-		break;
-	case NVME_SC_REFTAG_CHECK:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MEDIUM_ERROR;
-		asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
-		ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
-		break;
-	case NVME_SC_COMPARE_FAILED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = MISCOMPARE;
-		asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	case NVME_SC_ACCESS_DENIED:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-		ascq = SCSI_ASCQ_INVALID_LUN_ID;
-		break;
-
-	/* Unspecified/Default */
-	case NVME_SC_CMDID_CONFLICT:
-	case NVME_SC_CMD_SEQ_ERROR:
-	case NVME_SC_CQ_INVALID:
-	case NVME_SC_QID_INVALID:
-	case NVME_SC_QUEUE_SIZE:
-	case NVME_SC_ABORT_LIMIT:
-	case NVME_SC_ABORT_MISSING:
-	case NVME_SC_ASYNC_LIMIT:
-	case NVME_SC_FIRMWARE_SLOT:
-	case NVME_SC_FIRMWARE_IMAGE:
-	case NVME_SC_INVALID_VECTOR:
-	case NVME_SC_INVALID_LOG_PAGE:
-	default:
-		status = SAM_STAT_CHECK_CONDITION;
-		sense_key = ILLEGAL_REQUEST;
-		asc = SCSI_ASC_NO_SENSE;
-		ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		break;
-	}
-
-	res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-	return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ns *id_ns;
-	int res;
-	int nvme_sc;
-	int xfer_len;
-	u8 resp_data_format = 0x02;
-	u8 protect;
-	u8 cmdque = 0x01 << 1;
-	u8 fw_offset = sizeof(ctrl->firmware_rev);
-
-	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	if (id_ns->dps)
-		protect = 0x01;
-	else
-		protect = 0;
-	kfree(id_ns);
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[2] = VERSION_SPC_4;
-	inq_response[3] = resp_data_format;	/*normaca=0 | hisup=0 */
-	inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
-	inq_response[5] = protect;	/* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
-	inq_response[7] = cmdque;	/* wbus16=0 | sync=0 | vs=0 */
-	strncpy(&inq_response[8], "NVMe    ", 8);
-	strncpy(&inq_response[16], ctrl->model, 16);
-
-	while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
-		fw_offset--;
-	fw_offset -= 4;
-	strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	int xfer_len;
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE;   /* Page Code */
-	inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES;    /* Page Length */
-	inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
-	inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
-	inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
-	inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
-	inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *inq_response,
-					int alloc_len)
-{
-	int xfer_len;
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
-	inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-	strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-		u8 *inq_response, int alloc_len)
-{
-	struct nvme_id_ns *id_ns;
-	int nvme_sc, res;
-	size_t len;
-	void *eui;
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	eui = id_ns->eui64;
-	len = sizeof(id_ns->eui64);
-
-	if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
-		if (bitmap_empty(eui, len * 8)) {
-			eui = id_ns->nguid;
-			len = sizeof(id_ns->nguid);
-		}
-	}
-
-	if (bitmap_empty(eui, len * 8)) {
-		res = -EOPNOTSUPP;
-		goto out_free_id;
-	}
-
-	memset(inq_response, 0, alloc_len);
-	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[3] = 4 + len; /* Page Length */
-
-	/* Designation Descriptor start */
-	inq_response[4] = 0x01;	/* Proto ID=0h | Code set=1h */
-	inq_response[5] = 0x02;	/* PIV=0b | Asso=00b | Designator Type=2h */
-	inq_response[6] = 0x00;	/* Rsvd */
-	inq_response[7] = len;	/* Designator Length */
-	memcpy(&inq_response[8], eui, len);
-
-	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
-		struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ctrl *id_ctrl;
-	int nvme_sc, res;
-
-	if (alloc_len < 72) {
-		return nvme_trans_completion(hdr,
-				SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	memset(inq_response, 0, alloc_len);
-	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-	inq_response[3] = 0x48;	/* Page Length */
-
-	/* Designation Descriptor start */
-	inq_response[4] = 0x03;	/* Proto ID=0h | Code set=3h */
-	inq_response[5] = 0x08;	/* PIV=0b | Asso=00b | Designator Type=8h */
-	inq_response[6] = 0x00;	/* Rsvd */
-	inq_response[7] = 0x44;	/* Designator Length */
-
-	sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
-	memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
-	sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
-	memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
-	res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-	kfree(id_ctrl);
-	return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *resp, int alloc_len)
-{
-	int res;
-
-	if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
-		res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
-		if (res != -EOPNOTSUPP)
-			return res;
-	}
-
-	return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	u8 *inq_response;
-	int res;
-	int nvme_sc;
-	struct nvme_ctrl *ctrl = ns->ctrl;
-	struct nvme_id_ctrl *id_ctrl;
-	struct nvme_id_ns *id_ns;
-	int xfer_len;
-	u8 microcode = 0x80;
-	u8 spt;
-	u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
-	u8 grd_chk, app_chk, ref_chk, protect;
-	u8 uask_sup = 0x20;
-	u8 v_sup;
-	u8 luiclr = 0x01;
-
-	inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL)
-		return -ENOMEM;
-
-	nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_free_inq;
-
-	spt = spt_lut[id_ns->dpc & 0x07] << 3;
-	if (id_ns->dps)
-		protect = 0x01;
-	else
-		protect = 0;
-	kfree(id_ns);
-
-	grd_chk = protect << 2;
-	app_chk = protect << 1;
-	ref_chk = protect;
-
-	nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_free_inq;
-
-	v_sup = id_ctrl->vwc;
-	kfree(id_ctrl);
-
-	memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE;    /* Page Code */
-	inq_response[2] = 0x00;    /* Page Length MSB */
-	inq_response[3] = 0x3C;    /* Page Length LSB */
-	inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
-	inq_response[5] = uask_sup;
-	inq_response[6] = v_sup;
-	inq_response[7] = luiclr;
-	inq_response[8] = 0;
-	inq_response[9] = 0;
-
-	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
-	kfree(inq_response);
-	return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *inq_response, int alloc_len)
-{
-	__be32 max_sectors = cpu_to_be32(
-		nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
-	__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
-	__be32 discard_desc_count = cpu_to_be32(0x100);
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-	inq_response[1] = VPD_BLOCK_LIMITS;
-	inq_response[3] = 0x3c; /* Page Length */
-	memcpy(&inq_response[8], &max_sectors, sizeof(u32));
-	memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
-	if (max_discard)
-		memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
-	return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	u8 *inq_response;
-	int res;
-	int xfer_len;
-
-	inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE;    /* Page Code */
-	inq_response[2] = 0x00;    /* Page Length MSB */
-	inq_response[3] = 0x3C;    /* Page Length LSB */
-	inq_response[4] = 0x00;    /* Medium Rotation Rate MSB */
-	inq_response[5] = 0x01;    /* Medium Rotation Rate LSB */
-	inq_response[6] = 0x00;    /* Form Factor */
-
-	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	kfree(inq_response);
- out_mem:
-	return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-
-	log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
-	log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-	log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-	log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
-	xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
-	kfree(log_response);
- out_mem:
-	return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-	struct nvme_smart_log *smart_log;
-	u8 temp_c;
-	u16 temp_k;
-
-	log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL)
-		return -ENOMEM;
-
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
-		goto out_free_response;
-
-	if (res != NVME_SC_SUCCESS) {
-		temp_c = LOG_TEMP_UNKNOWN;
-	} else {
-		temp_k = (smart_log->temperature[1] << 8) +
-				(smart_log->temperature[0]);
-		temp_c = temp_k - KELVIN_TEMP_FACTOR;
-	}
-	kfree(smart_log);
-
-	log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
-	/* Informational Exceptions Log Parameter 1 Start */
-	/* Parameter Code=0x0000 bytes 4,5 */
-	log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
-	log_response[7] = 0x04; /* PARAMETER LENGTH */
-	/* Add sense Code and qualifier = 0x00 each */
-	/* Use Temperature from NVMe Get Log Page, convert to C from K */
-	log_response[10] = temp_c;
-
-	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-	kfree(log_response);
-	return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					int alloc_len)
-{
-	int res;
-	int xfer_len;
-	u8 *log_response;
-	struct nvme_smart_log *smart_log;
-	u32 feature_resp;
-	u8 temp_c_cur, temp_c_thresh;
-	u16 temp_k;
-
-	log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL)
-		return -ENOMEM;
-
-	res = nvme_get_log_page(ns->ctrl, &smart_log);
-	if (res < 0)
-		goto out_free_response;
-
-	if (res != NVME_SC_SUCCESS) {
-		temp_c_cur = LOG_TEMP_UNKNOWN;
-	} else {
-		temp_k = (smart_log->temperature[1] << 8) +
-				(smart_log->temperature[0]);
-		temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
-	}
-	kfree(smart_log);
-
-	/* Get Features for Temp Threshold */
-	res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
-								&feature_resp);
-	if (res != NVME_SC_SUCCESS)
-		temp_c_thresh = LOG_TEMP_UNKNOWN;
-	else
-		temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
-	log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
-	/* Subpage=0x00, Page Length MSB=0 */
-	log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
-	/* Temperature Log Parameter 1 (Temperature) Start */
-	/* Parameter Code = 0x0000 */
-	log_response[6] = 0x01;		/* Format and Linking = 01b */
-	log_response[7] = 0x02;		/* Parameter Length */
-	/* Use Temperature from NVMe Get Log Page, convert to C from K */
-	log_response[9] = temp_c_cur;
-	/* Temperature Log Parameter 2 (Reference Temperature) Start */
-	log_response[11] = 0x01;	/* Parameter Code = 0x0001 */
-	log_response[12] = 0x01;	/* Format and Linking = 01b */
-	log_response[13] = 0x02;	/* Parameter Length */
-	/* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
-	log_response[15] = temp_c_thresh;
-
-	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-	kfree(log_response);
-	return res;
-}
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
-					u16 mode_data_length, u16 blk_desc_len)
-{
-	/* Quick check to make sure I don't stomp on my own memory... */
-	if ((cdb10 && len < 8) || (!cdb10 && len < 4))
-		return -EINVAL;
-
-	if (cdb10) {
-		resp[0] = (mode_data_length & 0xFF00) >> 8;
-		resp[1] = (mode_data_length & 0x00FF);
-		resp[3] = 0x10 /* DPOFUA */;
-		resp[4] = llbaa;
-		resp[5] = RESERVED_FIELD;
-		resp[6] = (blk_desc_len & 0xFF00) >> 8;
-		resp[7] = (blk_desc_len & 0x00FF);
-	} else {
-		resp[0] = (mode_data_length & 0x00FF);
-		resp[2] = 0x10 /* DPOFUA */;
-		resp[3] = (blk_desc_len & 0x00FF);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				    u8 *resp, int len, u8 llbaa)
-{
-	int res;
-	int nvme_sc;
-	struct nvme_id_ns *id_ns;
-	u8 flbas;
-	u32 lba_length;
-
-	if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
-		return -EINVAL;
-	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
-		return -EINVAL;
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	flbas = (id_ns->flbas) & 0x0F;
-	lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
-	if (llbaa == 0) {
-		__be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
-		/* Byte 4 is reserved */
-		__be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
-		memcpy(resp, &tmp_cap, sizeof(u32));
-		memcpy(&resp[4], &tmp_len, sizeof(u32));
-	} else {
-		__be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
-		__be32 tmp_len = cpu_to_be32(lba_length);
-
-		memcpy(resp, &tmp_cap, sizeof(u64));
-		/* Bytes 8, 9, 10, 11 are reserved */
-		memcpy(&resp[12], &tmp_len, sizeof(u32));
-	}
-
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_CONTROL_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_CONTROL;
-	resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
-	resp[2] = 0x0E;		/* TST=000b, TMF_ONLY=0, DPICZ=1,
-				 * D_SENSE=1, GLTSD=1, RLEC=0 */
-	resp[3] = 0x12;		/* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
-	/* Byte 4:  VS=0, RAC=0, UA_INT=0, SWP=0 */
-	resp[5] = 0x40;		/* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
-	/* resp[6] and [7] are obsolete, thus zero */
-	resp[8] = 0xFF;		/* Busy timeout period = 0xffff */
-	resp[9] = 0xFF;
-	/* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
-	return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr,
-					u8 *resp, int len)
-{
-	int res = 0;
-	int nvme_sc;
-	u32 feature_resp;
-	u8 vwc;
-
-	if (len < MODE_PAGE_CACHING_LEN)
-		return -EINVAL;
-
-	nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
-								&feature_resp);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	vwc = feature_resp & 0x00000001;
-
-	resp[0] = MODE_PAGE_CACHING;
-	resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
-	resp[2] = vwc << 2;
-	return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_POW_CND_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_POWER_CONDITION;
-	resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
-	/* All other bytes are zero */
-
-	return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *resp,
-					int len)
-{
-	if (len < MODE_PAGE_INF_EXC_LEN)
-		return -EINVAL;
-
-	resp[0] = MODE_PAGE_INFO_EXCEP;
-	resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
-	resp[2] = 0x88;
-	/* All other bytes are zero */
-
-	return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				     u8 *resp, int len)
-{
-	int res;
-	u16 mode_pages_offset_1 = 0;
-	u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
-	mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
-	mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
-	mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
-	res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
-					MODE_PAGE_CACHING_LEN);
-	if (res)
-		return res;
-	res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
-					MODE_PAGE_CONTROL_LEN);
-	if (res)
-		return res;
-	res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
-					MODE_PAGE_POW_CND_LEN);
-	if (res)
-		return res;
-	return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
-					MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
-	if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
-		/* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
-		return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
-	} else {
-		return 0;
-	}
-}
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *cmd,
-					u16 alloc_len, u8 cdb10,
-					int (*mode_page_fill_func)
-					(struct nvme_ns *,
-					struct sg_io_hdr *hdr, u8 *, int),
-					u16 mode_pages_tot_len)
-{
-	int res;
-	int xfer_len;
-	u8 *response;
-	u8 dbd, llbaa;
-	u16 resp_size;
-	int mph_size;
-	u16 mode_pages_offset_1;
-	u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
-	dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
-	llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
-	mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
-	blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
-	resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
-	/* Refer spc4r34 Table 440 for calculation of Mode data Length field */
-	mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
-	blk_desc_offset = mph_size;
-	mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
-					llbaa, mode_data_length, blk_desc_len);
-	if (res)
-		goto out_free;
-	if (blk_desc_len > 0) {
-		res = nvme_trans_fill_blk_desc(ns, hdr,
-					       &response[blk_desc_offset],
-					       blk_desc_len, llbaa);
-		if (res)
-			goto out_free;
-	}
-	res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
-					mode_pages_tot_len);
-	if (res)
-		goto out_free;
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
-	kfree(response);
- out_mem:
-	return res;
-}
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
-								u8 cdb16)
-{
-	u8 flbas;
-	u32 lba_length;
-	u64 rlba;
-	u8 prot_en;
-	u8 p_type_lut[4] = {0, 0, 1, 2};
-	__be64 tmp_rlba;
-	__be32 tmp_rlba_32;
-	__be32 tmp_len;
-
-	flbas = (id_ns->flbas) & 0x0F;
-	lba_length = (1 << (id_ns->lbaf[flbas].ds));
-	rlba = le64_to_cpup(&id_ns->nsze) - 1;
-	(id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
-
-	if (!cdb16) {
-		if (rlba > 0xFFFFFFFF)
-			rlba = 0xFFFFFFFF;
-		tmp_rlba_32 = cpu_to_be32(rlba);
-		tmp_len = cpu_to_be32(lba_length);
-		memcpy(response, &tmp_rlba_32, sizeof(u32));
-		memcpy(&response[4], &tmp_len, sizeof(u32));
-	} else {
-		tmp_rlba = cpu_to_be64(rlba);
-		tmp_len = cpu_to_be32(lba_length);
-		memcpy(response, &tmp_rlba, sizeof(u64));
-		memcpy(&response[8], &tmp_len, sizeof(u32));
-		response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
-		/* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
-		/* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
-		/* Bytes 16-31 - Reserved */
-	}
-}
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 buffer_id)
-{
-	struct nvme_command c;
-	int nvme_sc;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_activate_fw;
-	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 opcode, u32 tot_len, u32 offset,
-					u8 buffer_id)
-{
-	int nvme_sc;
-	struct nvme_command c;
-
-	if (hdr->iovec_count > 0) {
-		/* Assuming SGL is not allowed for this command */
-		return nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST,
-					SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_download_fw;
-	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
-	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
-	nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
-			hdr->dxferp, tot_len, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
-						u16 *bd_len, u8 *llbaa)
-{
-	if (cdb10) {
-		/* 10 Byte CDB */
-		*bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
-			parm_list[MODE_SELECT_10_BD_OFFSET + 1];
-		*llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
-				MODE_SELECT_10_LLBAA_MASK;
-	} else {
-		/* 6 Byte CDB */
-		*bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
-	}
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
-					u16 idx, u16 bd_len, u8 llbaa)
-{
-	/* Store block descriptor info if a FORMAT UNIT comes later */
-	/* TODO Saving 1st BD info; what to do if multiple BD received? */
-	if (llbaa == 0) {
-		/* Standard Block Descriptor - spc4r34 7.5.5.1 */
-		ns->mode_select_num_blocks =
-				(parm_list[idx + 1] << 16) +
-				(parm_list[idx + 2] << 8) +
-				(parm_list[idx + 3]);
-
-		ns->mode_select_block_len =
-				(parm_list[idx + 5] << 16) +
-				(parm_list[idx + 6] << 8) +
-				(parm_list[idx + 7]);
-	} else {
-		/* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
-		ns->mode_select_num_blocks =
-				(((u64)parm_list[idx + 0]) << 56) +
-				(((u64)parm_list[idx + 1]) << 48) +
-				(((u64)parm_list[idx + 2]) << 40) +
-				(((u64)parm_list[idx + 3]) << 32) +
-				(((u64)parm_list[idx + 4]) << 24) +
-				(((u64)parm_list[idx + 5]) << 16) +
-				(((u64)parm_list[idx + 6]) << 8) +
-				((u64)parm_list[idx + 7]);
-
-		ns->mode_select_block_len =
-				(parm_list[idx + 12] << 24) +
-				(parm_list[idx + 13] << 16) +
-				(parm_list[idx + 14] << 8) +
-				(parm_list[idx + 15]);
-	}
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *mode_page, u8 page_code)
-{
-	int res = 0;
-	int nvme_sc;
-	unsigned dword11;
-
-	switch (page_code) {
-	case MODE_PAGE_CACHING:
-		dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-		nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
-					    dword11, NULL, 0, NULL);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		break;
-	case MODE_PAGE_CONTROL:
-		break;
-	case MODE_PAGE_POWER_CONDITION:
-		/* Verify the OS is not trying to set timers */
-		if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_PARAMETER,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			break;
-		}
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
-	return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-					u8 *cmd, u16 parm_list_len, u8 pf,
-					u8 sp, u8 cdb10)
-{
-	int res;
-	u8 *parm_list;
-	u16 bd_len;
-	u8 llbaa = 0;
-	u16 index, saved_index;
-	u8 page_code;
-	u16 mp_size;
-
-	/* Get parm list from data-in/out buffer */
-	parm_list = kmalloc(parm_list_len, GFP_KERNEL);
-	if (parm_list == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
-	if (res)
-		goto out_mem;
-
-	nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
-	index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
-	if (bd_len != 0) {
-		/* Block Descriptors present, parse */
-		nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
-		index += bd_len;
-	}
-	saved_index = index;
-
-	/* Multiple mode pages may be present; iterate through all */
-	/* In 1st Iteration, don't do NVME Command, only check for CDB errors */
-	do {
-		page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-		mp_size = parm_list[index + 1] + 2;
-		if ((page_code != MODE_PAGE_CACHING) &&
-		    (page_code != MODE_PAGE_CONTROL) &&
-		    (page_code != MODE_PAGE_POWER_CONDITION)) {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_mem;
-		}
-		index += mp_size;
-	} while (index < parm_list_len);
-
-	/* In 2nd Iteration, do the NVME Commands */
-	index = saved_index;
-	do {
-		page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-		mp_size = parm_list[index + 1] + 2;
-		res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
-								page_code);
-		if (res)
-			break;
-		index += mp_size;
-	} while (index < parm_list_len);
-
- out_mem:
-	kfree(parm_list);
- out:
-	return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
-					     struct sg_io_hdr *hdr)
-{
-	int res = 0;
-	int nvme_sc;
-	u8 flbas;
-
-	/*
-	 * SCSI Expects a MODE SELECT would have been issued prior to
-	 * a FORMAT UNIT, and the block size and number would be used
-	 * from the block descriptor in it. If a MODE SELECT had not
-	 * been issued, FORMAT shall use the current values for both.
-	 */
-
-	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-		struct nvme_id_ns *id_ns;
-
-		nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			return res;
-
-		if (ns->mode_select_num_blocks == 0)
-			ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
-		if (ns->mode_select_block_len == 0) {
-			flbas = (id_ns->flbas) & 0x0F;
-			ns->mode_select_block_len =
-						(1 << (id_ns->lbaf[flbas].ds));
-		}
-
-		kfree(id_ns);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
-					u8 format_prot_info, u8 *nvme_pf_code)
-{
-	int res;
-	u8 *parm_list;
-	u8 pf_usage, pf_code;
-
-	parm_list = kmalloc(len, GFP_KERNEL);
-	if (parm_list == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	res = nvme_trans_copy_from_user(hdr, parm_list, len);
-	if (res)
-		goto out_mem;
-
-	if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
-				FORMAT_UNIT_IMMED_MASK) != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out_mem;
-	}
-
-	if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
-	    (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out_mem;
-	}
-	pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
-			FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
-	pf_code = (pf_usage << 2) | format_prot_info;
-	switch (pf_code) {
-	case 0:
-		*nvme_pf_code = 0;
-		break;
-	case 2:
-		*nvme_pf_code = 1;
-		break;
-	case 3:
-		*nvme_pf_code = 2;
-		break;
-	case 7:
-		*nvme_pf_code = 3;
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out_mem:
-	kfree(parm_list);
- out:
-	return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				   u8 prot_info)
-{
-	int res;
-	int nvme_sc;
-	struct nvme_id_ns *id_ns;
-	u8 i;
-	u8 nlbaf;
-	u8 selected_lbaf = 0xFF;
-	u32 cdw10 = 0;
-	struct nvme_command c;
-
-	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-
-	nlbaf = id_ns->nlbaf;
-
-	for (i = 0; i < nlbaf; i++) {
-		if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
-			selected_lbaf = i;
-			break;
-		}
-	}
-	if (selected_lbaf > 0x0F) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-	if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	}
-
-	cdw10 |= prot_info << 5;
-	cdw10 |= selected_lbaf & 0x0F;
-	memset(&c, 0, sizeof(c));
-	c.format.opcode = nvme_admin_format_nvm;
-	c.format.nsid = cpu_to_le32(ns->ns_id);
-	c.format.cdw10 = cpu_to_le32(cdw10);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
-	kfree(id_ns);
-	return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
-					struct nvme_trans_io_cdb *cdb_info,
-					u32 max_blocks)
-{
-	/* If using iovecs, send one nvme command per vector */
-	if (hdr->iovec_count > 0)
-		return hdr->iovec_count;
-	else if (cdb_info->xfer_len > max_blocks)
-		return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
-	else
-		return 1;
-}
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	u16 control = 0;
-
-	/* When Protection information support is added, implement here */
-
-	if (cdb_info->fua > 0)
-		control |= NVME_RW_FUA;
-
-	return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
-	int nvme_sc = NVME_SC_SUCCESS;
-	u32 num_cmds;
-	u64 unit_len;
-	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
-	u32 retcode;
-	u32 i = 0;
-	u64 nvme_offset = 0;
-	void __user *next_mapping_addr;
-	struct nvme_command c;
-	u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
-	u16 control;
-	u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
-	num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
-	/*
-	 * This loop handles two cases.
-	 * First, when an SGL is used in the form of an iovec list:
-	 *   - Use iov_base as the next mapping address for the nvme command_id
-	 *   - Use iov_len as the data transfer length for the command.
-	 * Second, when we have a single buffer
-	 *   - If larger than max_blocks, split into chunks, offset
-	 *        each nvme command accordingly.
-	 */
-	for (i = 0; i < num_cmds; i++) {
-		memset(&c, 0, sizeof(c));
-		if (hdr->iovec_count > 0) {
-			struct sg_iovec sgl;
-
-			retcode = copy_from_user(&sgl, hdr->dxferp +
-					i * sizeof(struct sg_iovec),
-					sizeof(struct sg_iovec));
-			if (retcode)
-				return -EFAULT;
-			unit_len = sgl.iov_len;
-			unit_num_blocks = unit_len >> ns->lba_shift;
-			next_mapping_addr = sgl.iov_base;
-		} else {
-			unit_num_blocks = min((u64)max_blocks,
-					(cdb_info->xfer_len - nvme_offset));
-			unit_len = unit_num_blocks << ns->lba_shift;
-			next_mapping_addr = hdr->dxferp +
-					((1 << ns->lba_shift) * nvme_offset);
-		}
-
-		c.rw.opcode = opcode;
-		c.rw.nsid = cpu_to_le32(ns->ns_id);
-		c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
-		c.rw.length = cpu_to_le16(unit_num_blocks - 1);
-		control = nvme_trans_io_get_control(ns, cdb_info);
-		c.rw.control = cpu_to_le16(control);
-
-		if (get_capacity(ns->disk) - unit_num_blocks <
-				cdb_info->lba + nvme_offset) {
-			nvme_sc = NVME_SC_LBA_RANGE;
-			break;
-		}
-		nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
-				next_mapping_addr, unit_len, NULL, 0);
-		if (nvme_sc)
-			break;
-
-		nvme_offset += unit_num_blocks;
-	}
-
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
-							u8 *cmd)
-{
-	int res = 0;
-	struct nvme_trans_io_cdb cdb_info = { 0, };
-	u8 opcode = cmd[0];
-	u64 xfer_bytes;
-	u64 sum_iov_len = 0;
-	struct sg_iovec sgl;
-	int i;
-	size_t not_copied;
-
-	/*
-	 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
-	 * but always in the same place for all others.
-	 */
-	switch (opcode) {
-	case WRITE_6:
-	case READ_6:
-		break;
-	default:
-		cdb_info.fua = cmd[1] & 0x8;
-		cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
-		if (cdb_info.prot_info && !ns->pi_type) {
-			return nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST,
-					SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		}
-	}
-
-	switch (opcode) {
-	case WRITE_6:
-	case READ_6:
-		cdb_info.lba = get_unaligned_be24(&cmd[1]);
-		cdb_info.xfer_len = cmd[4];
-		if (cdb_info.xfer_len == 0)
-			cdb_info.xfer_len = 256;
-		break;
-	case WRITE_10:
-	case READ_10:
-		cdb_info.lba = get_unaligned_be32(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
-		break;
-	case WRITE_12:
-	case READ_12:
-		cdb_info.lba = get_unaligned_be32(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
-		break;
-	case WRITE_16:
-	case READ_16:
-		cdb_info.lba = get_unaligned_be64(&cmd[2]);
-		cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
-		break;
-	default:
-		/* Will never really reach here */
-		res = -EIO;
-		goto out;
-	}
-
-	/* Calculate total length of transfer (in bytes) */
-	if (hdr->iovec_count > 0) {
-		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
-						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
-				return -EFAULT;
-			sum_iov_len += sgl.iov_len;
-			/* IO vector sizes should be multiples of block size */
-			if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
-				res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_PARAMETER,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-				goto out;
-			}
-		}
-	} else {
-		sum_iov_len = hdr->dxfer_len;
-	}
-
-	/* As Per sg ioctl howto, if the lengths differ, use the lower one */
-	xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
-	/* If block count and actual data buffer size don't match, error out */
-	if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
-		res = -EINVAL;
-		goto out;
-	}
-
-	/* Check for 0 length transfer - it is not illegal */
-	if (cdb_info.xfer_len == 0)
-		goto out;
-
-	/* Send NVMe IO Command(s) */
-	res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
-	if (res)
-		goto out;
-
- out:
-	return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u8 evpd;
-	u8 page_code;
-	int alloc_len;
-	u8 *inq_response;
-
-	evpd = cmd[1] & 0x01;
-	page_code = cmd[2];
-	alloc_len = get_unaligned_be16(&cmd[3]);
-
-	inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
-				GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	if (evpd == 0) {
-		if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
-			res = nvme_trans_standard_inquiry_page(ns, hdr,
-						inq_response, alloc_len);
-		} else {
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		}
-	} else {
-		switch (page_code) {
-		case VPD_SUPPORTED_PAGES:
-			res = nvme_trans_supported_vpd_pages(ns, hdr,
-						inq_response, alloc_len);
-			break;
-		case VPD_SERIAL_NUMBER:
-			res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_DEVICE_IDENTIFIERS:
-			res = nvme_trans_device_id_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_EXTENDED_INQUIRY:
-			res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
-			break;
-		case VPD_BLOCK_LIMITS:
-			res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
-								alloc_len);
-			break;
-		case VPD_BLOCK_DEV_CHARACTERISTICS:
-			res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
-			break;
-		default:
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			break;
-		}
-	}
-	kfree(inq_response);
- out_mem:
-	return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u16 alloc_len;
-	u8 pc;
-	u8 page_code;
-
-	if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
-	pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
-	if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	alloc_len = get_unaligned_be16(&cmd[7]);
-	switch (page_code) {
-	case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
-		res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
-		break;
-	case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
-		res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
-		break;
-	case LOG_PAGE_TEMPERATURE_PAGE:
-		res = nvme_trans_log_temperature(ns, hdr, alloc_len);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	u8 cdb10 = 0;
-	u16 parm_list_len;
-	u8 page_format;
-	u8 save_pages;
-
-	page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
-	save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
-	if (cmd[0] == MODE_SELECT) {
-		parm_list_len = cmd[4];
-	} else {
-		parm_list_len = cmd[7];
-		cdb10 = 1;
-	}
-
-	if (parm_list_len != 0) {
-		/*
-		 * According to SPC-4 r24, a parameter list length field of 0
-		 * shall not be considered an error
-		 */
-		return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
-						page_format, save_pages, cdb10);
-	}
-
-	return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u16 alloc_len;
-	u8 cdb10 = 0;
-
-	if (cmd[0] == MODE_SENSE) {
-		alloc_len = cmd[4];
-	} else {
-		alloc_len = get_unaligned_be16(&cmd[7]);
-		cdb10 = 1;
-	}
-
-	if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
-			MODE_SENSE_PC_CURRENT_VALUES) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
-	case MODE_PAGE_CACHING:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_caching_page,
-						MODE_PAGE_CACHING_LEN);
-		break;
-	case MODE_PAGE_CONTROL:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_control_page,
-						MODE_PAGE_CONTROL_LEN);
-		break;
-	case MODE_PAGE_POWER_CONDITION:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_pow_cnd_page,
-						MODE_PAGE_POW_CND_LEN);
-		break;
-	case MODE_PAGE_INFO_EXCEP:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_inf_exc_page,
-						MODE_PAGE_INF_EXC_LEN);
-		break;
-	case MODE_PAGE_RETURN_ALL:
-		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-						cdb10,
-						&nvme_trans_fill_all_pages,
-						MODE_PAGE_ALL_LEN);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd, u8 cdb16)
-{
-	int res;
-	int nvme_sc;
-	u32 alloc_len;
-	u32 resp_size;
-	u32 xfer_len;
-	struct nvme_id_ns *id_ns;
-	u8 *response;
-
-	if (cdb16) {
-		alloc_len = get_unaligned_be32(&cmd[10]);
-		resp_size = READ_CAP_16_RESP_SIZE;
-	} else {
-		alloc_len = READ_CAP_10_RESP_SIZE;
-		resp_size = READ_CAP_10_RESP_SIZE;
-	}
-
-	nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;	
-
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out_free_id;
-	}
-	nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out_free_id:
-	kfree(id_ns);
-	return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	int nvme_sc;
-	u32 alloc_len, xfer_len, resp_size;
-	u8 *response;
-	struct nvme_id_ctrl *id_ctrl;
-	u32 ll_length, lun_id;
-	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
-	__be32 tmp_len;
-
-	switch (cmd[2]) {
-	default:
-		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	case ALL_LUNS_RETURNED:
-	case ALL_WELL_KNOWN_LUNS_RETURNED:
-	case RESTRICTED_LUNS_RETURNED:
-		nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
-		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			return res;
-
-		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
-		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
-		alloc_len = get_unaligned_be32(&cmd[6]);
-		if (alloc_len < resp_size) {
-			res = nvme_trans_completion(hdr,
-					SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_free_id;
-		}
-
-		response = kzalloc(resp_size, GFP_KERNEL);
-		if (response == NULL) {
-			res = -ENOMEM;
-			goto out_free_id;
-		}
-
-		/* The first LUN ID will always be 0 per the SAM spec */
-		for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
-			/*
-			 * Set the LUN Id and then increment to the next LUN
-			 * location in the parameter data.
-			 */
-			__be64 tmp_id = cpu_to_be64(lun_id);
-			memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
-			lun_id_offset += LUN_ENTRY_SIZE;
-		}
-		tmp_len = cpu_to_be32(ll_length);
-		memcpy(response, &tmp_len, sizeof(u32));
-	}
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out_free_id:
-	kfree(id_ctrl);
-	return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u8 alloc_len, xfer_len, resp_size;
-	u8 desc_format;
-	u8 *response;
-
-	desc_format = cmd[1] & 0x01;
-	alloc_len = cmd[4];
-
-	resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
-					(FIXED_FMT_SENSE_DATA_SIZE));
-	response = kzalloc(resp_size, GFP_KERNEL);
-	if (response == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	if (desc_format) {
-		/* Descriptor Format Sense Data */
-		response[0] = DESC_FORMAT_SENSE_DATA;
-		response[1] = NO_SENSE;
-		/* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
-		response[2] = SCSI_ASC_NO_SENSE;
-		response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		/* SDAT_OVFL = 0 | Additional Sense Length = 0 */
-	} else {
-		/* Fixed Format Sense Data */
-		response[0] = FIXED_SENSE_DATA;
-		/* Byte 1 = Obsolete */
-		response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
-		/* Bytes 3-6 - Information - set to zero */
-		response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
-		/* Bytes 8-11 - Cmd Specific Information - set to zero */
-		response[12] = SCSI_ASC_NO_SENSE;
-		response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-		/* Byte 14 = Field Replaceable Unit Code = 0 */
-		/* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
-	}
-
-	xfer_len = min(alloc_len, resp_size);
-	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-	kfree(response);
- out:
-	return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr)
-{
-	int nvme_sc;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_cmd_flush;
-	c.common.nsid = cpu_to_le32(ns->ns_id);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res;
-	u8 parm_hdr_len = 0;
-	u8 nvme_pf_code = 0;
-	u8 format_prot_info, long_list, format_data;
-
-	format_prot_info = (cmd[1] & 0xc0) >> 6;
-	long_list = cmd[1] & 0x20;
-	format_data = cmd[1] & 0x10;
-
-	if (format_data != 0) {
-		if (format_prot_info != 0) {
-			if (long_list == 0)
-				parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
-			else
-				parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
-		}
-	} else if (format_data == 0 && format_prot_info != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-
-	/* Get parm header from data-in/out buffer */
-	/*
-	 * According to the translation spec, the only fields in the parameter
-	 * list we are concerned with are in the header. So allocate only that.
-	 */
-	if (parm_hdr_len > 0) {
-		res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
-					format_prot_info, &nvme_pf_code);
-		if (res)
-			goto out;
-	}
-
-	/* Attempt to activate any previously downloaded firmware image */
-	res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
-	/* Determine Block size and count and send format command */
-	res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
-	if (res)
-		goto out;
-
-	res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
-	return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr,
-					u8 *cmd)
-{
-	if (nvme_ctrl_ready(ns->ctrl))
-		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
-					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-	else
-		return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	int res = 0;
-	u32 buffer_offset, parm_list_length;
-	u8 buffer_id, mode;
-
-	parm_list_length = get_unaligned_be24(&cmd[6]);
-	if (parm_list_length % BYTES_TO_DWORDS != 0) {
-		/* NVMe expects Firmware file to be a whole number of DWORDS */
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	buffer_id = cmd[2];
-	if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	}
-	mode = cmd[1] & 0x1f;
-	buffer_offset = get_unaligned_be24(&cmd[3]);
-
-	switch (mode) {
-	case DOWNLOAD_SAVE_ACTIVATE:
-		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
-		if (res)
-			goto out;
-		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-		break;
-	case DOWNLOAD_SAVE_DEFER_ACTIVATE:
-		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
-		break;
-	case ACTIVATE_DEFERRED_MICROCODE:
-		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-		break;
-	default:
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-
- out:
-	return res;
-}
-
-struct scsi_unmap_blk_desc {
-	__be64	slba;
-	__be32	nlb;
-	u32	resv;
-};
-
-struct scsi_unmap_parm_list {
-	__be16	unmap_data_len;
-	__be16	unmap_blk_desc_data_len;
-	u32	resv;
-	struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
-{
-	struct scsi_unmap_parm_list *plist;
-	struct nvme_dsm_range *range;
-	struct nvme_command c;
-	int i, nvme_sc, res;
-	u16 ndesc, list_len;
-
-	list_len = get_unaligned_be16(&cmd[7]);
-	if (!list_len)
-		return -EINVAL;
-
-	plist = kmalloc(list_len, GFP_KERNEL);
-	if (!plist)
-		return -ENOMEM;
-
-	res = nvme_trans_copy_from_user(hdr, plist, list_len);
-	if (res)
-		goto out;
-
-	ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
-	if (!ndesc || ndesc > 256) {
-		res = -EINVAL;
-		goto out;
-	}
-
-	range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
-	if (!range) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0; i < ndesc; i++) {
-		range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
-		range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
-		range[i].cattr = 0;
-	}
-
-	memset(&c, 0, sizeof(c));
-	c.dsm.opcode = nvme_cmd_dsm;
-	c.dsm.nsid = cpu_to_le32(ns->ns_id);
-	c.dsm.nr = cpu_to_le32(ndesc - 1);
-	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
-			ndesc * sizeof(*range));
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
-	kfree(range);
- out:
-	kfree(plist);
-	return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
-	u8 cmd[16];
-	int retcode;
-	unsigned int opcode;
-
-	if (hdr->cmdp == NULL)
-		return -EMSGSIZE;
-	if (hdr->cmd_len > sizeof(cmd))
-		return -EINVAL;
-	if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
-		return -EFAULT;
-
-	/*
-	 * Prime the hdr with good status for scsi commands that don't require
-	 * an nvme command for translation.
-	 */
-	retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
-	if (retcode)
-		return retcode;
-
-	opcode = cmd[0];
-
-	switch (opcode) {
-	case READ_6:
-	case READ_10:
-	case READ_12:
-	case READ_16:
-		retcode = nvme_trans_io(ns, hdr, 0, cmd);
-		break;
-	case WRITE_6:
-	case WRITE_10:
-	case WRITE_12:
-	case WRITE_16:
-		retcode = nvme_trans_io(ns, hdr, 1, cmd);
-		break;
-	case INQUIRY:
-		retcode = nvme_trans_inquiry(ns, hdr, cmd);
-		break;
-	case LOG_SENSE:
-		retcode = nvme_trans_log_sense(ns, hdr, cmd);
-		break;
-	case MODE_SELECT:
-	case MODE_SELECT_10:
-		retcode = nvme_trans_mode_select(ns, hdr, cmd);
-		break;
-	case MODE_SENSE:
-	case MODE_SENSE_10:
-		retcode = nvme_trans_mode_sense(ns, hdr, cmd);
-		break;
-	case READ_CAPACITY:
-		retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
-		break;
-	case SERVICE_ACTION_IN_16:
-		switch (cmd[1]) {
-		case SAI_READ_CAPACITY_16:
-			retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
-			break;
-		default:
-			goto out;
-		}
-		break;
-	case REPORT_LUNS:
-		retcode = nvme_trans_report_luns(ns, hdr, cmd);
-		break;
-	case REQUEST_SENSE:
-		retcode = nvme_trans_request_sense(ns, hdr, cmd);
-		break;
-	case SYNCHRONIZE_CACHE:
-		retcode = nvme_trans_synchronize_cache(ns, hdr);
-		break;
-	case FORMAT_UNIT:
-		retcode = nvme_trans_format_unit(ns, hdr, cmd);
-		break;
-	case TEST_UNIT_READY:
-		retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
-		break;
-	case WRITE_BUFFER:
-		retcode = nvme_trans_write_buffer(ns, hdr, cmd);
-		break;
-	case UNMAP:
-		retcode = nvme_trans_unmap(ns, hdr, cmd);
-		break;
-	default:
- out:
-		retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-				ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
-				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		break;
-	}
-	return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
-	struct sg_io_hdr hdr;
-	int retcode;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
-		return -EFAULT;
-	if (hdr.interface_id != 'S')
-		return -EINVAL;
-
-	/*
-	 * A positive return code means a NVMe status, which has been
-	 * translated to sense data.
-	 */
-	retcode = nvme_scsi_translate(ns, &hdr);
-	if (retcode < 0)
-		return retcode;
-	if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
-		return -EFAULT;
-	return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
-	return put_user(sg_version_num, ip);
-}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f970..35f930d 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 
 static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 {
-	static const int buf_size = 4096;
+	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmet_ns *ns;
 	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+				    void *id, off_t *off)
+{
+	struct nvme_ns_id_desc desc = {
+		.nidt = type,
+		.nidl = len,
+	};
+	u16 status;
+
+	status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+	if (status)
+		return status;
+	*off += sizeof(desc);
+
+	status = nvmet_copy_to_sgl(req, *off, id, len);
+	if (status)
+		return status;
+	*off += len;
+
+	return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+	struct nvmet_ns *ns;
+	u16 status = 0;
+	off_t off = 0;
+
+	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+	if (!ns) {
+		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+		goto out;
+	}
+
+	if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+		status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+						  NVME_NIDT_UUID_LEN,
+						  &ns->uuid, &off);
+		if (status)
+			goto out_put_ns;
+	}
+	if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+		status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+						  NVME_NIDT_NGUID_LEN,
+						  &ns->nguid, &off);
+		if (status)
+			goto out_put_ns;
+	}
+
+	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+			off) != NVME_IDENTIFY_DATA_SIZE - off)
+		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+	nvmet_put_namespace(ns);
+out:
+	nvmet_req_complete(req, status);
+}
+
 /*
  * A "mimimum viable" abort implementation: the command is mandatory in the
  * spec, but we are not required to do any useful work.  We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		}
 		break;
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_NS:
 			req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		case NVME_ID_CNS_NS_ACTIVE_LIST:
 			req->execute = nvmet_execute_identify_nslist;
 			return 0;
+		case NVME_ID_CNS_NS_DESC_LIST:
+			req->execute = nvmet_execute_identify_desclist;
+			return 0;
 		}
 		break;
 	case nvme_admin_abort_cmd:
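For reference, the list built by nvmet_execute_identify_desclist() above is the standard Identify (CNS 0x03) payload: a packed sequence of nvme_ns_id_desc headers, each followed by nidl bytes of identifier, with the rest of the 4K buffer zeroed so a zero descriptor terminates the list. A minimal host-side walk of such a buffer could look like the sketch below; the helper name is illustrative and not part of this patch.

#include <linux/nvme.h>
#include <linux/printk.h>

static void example_parse_ns_desclist(const u8 *buf)
{
	size_t off = 0;

	while (off + sizeof(struct nvme_ns_id_desc) <= NVME_IDENTIFY_DATA_SIZE) {
		const struct nvme_ns_id_desc *d = (const void *)(buf + off);

		if (!d->nidl)		/* zeroed descriptor: end of list */
			break;

		switch (d->nidt) {
		case NVME_NIDT_UUID:	/* 16-byte UUID, as emitted above */
		case NVME_NIDT_NGUID:	/* 16-byte namespace GUID */
			pr_info("ns descriptor type %u, len %u\n",
				d->nidt, d->nidl);
			break;
		default:		/* unknown types are skipped over */
			break;
		}
		off += sizeof(*d) + d->nidl;
	}
}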
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800..a358ecd 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_ns_, device_path);
 
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+					  const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	struct nvmet_subsys *subsys = ns->subsys;
+	int ret = 0;
+
+
+	mutex_lock(&subsys->lock);
+	if (ns->enabled) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+
+	if (uuid_parse(page, &ns->uuid))
+		ret = -EINVAL;
+
+out_unlock:
+	mutex_unlock(&subsys->lock);
+	return ret ? ret : count;
+}
+
 static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
 {
 	return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
 }
 
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
 static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
 		const char *page, size_t count)
 {
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
 static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_device_path,
 	&nvmet_ns_attr_device_nguid,
+	&nvmet_ns_attr_device_uuid,
 	&nvmet_ns_attr_enable,
 	NULL,
 };
@@ -619,8 +650,45 @@ static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
 
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+					      char *page)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+
+	if (NVME_TERTIARY(subsys->ver))
+		return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver),
+				(int)NVME_TERTIARY(subsys->ver));
+	else
+		return snprintf(page, PAGE_SIZE, "%d.%d\n",
+				(int)NVME_MAJOR(subsys->ver),
+				(int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+					       const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	int major, minor, tertiary = 0;
+	int ret;
+
+
+	ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+	if (ret != 2 && ret != 3)
+		return -EINVAL;
+
+	down_write(&nvmet_config_sem);
+	subsys->ver = NVME_VS(major, minor, tertiary);
+	up_write(&nvmet_config_sem);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
 	&nvmet_subsys_attr_attr_allow_any_host,
+	&nvmet_subsys_attr_version,
 	NULL,
 };
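The new version attribute leans on the NVME_VS()/NVME_MAJOR()/NVME_MINOR()/NVME_TERTIARY() helpers from <linux/nvme.h>. Assuming their usual definitions, a write of "1.3.0" round-trips as in this illustrative snippet (not part of the patch):

#include <linux/bug.h>
#include <linux/nvme.h>

static void example_version_roundtrip(void)
{
	u32 ver = NVME_VS(1, 3, 0);	/* packed as in the controller VS register */

	WARN_ON(NVME_MAJOR(ver) != 1);
	WARN_ON(NVME_MINOR(ver) != 3);
	WARN_ON(NVME_TERTIARY(ver) != 0);
}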
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index eb9399a..b5b4ac1 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 
 	ns->nsid = nsid;
 	ns->subsys = subsys;
+	uuid_gen(&ns->uuid);
 
 	return ns;
 }
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	if (!subsys)
 		return NULL;
 
-	subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
 
 	switch (type) {
 	case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597..8f3b57b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
 	e->portid = port->disc_addr.portid;
 	/* we support only dynamic controllers */
 	e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
-	e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+	e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
 	e->subtype = type;
 	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
 	memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	case nvme_admin_identify:
-		req->data_len = 4096;
+		req->data_len = NVME_IDENTIFY_DATA_SIZE;
 		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_CTRL:
 			req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae..7692a96 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	/* clear any response payload */
 	memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
 
+	fod->data_sg = NULL;
+	fod->data_sg_cnt = 0;
+
 	ret = nvmet_req_init(&fod->req,
 				&fod->queue->nvme_cq,
 				&fod->queue->nvme_sq,
 				&nvmet_fc_tgt_fcp_ops);
-	if (!ret) {	/* bad SQE content or invalid ctrl state */
-		nvmet_fc_abort_op(tgtport, fod);
+	if (!ret) {
+		/* bad SQE content or invalid ctrl state */
+		/* nvmet layer has already called op done to send rsp. */
 		return;
 	}
 
 	/* keep a running counter of tail position */
 	atomic_inc(&fod->queue->sqtail);
 
-	fod->data_sg = NULL;
-	fod->data_sg_cnt = 0;
 	if (fod->total_length) {
 		ret = nvmet_fc_alloc_tgt_pgs(fod);
 		if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a661..1bb9d5b 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 			struct nvmefc_tgt_fcp_req *tgt_fcpreq)
 {
 	struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
-	int active;
 
 	/*
 	 * mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	 * after the abort request
 	 */
 	spin_lock(&tfcp_req->reqlock);
-	active = tfcp_req->active;
 	tfcp_req->aborted = true;
 	spin_unlock(&tfcp_req->reqlock);
 
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d..4012879 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
 	struct nvmet_req *req = bio->bi_private;
 
 	nvmet_req_complete(req,
-		bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
 
 	if (bio != &req->inline_bio)
 		bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
 		if (status) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 		} else {
 			submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index feb4971..5f55c68 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
 #include "../host/nvme.h"
 #include "../host/fabrics.h"
 
-#define NVME_LOOP_AQ_DEPTH		256
-
 #define NVME_LOOP_MAX_SEGMENTS		256
 
 /*
@@ -31,7 +29,7 @@
  */
 #define NVME_LOOP_NR_AEN_COMMANDS	1
 #define NVME_LOOP_AQ_BLKMQ_DEPTH	\
-	(NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+	(NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
 
 struct nvme_loop_iod {
 	struct nvme_request	nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
 };
 
 struct nvme_loop_ctrl {
-	spinlock_t		lock;
 	struct nvme_loop_queue	*queues;
 	u32			queue_count;
 
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
 
 	struct nvmet_ctrl	*target_ctrl;
 	struct work_struct	delete_work;
-	struct work_struct	reset_work;
 };
 
 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
 
 	/* queue error recovery */
-	schedule_work(&iod->queue->ctrl->reset_work);
+	nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
 
 	/* fail with DNR on admin cmd timeout */
 	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_loop_queue *queue = hctx->driver_data;
 	struct request *req = bd->rq;
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 
 	ret = nvme_setup_cmd(ns, req, &iod->cmd);
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret)
 		return ret;
 
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		nvme_cleanup_cmd(req);
 		blk_mq_start_request(req);
 		nvme_loop_queue_response(&iod->req);
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	}
 
 	if (blk_rq_bytes(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
-		ret = sg_alloc_table_chained(&iod->sg_table,
+		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
-				iod->sg_table.sgl);
-		if (ret)
-			return BLK_MQ_RQ_QUEUE_BUSY;
+				iod->sg_table.sgl))
+			return BLK_STS_RESOURCE;
 
 		iod->req.sg = iod->sg_table.sgl;
 		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_mq_start_request(req);
 
 	schedule_work(&iod->work);
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 }
 
 static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
 		struct request *req, unsigned int hctx_idx,
 		unsigned int numa_node)
 {
-	return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
-			hctx_idx + 1);
-}
+	struct nvme_loop_ctrl *ctrl = set->driver_data;
 
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
-		struct request *req, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+	return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+			(set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
 }
 
 static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.init_request	= nvme_loop_init_admin_request,
+	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_admin_hctx,
 	.timeout	= nvme_loop_timeout,
 };
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
 
-	if (!schedule_work(&ctrl->delete_work))
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
 		return -EBUSY;
 
 	return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
 
 static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_loop_ctrl *ctrl = container_of(work,
-					struct nvme_loop_ctrl, reset_work);
+	struct nvme_loop_ctrl *ctrl =
+		container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
 	bool changed;
 	int ret;
 
@@ -540,33 +530,16 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!schedule_work(&ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
-}
-
 static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
 	.name			= "loop",
 	.module			= THIS_MODULE,
-	.is_fabrics		= true,
+	.flags			= NVME_F_FABRICS,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
-	.reset_ctrl		= nvme_loop_reset_ctrl,
 	.free_ctrl		= nvme_loop_free_ctrl,
 	.submit_async_event	= nvme_loop_submit_async_event,
 	.delete_ctrl		= nvme_loop_del_ctrl,
-	.get_subsysnqn		= nvmf_get_subsysnqn,
 };
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	INIT_LIST_HEAD(&ctrl->list);
 
 	INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
 
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
 				0 /* no quirks, we're perfect! */);
 	if (ret)
 		goto out_put_ctrl;
 
-	spin_lock_init(&ctrl->lock);
-
 	ret = -ENOMEM;
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
 		__nvme_loop_del_ctrl(ctrl);
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
-	flush_scheduled_work();
+	flush_workqueue(nvme_wq);
 }
 
 module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index cfc5c7f..747bbdb 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/uuid.h>
 #include <linux/nvme.h>
 #include <linux/configfs.h>
 #include <linux/rcupdate.h>
@@ -46,6 +47,7 @@ struct nvmet_ns {
 	u32			blksize_shift;
 	loff_t			size;
 	u8			nguid[16];
+	uuid_t			uuid;
 
 	bool			enabled;
 	struct nvmet_subsys	*subsys;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde..56a4cba 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
 	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
-	if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+	if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
 		return NVME_RDMA_CM_INVALID_HSQSIZE;
 
 	/* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 
 /**
  * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:	rdma_cm id, used for nvmet port
  * @queue:      nvmet rdma queue (cm id qp_context)
- * @addr:	nvmet address (cm_id context)
  *
  * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device bound listener cm_id (where in this
+ * case queue will be null).
  *
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
  * we nullify the priv to prevent double cm_id destruction and destroying
  * the cm_id implicitly by returning a non-zero rc to the callout.
  */
 static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
 		struct nvmet_rdma_queue *queue)
 {
-	unsigned long flags;
+	struct nvmet_port *port;
 
-	if (!queue) {
-		struct nvmet_port *port = cm_id->context;
-
+	if (queue) {
 		/*
-		 * This is a listener cm_id. Make sure that
-		 * future remove_port won't invoke a double
-		 * cm_id destroy. use atomic xchg to make sure
-		 * we don't compete with remove_port.
+		 * This is a queue cm_id. We have registered
+		 * an ib_client to handle queue removal,
+		 * so don't interfere and just return.
 		 */
-		if (xchg(&port->priv, NULL) != cm_id)
-			return 0;
-	} else {
-		/*
-		 * This is a queue cm_id. Make sure that
-		 * release queue will not destroy the cm_id
-		 * and schedule all ctrl queues removal (only
-		 * if the queue is not disconnecting already).
-		 */
-		spin_lock_irqsave(&queue->state_lock, flags);
-		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
-			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
-		spin_unlock_irqrestore(&queue->state_lock, flags);
-		nvmet_rdma_queue_disconnect(queue);
-		flush_scheduled_work();
+		return 0;
 	}
 
+	port = cm_id->context;
+
+	/*
+	 * This is a listener cm_id. Make sure that
+	 * future remove_port won't invoke a double
+	 * cm_id destroy. Use atomic xchg to make sure
+	 * we don't compete with remove_port.
+	 */
+	if (xchg(&port->priv, NULL) != cm_id)
+		return 0;
+
 	/*
 	 * We need to return 1 so that the core will destroy
 	 * its own ID.  What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.delete_ctrl		= nvmet_rdma_delete_ctrl,
 };
 
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+	struct nvmet_rdma_queue *queue;
+
+	/* Device is being removed, delete all queues using this device */
+	mutex_lock(&nvmet_rdma_queue_mutex);
+	list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+		if (queue->dev->device != ib_device)
+			continue;
+
+		pr_info("Removing queue %d\n", queue->idx);
+		__nvmet_rdma_queue_disconnect(queue);
+	}
+	mutex_unlock(&nvmet_rdma_queue_mutex);
+
+	flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+	.name   = "nvmet_rdma",
+	.add = nvmet_rdma_add_one,
+	.remove = nvmet_rdma_remove_one
+};
+
 static int __init nvmet_rdma_init(void)
 {
-	return nvmet_register_transport(&nvmet_rdma_ops);
+	int ret;
+
+	ret = ib_register_client(&nvmet_rdma_ib_client);
+	if (ret)
+		return ret;
+
+	ret = nvmet_register_transport(&nvmet_rdma_ops);
+	if (ret)
+		goto err_ib_client;
+
+	return 0;
+
+err_ib_client:
+	ib_unregister_client(&nvmet_rdma_ib_client);
+	return ret;
 }
 
 static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 
 	flush_scheduled_work();
+	ib_unregister_client(&nvmet_rdma_ib_client);
 	ida_destroy(&nvmet_rdma_queue_ida);
 }
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 9416d05..28c38c7 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -144,8 +144,8 @@ int of_dma_configure(struct device *dev, struct device_node *np)
 		coherent ? " " : " not ");
 
 	iommu = of_iommu_configure(dev, np);
-	if (IS_ERR(iommu))
-		return PTR_ERR(iommu);
+	if (IS_ERR(iommu) && PTR_ERR(iommu) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	dev_dbg(dev, "device is%sbehind an iommu\n",
 		iommu ? " " : " not ");
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 71fecc2..703a421 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void)
 arch_initcall_sync(of_platform_default_populate_init);
 #endif
 
-static int of_platform_device_destroy(struct device *dev, void *data)
+int of_platform_device_destroy(struct device *dev, void *data)
 {
 	/* Do not touch devices not populated from the device tree */
 	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
@@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(of_platform_device_destroy);
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 74cf5ff..c80e37a 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -896,7 +896,7 @@ int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val)
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
 }
@@ -906,7 +906,7 @@ int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val)
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
 }
@@ -917,7 +917,7 @@ int pci_read_config_dword(const struct pci_dev *dev, int where,
 {
 	if (pci_dev_is_disconnected(dev)) {
 		*val = ~0;
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	}
 	return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
 }
@@ -926,7 +926,7 @@ EXPORT_SYMBOL(pci_read_config_dword);
 int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_byte);
@@ -934,7 +934,7 @@ EXPORT_SYMBOL(pci_write_config_byte);
 int pci_write_config_word(const struct pci_dev *dev, int where, u16 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_word(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_word);
@@ -943,7 +943,7 @@ int pci_write_config_dword(const struct pci_dev *dev, int where,
 					 u32 val)
 {
 	if (pci_dev_is_disconnected(dev))
-		return -ENODEV;
+		return PCIBIOS_DEVICE_NOT_FOUND;
 	return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_dword);
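Note that these accessors report failure with PCIBIOS_* codes rather than plain errnos; a caller that wants an errno can translate the result, as in this illustrative helper (not part of the patch):

#include <linux/pci.h>

static int example_read_vendor(struct pci_dev *dev, u16 *vendor)
{
	int ret = pci_read_config_word(dev, PCI_VENDOR_ID, vendor);

	/* pcibios_err_to_errno() maps PCIBIOS_DEVICE_NOT_FOUND to -ENODEV */
	return pcibios_err_to_errno(ret);
}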
diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c
index a98cba5..19a289b 100644
--- a/drivers/pci/dwc/pci-imx6.c
+++ b/drivers/pci/dwc/pci-imx6.c
@@ -252,7 +252,34 @@ static void imx6_pcie_reset_phy(struct imx6_pcie *imx6_pcie)
 static int imx6q_pcie_abort_handler(unsigned long addr,
 		unsigned int fsr, struct pt_regs *regs)
 {
-	return 0;
+	unsigned long pc = instruction_pointer(regs);
+	unsigned long instr = *(unsigned long *)pc;
+	int reg = (instr >> 12) & 15;
+
+	/*
+	 * If the instruction being executed was a read,
+	 * make it look like it read all-ones.
+	 */
+	if ((instr & 0x0c100000) == 0x04100000) {
+		unsigned long val;
+
+		if (instr & 0x00400000)
+			val = 255;
+		else
+			val = -1;
+
+		regs->uregs[reg] = val;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	if ((instr & 0x0e100090) == 0x00100090) {
+		regs->uregs[reg] = -1;
+		regs->ARM_pc += 4;
+		return 0;
+	}
+
+	return 1;
 }
 
 static void imx6_pcie_assert_core_reset(struct imx6_pcie *imx6_pcie)
@@ -819,8 +846,8 @@ static int __init imx6_pcie_init(void)
 	 * we can install the handler here without risking it
 	 * accessing some uninitialized driver state.
 	 */
-	hook_fault_code(16 + 6, imx6q_pcie_abort_handler, SIGBUS, 0,
-			"imprecise external abort");
+	hook_fault_code(8, imx6q_pcie_abort_handler, SIGBUS, 0,
+			"external abort on non-linefetch");
 
 	return platform_driver_register(&imx6_pcie_driver);
 }
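For readability, the masks used in the new abort handler are standard ARM instruction encodings; the note below is commentary only, not part of the patch.

/*
 *   (instr & 0x0c100000) == 0x04100000   single data transfer load
 *                                        (LDR/LDRB; bit 22 set means a
 *                                         byte load, hence 0xff vs ~0)
 *   (instr & 0x0e100090) == 0x00100090   halfword/signed loads
 *                                        (LDRH/LDRSB/LDRSH)
 *
 * Returning 0 reports the abort as handled after faking an all-ones
 * read and stepping the PC past the faulting load; returning 1 keeps
 * the previous behaviour and lets the kernel raise SIGBUS. Fault code
 * 8 is the "external abort on non-linefetch" seen for config accesses
 * to absent devices on i.MX6, which is why the hook moves there.
 */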
diff --git a/drivers/pci/endpoint/Kconfig b/drivers/pci/endpoint/Kconfig
index c23f146..c09623c 100644
--- a/drivers/pci/endpoint/Kconfig
+++ b/drivers/pci/endpoint/Kconfig
@@ -6,6 +6,7 @@
 
 config PCI_ENDPOINT
 	bool "PCI Endpoint Support"
+	depends on HAS_DMA
 	help
 	   Enable this configuration option to support configurable PCI
 	   endpoint. This should be enabled if the platform has a PCI
diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig
index 175edad..2942066 100644
--- a/drivers/pci/endpoint/functions/Kconfig
+++ b/drivers/pci/endpoint/functions/Kconfig
@@ -5,6 +5,7 @@
 config PCI_EPF_TEST
 	tristate "PCI Endpoint Test driver"
 	depends on PCI_ENDPOINT
+	select CRC32
 	help
 	   Enable this configuration option to enable the test driver
 	   for PCI Endpoint.
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 0018603..47070cf 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -21,13 +21,12 @@
 #include "pci.h"
 
 /*
- * The UUID is defined in the PCI Firmware Specification available here:
+ * The GUID is defined in the PCI Firmware Specification available here:
  * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
  */
-const u8 pci_acpi_dsm_uuid[] = {
-	0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
-	0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
-};
+const guid_t pci_acpi_dsm_guid =
+	GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+		  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
 
 #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
 static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
@@ -680,7 +679,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
 	if (!pci_is_root_bus(bus))
 		return;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
 				RESET_DELAY_DSM, NULL);
 	if (!obj)
 		return;
@@ -745,7 +744,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
 	if (bridge->ignore_reset_delay)
 		pdev->d3cold_delay = 0;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
 				FUNCTION_DELAY_DSM, NULL);
 	if (!obj)
 		return;
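The GUID_INIT() form encodes exactly the byte sequence the removed pci_acpi_dsm_uuid[] array spelled out, since guid_t stores its first three fields little-endian. An illustrative sanity check (assumes the declaration from <linux/pci-acpi.h>; not part of the patch):

#include <linux/bug.h>
#include <linux/pci-acpi.h>
#include <linux/string.h>

static void example_check_dsm_guid(void)
{
	static const u8 old_bytes[16] = {
		0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
		0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
	};

	WARN_ON(memcmp(&pci_acpi_dsm_guid, old_bytes, sizeof(old_bytes)));
}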
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 5135737..2d8db3e 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -172,7 +172,7 @@ static int dsm_get_label(struct device *dev, char *buf,
 	if (!handle)
 		return -1;
 
-	obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				DEVICE_LABEL_DSM, NULL);
 	if (!obj)
 		return -1;
@@ -212,7 +212,7 @@ static bool device_has_dsm(struct device *dev)
 	if (!handle)
 		return false;
 
-	return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+	return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
 				1 << DEVICE_LABEL_DSM);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b01bd5b..563901c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
 
 	if (!pm_runtime_suspended(dev)
 	    || pci_target_state(pci_dev) != pci_dev->current_state
-	    || platform_pci_need_resume(pci_dev))
+	    || platform_pci_need_resume(pci_dev)
+	    || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME))
 		return false;
 
 	/*
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index cc6e085..f6a6340 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1291,7 +1291,6 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
 	cdev = &stdev->cdev;
 	cdev_init(cdev, &switchtec_fops);
 	cdev->owner = THIS_MODULE;
-	cdev->kobj.parent = &dev->kobj;
 
 	return stdev;
 
@@ -1442,12 +1441,15 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
 	stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET;
 	stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET;
 	stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET;
-	stdev->partition = ioread8(&stdev->mmio_ntb->partition_id);
+	stdev->partition = ioread8(&stdev->mmio_sys_info->partition_id);
 	stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count);
 	stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET;
 	stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition];
 	stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET;
 
+	if (stdev->partition_count < 1)
+		stdev->partition_count = 1;
+
 	init_pff(stdev);
 
 	pci_set_drvdata(pdev, stdev);
@@ -1479,11 +1481,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 		  SWITCHTEC_EVENT_EN_IRQ,
 		  &stdev->mmio_part_cfg->mrpc_comp_hdr);
 
-	rc = cdev_add(&stdev->cdev, stdev->dev.devt, 1);
-	if (rc)
-		goto err_put;
-
-	rc = device_add(&stdev->dev);
+	rc = cdev_device_add(&stdev->cdev, &stdev->dev);
 	if (rc)
 		goto err_devadd;
 
@@ -1492,7 +1490,6 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 	return 0;
 
 err_devadd:
-	cdev_del(&stdev->cdev);
 	stdev_kill(stdev);
 err_put:
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
@@ -1506,8 +1503,7 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 
-	device_del(&stdev->dev);
-	cdev_del(&stdev->cdev);
+	cdev_device_del(&stdev->cdev, &stdev->dev);
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
 	dev_info(&stdev->dev, "unregistered.\n");
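cdev_device_add()/cdev_device_del() bundle the character device and struct device registration, including the kobject parent linkage and the error unwind, which is why the manual cdev->kobj.parent assignment and the separate cdev_add()/device_add() calls can be dropped. Roughly, the add side behaves like this simplified sketch (see fs/char_dev.c for the real code):

#include <linux/cdev.h>
#include <linux/device.h>

static int example_cdev_device_add(struct cdev *cdev, struct device *dev)
{
	int rc;

	cdev_set_parent(cdev, &dev->kobj);	/* replaces the manual kobj.parent */
	rc = cdev_add(cdev, dev->devt, 1);
	if (rc)
		return rc;

	rc = device_add(dev);
	if (rc)
		cdev_del(cdev);
	return rc;
}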
 
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 34c862f..0a9b787 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -29,6 +29,17 @@ static int arm_pmu_acpi_register_irq(int cpu)
 		return -EINVAL;
 
 	gsi = gicc->performance_interrupt;
+
+	/*
+	 * Per the ACPI spec, the MADT cannot describe a PMU that doesn't
+	 * have an interrupt. QEMU advertises this by using a GSI of zero,
+	 * which is not known to be valid on any hardware despite being
+	 * valid per the spec. Take the pragmatic approach and reject a
+	 * GSI of zero for now.
+	 */
+	if (!gsi)
+		return 0;
+
 	if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE)
 		trigger = ACPI_EDGE_SENSITIVE;
 	else
diff --git a/drivers/phy/phy-qcom-qmp.c b/drivers/phy/phy-qcom-qmp.c
index 727e23b..78ca628 100644
--- a/drivers/phy/phy-qcom-qmp.c
+++ b/drivers/phy/phy-qcom-qmp.c
@@ -844,7 +844,7 @@ static int qcom_qmp_phy_vreg_init(struct device *dev)
 	int num = qmp->cfg->num_vregs;
 	int i;
 
-	qmp->vregs = devm_kcalloc(dev, num, sizeof(qmp->vregs), GFP_KERNEL);
+	qmp->vregs = devm_kcalloc(dev, num, sizeof(*qmp->vregs), GFP_KERNEL);
 	if (!qmp->vregs)
 		return -ENOMEM;
 
@@ -983,16 +983,16 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id)
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 */
 	qphy->tx = of_iomap(np, 0);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	if (!qphy->tx)
+		return -ENOMEM;
 
 	qphy->rx = of_iomap(np, 1);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+	if (!qphy->rx)
+		return -ENOMEM;
 
 	qphy->pcs = of_iomap(np, 2);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	if (!qphy->pcs)
+		return -ENOMEM;
 
 	/*
 	 * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 1653cbd..bd459a9 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -680,30 +680,16 @@ EXPORT_SYMBOL_GPL(pinctrl_generic_remove_group);
  * pinctrl_generic_free_groups() - removes all pin groups
  * @pctldev: pin controller device
  *
- * Note that the caller must take care of locking.
+ * Note that the caller must take care of locking. The pinctrl groups
+ * are allocated with devm_kzalloc() so no need to free them here.
  */
 static void pinctrl_generic_free_groups(struct pinctrl_dev *pctldev)
 {
 	struct radix_tree_iter iter;
-	struct group_desc *group;
-	unsigned long *indices;
 	void **slot;
-	int i = 0;
-
-	indices = devm_kzalloc(pctldev->dev, sizeof(*indices) *
-			       pctldev->num_groups, GFP_KERNEL);
-	if (!indices)
-		return;
 
 	radix_tree_for_each_slot(slot, &pctldev->pin_group_tree, &iter, 0)
-		indices[i++] = iter.index;
-
-	for (i = 0; i < pctldev->num_groups; i++) {
-		group = radix_tree_lookup(&pctldev->pin_group_tree,
-					  indices[i]);
-		radix_tree_delete(&pctldev->pin_group_tree, indices[i]);
-		devm_kfree(pctldev->dev, group);
-	}
+		radix_tree_delete(&pctldev->pin_group_tree, iter.index);
 
 	pctldev->num_groups = 0;
 }
diff --git a/drivers/pinctrl/freescale/pinctrl-mxs.c b/drivers/pinctrl/freescale/pinctrl-mxs.c
index 41b5b07..6852010 100644
--- a/drivers/pinctrl/freescale/pinctrl-mxs.c
+++ b/drivers/pinctrl/freescale/pinctrl-mxs.c
@@ -194,6 +194,16 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev,
 	return 0;
 }
 
+static void mxs_pinctrl_rmwl(u32 value, u32 mask, u8 shift, void __iomem *reg)
+{
+	u32 tmp;
+
+	tmp = readl(reg);
+	tmp &= ~(mask << shift);
+	tmp |= value << shift;
+	writel(tmp, reg);
+}
+
 static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
 			       unsigned group)
 {
@@ -211,8 +221,7 @@ static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
 		reg += bank * 0x20 + pin / 16 * 0x10;
 		shift = pin % 16 * 2;
 
-		writel(0x3 << shift, reg + CLR);
-		writel(g->muxsel[i] << shift, reg + SET);
+		mxs_pinctrl_rmwl(g->muxsel[i], 0x3, shift, reg);
 	}
 
 	return 0;
@@ -279,8 +288,7 @@ static int mxs_pinconf_group_set(struct pinctrl_dev *pctldev,
 			/* mA */
 			if (config & MA_PRESENT) {
 				shift = pin % 8 * 4;
-				writel(0x3 << shift, reg + CLR);
-				writel(ma << shift, reg + SET);
+				mxs_pinctrl_rmwl(ma, 0x3, shift, reg);
 			}
 
 			/* vol */
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 2debba6..20f1b44 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1539,15 +1539,29 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
  * is not listed below.
  */
 static const struct dmi_system_id chv_no_valid_mask[] = {
+	/* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
 	{
-		/* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
-		.ident = "Acer Chromebook (CYAN)",
+		.ident = "Intel_Strago based Chromebooks (All models)",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
-			DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"),
+			DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
 		},
-	}
+	},
+	{
+		.ident = "Acer Chromebook R11 (Cyan)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
+		},
+	},
+	{
+		.ident = "Samsung Chromebook 3 (Celes)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
+		},
+	},
+	{}
 };
 
 static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 0d6b7f4..720a19f 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -35,7 +35,6 @@ static const struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 				"input bias pull to pin specific state", NULL, false),
 	PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false),
-	PCONFDUMP(PIN_CONFIG_BIDIRECTIONAL, "bi-directional pin operations", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false),
 	PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false),
@@ -161,7 +160,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 },
 	{ "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 },
 	{ "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 },
-	{ "bi-directional", PIN_CONFIG_BIDIRECTIONAL, 1 },
 	{ "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 },
 	{ "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 },
 	{ "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 },
@@ -174,7 +172,6 @@ static const struct pinconf_generic_params dt_params[] = {
 	{ "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 },
 	{ "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
 	{ "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
-	{ "output-enable", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-high", PIN_CONFIG_OUTPUT, 1, },
 	{ "output-low", PIN_CONFIG_OUTPUT, 0, },
 	{ "power-source", PIN_CONFIG_POWER_SOURCE, 0 },
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 1482d13..e432ec8 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -495,64 +495,54 @@ static struct irq_chip amd_gpio_irqchip = {
 	.flags        = IRQCHIP_SKIP_SET_WAKE,
 };
 
-static void amd_gpio_irq_handler(struct irq_desc *desc)
+#define PIN_IRQ_PENDING	(BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
+
+static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
 {
-	u32 i;
-	u32 off;
-	u32 reg;
-	u32 pin_reg;
-	u64 reg64;
-	int handled = 0;
-	unsigned int irq;
+	struct amd_gpio *gpio_dev = dev_id;
+	struct gpio_chip *gc = &gpio_dev->gc;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned int i, irqnr;
 	unsigned long flags;
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	struct gpio_chip *gc = irq_desc_get_handler_data(desc);
-	struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+	u32 *regs, regval;
+	u64 status, mask;
 
-	chained_irq_enter(chip, desc);
-	/*enable GPIO interrupt again*/
+	/* Read the wake status */
 	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-	reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
-	reg64 = reg;
-	reg64 = reg64 << 32;
-
-	reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
-	reg64 |= reg;
+	status = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
+	status <<= 32;
+	status |= readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
 	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-	/*
-	 * first 46 bits indicates interrupt status.
-	 * one bit represents four interrupt sources.
-	*/
-	for (off = 0; off < 46 ; off++) {
-		if (reg64 & BIT(off)) {
-			for (i = 0; i < 4; i++) {
-				pin_reg = readl(gpio_dev->base +
-						(off * 4 + i) * 4);
-				if ((pin_reg & BIT(INTERRUPT_STS_OFF)) ||
-					(pin_reg & BIT(WAKE_STS_OFF))) {
-					irq = irq_find_mapping(gc->irqdomain,
-								off * 4 + i);
-					generic_handle_irq(irq);
-					writel(pin_reg,
-						gpio_dev->base
-						+ (off * 4 + i) * 4);
-					handled++;
-				}
-			}
+	/* Bits 0-45 contain the relevant status bits */
+	status &= (1ULL << 46) - 1;
+	regs = gpio_dev->base;
+	for (mask = 1, irqnr = 0; status; mask <<= 1, regs += 4, irqnr += 4) {
+		if (!(status & mask))
+			continue;
+		status &= ~mask;
+
+		/* Each status bit covers four pins */
+		for (i = 0; i < 4; i++) {
+			regval = readl(regs + i);
+			if (!(regval & PIN_IRQ_PENDING))
+				continue;
+			irq = irq_find_mapping(gc->irqdomain, irqnr + i);
+			generic_handle_irq(irq);
+			/* Clear interrupt */
+			writel(regval, regs + i);
+			ret = IRQ_HANDLED;
 		}
 	}
 
-	if (handled == 0)
-		handle_bad_irq(desc);
-
+	/* Signal EOI to the GPIO unit */
 	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-	reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
-	reg |= EOI_MASK;
-	writel(reg, gpio_dev->base + WAKE_INT_MASTER_REG);
+	regval = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
+	regval |= EOI_MASK;
+	writel(regval, gpio_dev->base + WAKE_INT_MASTER_REG);
 	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-	chained_irq_exit(chip, desc);
+	return ret;
 }
 
 static int amd_get_groups_count(struct pinctrl_dev *pctldev)
@@ -821,10 +811,11 @@ static int amd_gpio_probe(struct platform_device *pdev)
 		goto out2;
 	}
 
-	gpiochip_set_chained_irqchip(&gpio_dev->gc,
-				 &amd_gpio_irqchip,
-				 irq_base,
-				 amd_gpio_irq_handler);
+	ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler, 0,
+			       KBUILD_MODNAME, gpio_dev);
+	if (ret)
+		goto out2;
+
 	platform_set_drvdata(pdev, gpio_dev);
 
 	dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index f141aa0..9dd981d 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -143,9 +143,6 @@ struct rockchip_drv {
  * @gpio_chip: gpiolib chip
  * @grange: gpio range
  * @slock: spinlock for the gpio bank
- * @irq_lock: bus lock for irq chip
- * @new_irqs: newly configured irqs which must be muxed as GPIOs in
- *	irq_bus_sync_unlock()
  */
 struct rockchip_pin_bank {
 	void __iomem			*reg_base;
@@ -168,8 +165,6 @@ struct rockchip_pin_bank {
 	struct pinctrl_gpio_range	grange;
 	raw_spinlock_t			slock;
 	u32				toggle_edge_mode;
-	struct mutex			irq_lock;
-	u32				new_irqs;
 };
 
 #define PIN_BANK(id, pins, label)			\
@@ -2134,12 +2129,11 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 	int ret;
 
 	/* make sure the pin is configured as gpio input */
-	ret = rockchip_verify_mux(bank, d->hwirq, RK_FUNC_GPIO);
+	ret = rockchip_set_mux(bank, d->hwirq, RK_FUNC_GPIO);
 	if (ret < 0)
 		return ret;
 
-	bank->new_irqs |= mask;
-
+	clk_enable(bank->clk);
 	raw_spin_lock_irqsave(&bank->slock, flags);
 
 	data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -2197,6 +2191,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 	default:
 		irq_gc_unlock(gc);
 		raw_spin_unlock_irqrestore(&bank->slock, flags);
+		clk_disable(bank->clk);
 		return -EINVAL;
 	}
 
@@ -2205,6 +2200,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 
 	irq_gc_unlock(gc);
 	raw_spin_unlock_irqrestore(&bank->slock, flags);
+	clk_disable(bank->clk);
 
 	return 0;
 }
@@ -2248,34 +2244,6 @@ static void rockchip_irq_disable(struct irq_data *d)
 	clk_disable(bank->clk);
 }
 
-static void rockchip_irq_bus_lock(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct rockchip_pin_bank *bank = gc->private;
-
-	clk_enable(bank->clk);
-	mutex_lock(&bank->irq_lock);
-}
-
-static void rockchip_irq_bus_sync_unlock(struct irq_data *d)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct rockchip_pin_bank *bank = gc->private;
-
-	while (bank->new_irqs) {
-		unsigned int irq = __ffs(bank->new_irqs);
-		int ret;
-
-		ret = rockchip_set_mux(bank, irq, RK_FUNC_GPIO);
-		WARN_ON(ret < 0);
-
-		bank->new_irqs &= ~BIT(irq);
-	}
-
-	mutex_unlock(&bank->irq_lock);
-	clk_disable(bank->clk);
-}
-
 static int rockchip_interrupts_register(struct platform_device *pdev,
 						struct rockchip_pinctrl *info)
 {
@@ -2342,9 +2310,6 @@ static int rockchip_interrupts_register(struct platform_device *pdev,
 		gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend;
 		gc->chip_types[0].chip.irq_resume = rockchip_irq_resume;
 		gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type;
-		gc->chip_types[0].chip.irq_bus_lock = rockchip_irq_bus_lock;
-		gc->chip_types[0].chip.irq_bus_sync_unlock =
-						rockchip_irq_bus_sync_unlock;
 		gc->wake_enabled = IRQ_MSK(bank->nr_pins);
 
 		irq_set_chained_handler_and_data(bank->irq,
@@ -2518,7 +2483,6 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
 		int bank_pins = 0;
 
 		raw_spin_lock_init(&bank->slock);
-		mutex_init(&bank->irq_lock);
 		bank->drvdata = d;
 		bank->pin_base = ctrl->nr_pins;
 		ctrl->nr_pins += bank->nr_pins;
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 9fd6d90..16b3ae5 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -826,30 +826,17 @@ EXPORT_SYMBOL_GPL(pinmux_generic_remove_function);
  * pinmux_generic_free_functions() - removes all functions
  * @pctldev: pin controller device
  *
- * Note that the caller must take care of locking.
+ * Note that the caller must take care of locking. The pinctrl
+ * functions are allocated with devm_kzalloc() so no need to free
+ * them here.
  */
 void pinmux_generic_free_functions(struct pinctrl_dev *pctldev)
 {
 	struct radix_tree_iter iter;
-	struct function_desc *function;
-	unsigned long *indices;
 	void **slot;
-	int i = 0;
-
-	indices = devm_kzalloc(pctldev->dev, sizeof(*indices) *
-			       pctldev->num_functions, GFP_KERNEL);
-	if (!indices)
-		return;
 
 	radix_tree_for_each_slot(slot, &pctldev->pin_function_tree, &iter, 0)
-		indices[i++] = iter.index;
-
-	for (i = 0; i < pctldev->num_functions; i++) {
-		function = radix_tree_lookup(&pctldev->pin_function_tree,
-					     indices[i]);
-		radix_tree_delete(&pctldev->pin_function_tree, indices[i]);
-		devm_kfree(pctldev->dev, function);
-	}
+		radix_tree_delete(&pctldev->pin_function_tree, iter.index);
 
 	pctldev->num_functions = 0;
 }
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index d3c5f5d..222b668 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -798,7 +798,7 @@ static int stm32_pconf_parse_conf(struct pinctrl_dev *pctldev,
 		break;
 	case PIN_CONFIG_OUTPUT:
 		__stm32_gpio_set(bank, offset, arg);
-		ret = stm32_pmx_gpio_set_direction(pctldev, NULL, pin, false);
+		ret = stm32_pmx_gpio_set_direction(pctldev, range, pin, false);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
index 9aec1d2..6624499 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
@@ -394,7 +394,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = {
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 18),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x3, "owa")),		/* DOUT */
+		  SUNXI_FUNCTION(0x3, "spdif")),	/* DOUT */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 19),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out")),
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 2de1e60..5f36721 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -704,7 +704,7 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
 		/* Reallocate the array */
 		u32 new_capacity = 2 * dev->pipes_capacity;
 		struct goldfish_pipe **pipes =
-			kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL);
+			kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC);
 		if (!pipes)
 			return -ENOMEM;
 		memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index ef29f18..4cc2f4e 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -97,11 +97,9 @@
 	} \
 }
 
-#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
-#endif
 
 struct telemetry_susp_stats {
 	u32 shlw_swake_ctr;
@@ -807,7 +805,6 @@ static const struct file_operations telem_ioss_trc_verb_ops = {
 	.release	= single_release,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int pm_suspend_prep_cb(void)
 {
 	struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
@@ -937,7 +934,6 @@ static int pm_notification(struct notifier_block *this,
 static struct notifier_block pm_notifier = {
 	.notifier_call = pm_notification,
 };
-#endif /* CONFIG_PM_SLEEP */
 
 static int __init telemetry_debugfs_init(void)
 {
@@ -960,14 +956,13 @@ static int __init telemetry_debugfs_init(void)
 	if (err < 0)
 		return -EINVAL;
 
-
-#ifdef CONFIG_PM_SLEEP
 	register_pm_notifier(&pm_notifier);
-#endif /* CONFIG_PM_SLEEP */
 
 	debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-	if (!debugfs_conf->telemetry_dbg_dir)
-		return -ENOMEM;
+	if (!debugfs_conf->telemetry_dbg_dir) {
+		err = -ENOMEM;
+		goto out_pm;
+	}
 
 	f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
@@ -1014,6 +1009,8 @@ static int __init telemetry_debugfs_init(void)
 out:
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+out_pm:
+	unregister_pm_notifier(&pm_notifier);
 
 	return err;
 }
@@ -1022,6 +1019,7 @@ static void __exit telemetry_debugfs_exit(void)
 {
 	debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
 	debugfs_conf->telemetry_dbg_dir = NULL;
+	unregister_pm_notifier(&pm_notifier);
 }
 
 late_initcall(telemetry_debugfs_init);
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 14bde0d..5b10b50 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone(
 
 	power_zone->id = result;
 	idr_init(&power_zone->idr);
+	result = -ENOMEM;
 	power_zone->name = kstrdup(name, GFP_KERNEL);
 	if (!power_zone->name)
 		goto err_name_alloc;
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
index 35ce53e..d5e5229 100644
--- a/drivers/reset/hisilicon/hi6220_reset.c
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -155,3 +155,5 @@ static int __init hi6220_reset_init(void)
 }
 
 postcore_initcall(hi6220_reset_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index b3de973..9dca53d 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -1088,7 +1088,7 @@ static u32 rtc_handler(void *context)
 	}
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	pm_wakeup_event(dev, 0);
+	pm_wakeup_hard_event(dev);
 	acpi_clear_event(ACPI_EVENT_RTC);
 	acpi_disable_event(ACPI_EVENT_RTC, 0);
 	return ACPI_INTERRUPT_HANDLED;
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 6b54f6c..8093111 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -709,7 +709,7 @@ static irqreturn_t dryice_irq(int irq, void *dev_id)
 		/* If the write wait queue is empty then there are no pending
 		  operations. It means the interrupt is for DryIce -Security.
 		  IRQ must be returned as none. */
-		if (list_empty_careful(&imxdi->write_wait.task_list))
+		if (list_empty_careful(&imxdi->write_wait.head))
 			return rc;
 
 		/* DSR_WCF clears itself on DSR read */
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5..b7cbd5d 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 	 */
 	if (basedev->state < DASD_STATE_READY) {
 		while ((req = blk_fetch_request(block->request_queue)))
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 		return;
 	}
 
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting write request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting failfast request %p",
 				      req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -ETIMEDOUT);
+			__blk_end_request_all(req, BLK_STS_TIMEOUT);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "on request %p",
 				      PTR_ERR(cqr), req);
 			blk_start_request(req);
-			__blk_end_request_all(req, -EIO);
+			__blk_end_request_all(req, BLK_STS_IOERR);
 			continue;
 		}
 		/*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 {
 	struct request *req;
 	int status;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	req = (struct request *) cqr->callback_data;
 	dasd_profile_end(cqr->block, cqr, req);
+
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status < 0)
-		error = status;
+		error = errno_to_blk_status(status);
 	else if (status == 0) {
-		if (cqr->intrc == -EPERM)
-			error = -EBADE;
-		else if (cqr->intrc == -ENOLINK ||
-			 cqr->intrc == -ETIMEDOUT)
-			error = cqr->intrc;
-		else
-			error = -EIO;
+		switch (cqr->intrc) {
+		case -EPERM:
+			error = BLK_STS_NEXUS;
+			break;
+		case -ENOLINK:
+			error = BLK_STS_TRANSPORT;
+			break;
+		case -ETIMEDOUT:
+			error = BLK_STS_TIMEOUT;
+			break;
+		default:
+			error = BLK_STS_IOERR;
+			break;
+		}
 	}
 	__blk_end_request_all(req, error);
 }
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 
 	spin_lock_irq(&block->request_queue_lock);
 	while ((req = blk_fetch_request(block->request_queue)))
-		__blk_end_request_all(req, -EIO);
+		__blk_end_request_all(req, BLK_STS_IOERR);
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280..06eb1de 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de68..3c2c84b 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	aob->request.data = (u64) aobrq;
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
-	scmrq->error = 0;
+	scmrq->error = BLK_STS_OK;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
 	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
 	scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 {
 	struct aob *aob = scmrq->aob;
 
-	if (scmrq->error == -ETIMEDOUT)
+	if (scmrq->error == BLK_STS_TIMEOUT)
 		SCM_LOG(1, "Request timeout");
 	else {
 		SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 		       scmrq->error);
 }
 
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 {
 	struct scm_request *scmrq = data;
 	struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	unsigned long flags;
 
-	if (scmrq->error != -EIO)
+	if (scmrq->error != BLK_STS_IOERR)
 		goto restart;
 
 	/* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cd..cd598d1 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
-	int error;
+	blk_status_t error;
 #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
 	struct {
 		enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
 int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
 void scm_blk_dev_cleanup(struct scm_blk_dev *);
 void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
 
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e75..a48f0d4 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
 	unsigned long page_addr;
 	unsigned long bytes;
 
-	blk_queue_split(q, &bio, q->bio_split);
+	blk_queue_split(q, &bio);
 
 	if ((bio->bi_iter.bi_sector & 7) != 0 ||
 	    (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc..0f11f3b 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 	struct eadm_private *private = get_eadm_private(sch);
 	struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
 	struct irb *irb = this_cpu_ptr(&cio_irb);
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 
 	EADM_LOG(6, "irq");
 	EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 
 	if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
 	    && scsw->eswf == 1 && irb->esw.eadm.erw.r)
-		error = -EIO;
+		error = BLK_STS_IOERR;
 
 	if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
-		error = -ETIMEDOUT;
+		error = BLK_STS_TIMEOUT;
 
 	eadm_subchannel_set_timeout(sch, 0);
 
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268ed..1fa53ec 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
 }
 EXPORT_SYMBOL_GPL(scm_driver_unregister);
 
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
 {
 	struct aob_rq_header *aobrq = (void *) aob->request.data;
 	struct scm_device *scmdev = aobrq->scmdev;
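	struct scm_blk_dev *bdev = scmrq->bdev;

A short aside on the pattern in the s390 hunks above: raw errno values are replaced with blk_status_t codes throughout the scm/eadm completion path. A minimal sketch of that mapping, using a hypothetical helper name (the generic in-kernel helper for the same translation is errno_to_blk_status() in block/blk-core.c):

#include <linux/blk_types.h>
#include <linux/errno.h>

/* Map the errno values used by the scm/eadm paths onto blk_status_t. */
static blk_status_t scm_errno_to_blk_status(int error)
{
	switch (error) {
	case 0:
		return BLK_STS_OK;
	case -ETIMEDOUT:
		return BLK_STS_TIMEOUT;
	case -EIO:
	default:
		return BLK_STS_IOERR;
	}
}
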
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index e72abbc..a66a317 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -70,14 +70,14 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
 {
 	return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
 }
-MDEV_TYPE_ATTR_RO(name);
+static MDEV_TYPE_ATTR_RO(name);
 
 static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
 			       char *buf)
 {
 	return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
 }
-MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(device_api);
 
 static ssize_t available_instances_show(struct kobject *kobj,
 					struct device *dev, char *buf)
@@ -86,7 +86,7 @@ static ssize_t available_instances_show(struct kobject *kobj,
 
 	return sprintf(buf, "%d\n", atomic_read(&private->avail));
 }
-MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(available_instances);
 
 static struct attribute *mdev_types_attrs[] = {
 	&mdev_type_attr_name.attr,
@@ -100,7 +100,7 @@ static struct attribute_group mdev_type_group = {
 	.attrs = mdev_types_attrs,
 };
 
-struct attribute_group *mdev_type_groups[] = {
+static struct attribute_group *mdev_type_groups[] = {
 	&mdev_type_group,
 	NULL,
 };
@@ -152,7 +152,7 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
 				      &events, &private->nb);
 }
 
-void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_release(struct mdev_device *mdev)
 {
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
@@ -233,7 +233,7 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
 	}
 }
 
-int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
 {
 	if (info->index != VFIO_CCW_IO_IRQ_INDEX)
 		return -EINVAL;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 9be4596..ea09991 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -668,10 +668,28 @@ static int ap_device_probe(struct device *dev)
 	struct ap_driver *ap_drv = to_ap_drv(dev->driver);
 	int rc;
 
+	/* Add queue/card to list of active queues/cards */
+	spin_lock_bh(&ap_list_lock);
+	if (is_card_dev(dev))
+		list_add(&to_ap_card(dev)->list, &ap_card_list);
+	else
+		list_add(&to_ap_queue(dev)->list,
+			 &to_ap_queue(dev)->card->queues);
+	spin_unlock_bh(&ap_list_lock);
+
 	ap_dev->drv = ap_drv;
 	rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
-	if (rc)
+
+	if (rc) {
+		spin_lock_bh(&ap_list_lock);
+		if (is_card_dev(dev))
+			list_del_init(&to_ap_card(dev)->list);
+		else
+			list_del_init(&to_ap_queue(dev)->list);
+		spin_unlock_bh(&ap_list_lock);
 		ap_dev->drv = NULL;
+	}
+
 	return rc;
 }
 
@@ -680,14 +698,17 @@ static int ap_device_remove(struct device *dev)
 	struct ap_device *ap_dev = to_ap_dev(dev);
 	struct ap_driver *ap_drv = ap_dev->drv;
 
+	if (ap_drv->remove)
+		ap_drv->remove(ap_dev);
+
+	/* Remove queue/card from list of active queues/cards */
 	spin_lock_bh(&ap_list_lock);
 	if (is_card_dev(dev))
 		list_del_init(&to_ap_card(dev)->list);
 	else
 		list_del_init(&to_ap_queue(dev)->list);
 	spin_unlock_bh(&ap_list_lock);
-	if (ap_drv->remove)
-		ap_drv->remove(ap_dev);
+
 	return 0;
 }
 
@@ -1056,10 +1077,6 @@ static void ap_scan_bus(struct work_struct *unused)
 				}
 				/* get it and thus adjust reference counter */
 				get_device(&ac->ap_dev.device);
-				/* Add card device to card list */
-				spin_lock_bh(&ap_list_lock);
-				list_add(&ac->list, &ap_card_list);
-				spin_unlock_bh(&ap_list_lock);
 			}
 			/* now create the new queue device */
 			aq = ap_queue_create(qid, type);
@@ -1070,10 +1087,6 @@ static void ap_scan_bus(struct work_struct *unused)
 			aq->ap_dev.device.parent = &ac->ap_dev.device;
 			dev_set_name(&aq->ap_dev.device,
 				     "%02x.%04x", id, dom);
-			/* Add queue device to card queue list */
-			spin_lock_bh(&ap_list_lock);
-			list_add(&aq->list, &ac->queues);
-			spin_unlock_bh(&ap_list_lock);
 			/* Start with a device reset */
 			spin_lock_bh(&aq->lock);
 			ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
@@ -1081,9 +1094,6 @@ static void ap_scan_bus(struct work_struct *unused)
 			/* Register device */
 			rc = device_register(&aq->ap_dev.device);
 			if (rc) {
-				spin_lock_bh(&ap_list_lock);
-				list_del_init(&aq->list);
-				spin_unlock_bh(&ap_list_lock);
 				put_device(&aq->ap_dev.device);
 				continue;
 			}
diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c
index cfa161c..836efac9 100644
--- a/drivers/s390/crypto/ap_card.c
+++ b/drivers/s390/crypto/ap_card.c
@@ -160,7 +160,14 @@ static struct device_type ap_card_type = {
 
 static void ap_card_device_release(struct device *dev)
 {
-	kfree(to_ap_card(dev));
+	struct ap_card *ac = to_ap_card(dev);
+
+	if (!list_empty(&ac->list)) {
+		spin_lock_bh(&ap_list_lock);
+		list_del_init(&ac->list);
+		spin_unlock_bh(&ap_list_lock);
+	}
+	kfree(ac);
 }
 
 struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 480c58a..0f1a5d0 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -584,7 +584,14 @@ static struct device_type ap_queue_type = {
 
 static void ap_queue_device_release(struct device *dev)
 {
-	kfree(to_ap_queue(dev));
+	struct ap_queue *aq = to_ap_queue(dev);
+
+	if (!list_empty(&aq->list)) {
+		spin_lock_bh(&ap_list_lock);
+		list_del_init(&aq->list);
+		spin_unlock_bh(&ap_list_lock);
+	}
+	kfree(aq);
 }
 
 struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
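The ap_card and ap_queue release callbacks above now defensively unlink the object from the bus list before freeing it, since ap_device_probe()/ap_device_remove() own the list membership and a device can be released without ever having been probed. A generic sketch of that release pattern, with hypothetical names:

#include <linux/device.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_dev {
	struct device dev;
	struct list_head list;
};

static DEFINE_SPINLOCK(example_list_lock);
#define to_example_dev(d) container_of(d, struct example_dev, dev)

static void example_device_release(struct device *dev)
{
	struct example_dev *ed = to_example_dev(dev);

	/* Unlink only if still on the list; remove() may have done it already. */
	if (!list_empty(&ed->list)) {
		spin_lock_bh(&example_list_lock);
		list_del_init(&ed->list);
		spin_unlock_bh(&example_list_lock);
	}
	kfree(ed);
}
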
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index dba94b4..fa732bd 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -1954,7 +1954,6 @@ static void netiucv_free_netdevice(struct net_device *dev)
 		privptr->conn = NULL; privptr->fsm = NULL;
 		/* privptr gets freed by free_netdev() */
 	}
-	free_netdev(dev);
 }
 
 /**
@@ -1972,7 +1971,8 @@ static void netiucv_setup_netdevice(struct net_device *dev)
 	dev->mtu	         = NETIUCV_MTU_DEFAULT;
 	dev->min_mtu		 = 576;
 	dev->max_mtu		 = NETIUCV_MTU_MAX;
-	dev->destructor          = netiucv_free_netdevice;
+	dev->needs_free_netdev   = true;
+	dev->priv_destructor     = netiucv_free_netdevice;
 	dev->hard_header_len     = NETIUCV_HDRLEN;
 	dev->addr_len            = 0;
 	dev->type                = ARPHRD_SLIP;
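The netiucv hunks drop the explicit free_netdev() call from the destructor and move to the newer net_device teardown convention, where the core frees the device and the driver only cleans up private state. A hedged sketch of that convention for an arbitrary driver:

#include <linux/netdevice.h>

static void example_priv_destructor(struct net_device *dev)
{
	/* Release driver-private resources only; the core calls
	 * free_netdev() itself because needs_free_netdev is set. */
}

static void example_setup(struct net_device *dev)
{
	dev->needs_free_netdev = true;
	dev->priv_destructor   = example_priv_destructor;
}
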
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9d..14f377a 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
 		size_t len = blk_rq_cur_bytes(req);
-		int err = -EIO;
+		blk_status_t err = BLK_STS_IOERR;
 
 		if ((offset + len) > jdp->dsize)
 			goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
 		}
 
 		jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
-		err = 0;
+		err = BLK_STS_OK;
 	end:
 		if (!__blk_end_request_cur(req, err))
 			req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
 			put_disk(disk);
 			goto out;
 		}
+		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		jsfd_disk[i] = disk;
 	}
 
diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h
index 4fc8ed5..1f424e4 100644
--- a/drivers/scsi/bnx2fc/bnx2fc.h
+++ b/drivers/scsi/bnx2fc/bnx2fc.h
@@ -191,6 +191,7 @@ struct bnx2fc_hba {
 	struct bnx2fc_cmd_mgr *cmd_mgr;
 	spinlock_t hba_lock;
 	struct mutex hba_mutex;
+	struct mutex hba_stats_mutex;
 	unsigned long adapter_state;
 		#define ADAPTER_STATE_UP		0
 		#define ADAPTER_STATE_GOING_DOWN	1
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 93b5a00..902722d 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -663,15 +663,17 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 	if (!fw_stats)
 		return NULL;
 
+	mutex_lock(&hba->hba_stats_mutex);
+
 	bnx2fc_stats = fc_get_host_stats(shost);
 
 	init_completion(&hba->stat_req_done);
 	if (bnx2fc_send_stat_req(hba))
-		return bnx2fc_stats;
+		goto unlock_stats_mutex;
 	rc = wait_for_completion_timeout(&hba->stat_req_done, (2 * HZ));
 	if (!rc) {
 		BNX2FC_HBA_DBG(lport, "FW stat req timed out\n");
-		return bnx2fc_stats;
+		goto unlock_stats_mutex;
 	}
 	BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt);
 	bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt;
@@ -693,6 +695,9 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
 
 	memcpy(&hba->prev_stats, hba->stats_buffer,
 	       sizeof(struct fcoe_statistics_params));
+
+unlock_stats_mutex:
+	mutex_unlock(&hba->hba_stats_mutex);
 	return bnx2fc_stats;
 }
 
@@ -1340,6 +1345,7 @@ static struct bnx2fc_hba *bnx2fc_hba_create(struct cnic_dev *cnic)
 	}
 	spin_lock_init(&hba->hba_lock);
 	mutex_init(&hba->hba_mutex);
+	mutex_init(&hba->hba_stats_mutex);
 
 	hba->cnic = cnic;
 
diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 622bdab..dab195f 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 		goto bye;
 	}
 
-	mempool_free(mbp, hw->mb_mempool);
 	if (finicsum != cfcsum) {
 		csio_warn(hw,
 		      "Config File checksum mismatch: csum=%#x, computed=%#x\n",
@@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param)
 	rv = csio_hw_validate_caps(hw, mbp);
 	if (rv != 0)
 		goto bye;
+
+	mempool_free(mbp, hw->mb_mempool);
+	mbp = NULL;
+
 	/*
 	 * Note that we're operating with parameters
 	 * not supplied by the driver, rather than from hard-wired
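The csio_hw_use_fwconfig() hunks move the mempool_free() to after csio_hw_validate_caps(), which still reads the mailbox buffer, and clear the pointer so the shared error path cannot free it twice. The ordering, reduced to a sketch with hypothetical names (example_validate_caps() and example_continue_init() are placeholders):

#include <linux/mempool.h>

struct example_hw {
	mempool_t *mb_mempool;
};

static int example_use_fwconfig(struct example_hw *hw)
{
	void *mbp = mempool_alloc(hw->mb_mempool, GFP_KERNEL);
	int rv;

	if (!mbp)
		return -ENOMEM;

	rv = example_validate_caps(hw, mbp);	/* last reader of mbp */
	if (rv)
		goto bye;

	mempool_free(mbp, hw->mb_mempool);	/* free only after last use */
	mbp = NULL;

	rv = example_continue_init(hw);		/* must not touch mbp */
bye:
	if (mbp)
		mempool_free(mbp, hw->mb_mempool);
	return rv;
}
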
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 1076c15..0aae094 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1595,7 +1595,6 @@ static void release_offload_resources(struct cxgbi_sock *csk)
 		cxgbi_sock_put(csk);
 	}
 	csk->dst = NULL;
-	csk->cdev = NULL;
 }
 
 static int init_act_open(struct cxgbi_sock *csk)
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index bd7d39e..e4c83b7 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -867,7 +867,8 @@ static void need_active_close(struct cxgbi_sock *csk)
 	log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
 		csk, (csk)->state, (csk)->flags, (csk)->tid);
 	spin_lock_bh(&csk->lock);
-	dst_confirm(csk->dst);
+	if (csk->dst)
+		dst_confirm(csk->dst);
 	data_lost = skb_queue_len(&csk->receive_queue);
 	__skb_queue_purge(&csk->receive_queue);
 
@@ -882,7 +883,8 @@ static void need_active_close(struct cxgbi_sock *csk)
 	}
 
 	if (close_req) {
-		if (data_lost)
+		if (!cxgbi_sock_flag(csk, CTPF_LOGOUT_RSP_RCVD) ||
+		    data_lost)
 			csk->cdev->csk_send_abort_req(csk);
 		else
 			csk->cdev->csk_send_close_req(csk);
@@ -1186,9 +1188,10 @@ static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb)
 				cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
 		skb = next;
 	}
-done:
+
 	if (likely(skb_queue_len(&csk->write_queue)))
 		cdev->csk_push_tx_frames(csk, 1);
+done:
 	spin_unlock_bh(&csk->lock);
 	return copied;
 
@@ -1568,9 +1571,12 @@ static inline int read_pdu_skb(struct iscsi_conn *conn,
 	}
 }
 
-static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
+static int
+skb_read_pdu_bhs(struct cxgbi_sock *csk, struct iscsi_conn *conn,
+		 struct sk_buff *skb)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int err;
 
 	log_debug(1 << CXGBI_DBG_PDU_RX,
 		"conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
@@ -1608,7 +1614,16 @@ static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
 		}
 	}
 
-	return read_pdu_skb(conn, skb, 0, 0);
+	err = read_pdu_skb(conn, skb, 0, 0);
+	if (likely(err >= 0)) {
+		struct iscsi_hdr *hdr = (struct iscsi_hdr *)skb->data;
+		u8 opcode = hdr->opcode & ISCSI_OPCODE_MASK;
+
+		if (unlikely(opcode == ISCSI_OP_LOGOUT_RSP))
+			cxgbi_sock_set_flag(csk, CTPF_LOGOUT_RSP_RCVD);
+	}
+
+	return err;
 }
 
 static int skb_read_pdu_data(struct iscsi_conn *conn, struct sk_buff *lskb,
@@ -1713,7 +1728,7 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
 			cxgbi_skcb_rx_pdulen(skb));
 
 		if (cxgbi_skcb_test_flag(skb, SKCBF_RX_COALESCED)) {
-			err = skb_read_pdu_bhs(conn, skb);
+			err = skb_read_pdu_bhs(csk, conn, skb);
 			if (err < 0) {
 				pr_err("coalesced bhs, csk 0x%p, skb 0x%p,%u, "
 					"f 0x%lx, plen %u.\n",
@@ -1731,7 +1746,7 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
 					cxgbi_skcb_flags(skb),
 					cxgbi_skcb_rx_pdulen(skb));
 		} else {
-			err = skb_read_pdu_bhs(conn, skb);
+			err = skb_read_pdu_bhs(csk, conn, skb);
 			if (err < 0) {
 				pr_err("bhs, csk 0x%p, skb 0x%p,%u, "
 					"f 0x%lx, plen %u.\n",
@@ -1873,6 +1888,11 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
 	tcp_task->dd_data = tdata;
 	task->hdr = NULL;
 
+	if (tdata->skb) {
+		kfree_skb(tdata->skb);
+		tdata->skb = NULL;
+	}
+
 	if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) &&
 	    (opcode == ISCSI_OP_SCSI_DATA_OUT ||
 	     (opcode == ISCSI_OP_SCSI_CMD &&
@@ -1890,6 +1910,7 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
 		return -ENOMEM;
 	}
 
+	skb_get(tdata->skb);
 	skb_reserve(tdata->skb, cdev->skb_tx_rsvd);
 	task->hdr = (struct iscsi_hdr *)tdata->skb->data;
 	task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */
@@ -2035,9 +2056,9 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 	unsigned int datalen;
 	int err;
 
-	if (!skb) {
+	if (!skb || cxgbi_skcb_test_flag(skb, SKCBF_TX_DONE)) {
 		log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
-			"task 0x%p, skb NULL.\n", task);
+			"task 0x%p, skb 0x%p\n", task, skb);
 		return 0;
 	}
 
@@ -2050,7 +2071,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 	}
 
 	datalen = skb->data_len;
-	tdata->skb = NULL;
 
 	/* write ppod first if using ofldq to write ppod */
 	if (ttinfo->flags & CXGBI_PPOD_INFO_FLAG_VALID) {
@@ -2078,6 +2098,7 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 			pdulen += ISCSI_DIGEST_SIZE;
 
 		task->conn->txdata_octets += pdulen;
+		cxgbi_skcb_set_flag(skb, SKCBF_TX_DONE);
 		return 0;
 	}
 
@@ -2086,7 +2107,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 			"task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n",
 			task, skb, skb->len, skb->data_len, err);
 		/* reset skb to send when we are called again */
-		tdata->skb = skb;
 		return err;
 	}
 
@@ -2094,7 +2114,8 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
 		"itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
 		task->itt, skb, skb->len, skb->data_len, err);
 
-	kfree_skb(skb);
+	__kfree_skb(tdata->skb);
+	tdata->skb = NULL;
 
 	iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
 	iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
@@ -2113,8 +2134,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task)
 
 	tcp_task->dd_data = NULL;
 	/*  never reached the xmit task callout */
-	if (tdata->skb)
-		__kfree_skb(tdata->skb);
+	if (tdata->skb) {
+		kfree_skb(tdata->skb);
+		tdata->skb = NULL;
+	}
 
 	task_release_itt(task, task->hdr_itt);
 	memset(tdata, 0, sizeof(*tdata));
@@ -2714,6 +2737,9 @@ EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible);
 static int __init libcxgbi_init_module(void)
 {
 	pr_info("%s", version);
+
+	BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
+		     sizeof(struct cxgbi_skb_cb));
 	return 0;
 }
 
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index 18e0ea8..37f07aa 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -187,6 +187,7 @@ enum cxgbi_sock_flags {
 	CTPF_HAS_ATID,		/* reserved atid */
 	CTPF_HAS_TID,		/* reserved hw tid */
 	CTPF_OFFLOAD_DOWN,	/* offload function off */
+	CTPF_LOGOUT_RSP_RCVD,   /* received logout response */
 };
 
 struct cxgbi_skb_rx_cb {
@@ -195,7 +196,8 @@ struct cxgbi_skb_rx_cb {
 };
 
 struct cxgbi_skb_tx_cb {
-	void *l2t;
+	void *handle;
+	void *arp_err_handler;
 	struct sk_buff *wr_next;
 };
 
@@ -203,6 +205,7 @@ enum cxgbi_skcb_flags {
 	SKCBF_TX_NEED_HDR,	/* packet needs a header */
 	SKCBF_TX_MEM_WRITE,     /* memory write */
 	SKCBF_TX_FLAG_COMPL,    /* wr completion flag */
+	SKCBF_TX_DONE,		/* skb tx done */
 	SKCBF_RX_COALESCED,	/* received whole pdu */
 	SKCBF_RX_HDR,		/* received pdu header */
 	SKCBF_RX_DATA,		/* received pdu payload */
@@ -215,13 +218,13 @@ enum cxgbi_skcb_flags {
 };
 
 struct cxgbi_skb_cb {
-	unsigned char ulp_mode;
-	unsigned long flags;
-	unsigned int seq;
 	union {
 		struct cxgbi_skb_rx_cb rx;
 		struct cxgbi_skb_tx_cb tx;
 	};
+	unsigned char ulp_mode;
+	unsigned long flags;
+	unsigned int seq;
 };
 
 #define CXGBI_SKB_CB(skb)	((struct cxgbi_skb_cb *)&((skb)->cb[0]))
@@ -374,11 +377,9 @@ static inline void cxgbi_sock_enqueue_wr(struct cxgbi_sock *csk,
 	cxgbi_skcb_tx_wr_next(skb) = NULL;
 	/*
 	 * We want to take an extra reference since both us and the driver
-	 * need to free the packet before it's really freed. We know there's
-	 * just one user currently so we use atomic_set rather than skb_get
-	 * to avoid the atomic op.
+	 * need to free the packet before it's really freed.
 	 */
-	atomic_set(&skb->users, 2);
+	skb_get(skb);
 
 	if (!csk->wr_pending_head)
 		csk->wr_pending_head = skb;
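The comment rewrite above keeps the intent (two owners must each be able to release the packet) while switching from the open-coded atomic_set(&skb->users, 2) to skb_get(). A minimal sketch of the two-owner pattern, in assumed surrounding code rather than the driver's exact flow:

#include <linux/skbuff.h>

/* Queue an skb that both the pending-WR list and the transmit path will
 * eventually release: take a second reference so the first kfree_skb()
 * only drops a refcount instead of freeing the data. */
static void example_enqueue_wr(struct sk_buff_head *pending, struct sk_buff *skb)
{
	skb_get(skb);			/* reference held by the pending list */
	__skb_queue_tail(pending, skb);
	/* ...the other owner's kfree_skb() is now safe at any time... */
}
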
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 3cbab87..2ceff58 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -265,18 +265,16 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 				      struct list_head *list,
 				      unsigned char *cdb)
 {
-	struct scsi_device *sdev = ctlr->ms_sdev;
-	struct rdac_dh_data *h = sdev->handler_data;
 	struct rdac_mode_common *common;
 	unsigned data_size;
 	struct rdac_queue_data *qdata;
 	u8 *lun_table;
 
-	if (h->ctlr->use_ms10) {
+	if (ctlr->use_ms10) {
 		struct rdac_pg_expanded *rdac_pg;
 
 		data_size = sizeof(struct rdac_pg_expanded);
-		rdac_pg = &h->ctlr->mode_select.expanded;
+		rdac_pg = &ctlr->mode_select.expanded;
 		memset(rdac_pg, 0, data_size);
 		common = &rdac_pg->common;
 		rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
@@ -288,7 +286,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 		struct rdac_pg_legacy *rdac_pg;
 
 		data_size = sizeof(struct rdac_pg_legacy);
-		rdac_pg = &h->ctlr->mode_select.legacy;
+		rdac_pg = &ctlr->mode_select.legacy;
 		memset(rdac_pg, 0, data_size);
 		common = &rdac_pg->common;
 		rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
@@ -304,7 +302,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
 	}
 
 	/* Prepare the command. */
-	if (h->ctlr->use_ms10) {
+	if (ctlr->use_ms10) {
 		cdb[0] = MODE_SELECT_10;
 		cdb[7] = data_size >> 8;
 		cdb[8] = data_size & 0xff;
diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h
index bd9e31e..16fc380 100644
--- a/drivers/scsi/dpt/dpti_i2o.h
+++ b/drivers/scsi/dpt/dpti_i2o.h
@@ -48,7 +48,7 @@
 #include <linux/wait.h>
 typedef wait_queue_head_t adpt_wait_queue_head_t;
 #define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait)
-typedef wait_queue_t adpt_wait_queue_t;
+typedef wait_queue_entry_t adpt_wait_queue_entry_t;
 
 /*
  * message structures
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index d390325..abf6026 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -1170,6 +1170,8 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
 		cmd = list_first_entry_or_null(&vscsi->free_cmd,
 					       struct ibmvscsis_cmd, list);
 		if (cmd) {
+			if (cmd->abort_cmd)
+				cmd->abort_cmd = NULL;
 			cmd->flags &= ~(DELAY_SEND);
 			list_del(&cmd->list);
 			cmd->iue = iue;
@@ -1774,6 +1776,7 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi)
 				if (cmd->abort_cmd) {
 					retry = true;
 					cmd->abort_cmd->flags &= ~(DELAY_SEND);
+					cmd->abort_cmd = NULL;
 				}
 
 				/*
@@ -1788,6 +1791,25 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi)
 					list_del(&cmd->list);
 					ibmvscsis_free_cmd_resources(vscsi,
 								     cmd);
+					/*
+					 * With a successfully aborted op
+					 * through LIO we want to increment the
+					 * vscsi credit so that when we don't
+					 * send a rsp to the original scsi abort
+					 * op (h_send_crq), but the tm rsp to
+					 * the abort is sent, the credit is
+					 * correctly sent with the abort tm rsp.
+					 * We would need 1 for the abort tm rsp
+					 * and 1 credit for the aborted scsi op.
+					 * Thus we need to increment here.
+					 * Also we want to increment the credit
+					 * here because we want to make sure
+					 * cmd is actually released first
+					 * otherwise the client will think it
+					 * can send a new cmd, and we could
+					 * find ourselves short of cmd elements.
+					 */
+					vscsi->credit += 1;
 				} else {
 					iue = cmd->iue;
 
@@ -2962,10 +2984,7 @@ static long srp_build_response(struct scsi_info *vscsi,
 
 	rsp->opcode = SRP_RSP;
 
-	if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING)
-		rsp->req_lim_delta = cpu_to_be32(vscsi->credit);
-	else
-		rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit);
+	rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit);
 	rsp->tag = cmd->rsp.tag;
 	rsp->flags = 0;
 
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 3419e1b..6762130 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -301,13 +301,13 @@ static uint32_t ips_statupd_copperhead_memio(ips_ha_t *);
 static uint32_t ips_statupd_morpheus(ips_ha_t *);
 static ips_scb_t *ips_getscb(ips_ha_t *);
 static void ips_putq_scb_head(ips_scb_queue_t *, ips_scb_t *);
-static void ips_putq_wait_tail(ips_wait_queue_t *, struct scsi_cmnd *);
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *, struct scsi_cmnd *);
 static void ips_putq_copp_tail(ips_copp_queue_t *,
 				      ips_copp_wait_item_t *);
 static ips_scb_t *ips_removeq_scb_head(ips_scb_queue_t *);
 static ips_scb_t *ips_removeq_scb(ips_scb_queue_t *, ips_scb_t *);
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *);
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *,
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *);
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *,
 					  struct scsi_cmnd *);
 static ips_copp_wait_item_t *ips_removeq_copp(ips_copp_queue_t *,
 						     ips_copp_wait_item_t *);
@@ -2871,7 +2871,7 @@ ips_removeq_scb(ips_scb_queue_t * queue, ips_scb_t * item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
+static void ips_putq_wait_tail(ips_wait_queue_entry_t *queue, struct scsi_cmnd *item)
 {
 	METHOD_TRACE("ips_putq_wait_tail", 1);
 
@@ -2902,7 +2902,7 @@ static void ips_putq_wait_tail(ips_wait_queue_t *queue, struct scsi_cmnd *item)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
+static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_entry_t *queue)
 {
 	struct scsi_cmnd *item;
 
@@ -2936,7 +2936,7 @@ static struct scsi_cmnd *ips_removeq_wait_head(ips_wait_queue_t *queue)
 /* ASSUMED to be called from within the HA lock                             */
 /*                                                                          */
 /****************************************************************************/
-static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_t *queue,
+static struct scsi_cmnd *ips_removeq_wait(ips_wait_queue_entry_t *queue,
 					  struct scsi_cmnd *item)
 {
 	struct scsi_cmnd *p;
diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h
index b782bb6..366be3b 100644
--- a/drivers/scsi/ips.h
+++ b/drivers/scsi/ips.h
@@ -989,7 +989,7 @@ typedef struct ips_wait_queue {
 	struct scsi_cmnd *head;
 	struct scsi_cmnd *tail;
 	int count;
-} ips_wait_queue_t;
+} ips_wait_queue_entry_t;
 
 typedef struct ips_copp_wait_item {
 	struct scsi_cmnd *scsi_cmd;
@@ -1035,7 +1035,7 @@ typedef struct ips_ha {
    ips_stat_t         sp;                 /* Status packer pointer      */
    struct ips_scb    *scbs;               /* Array of all CCBS          */
    struct ips_scb    *scb_freelist;       /* SCB free list              */
-   ips_wait_queue_t   scb_waitlist;       /* Pending SCB list           */
+   ips_wait_queue_entry_t   scb_waitlist;       /* Pending SCB list           */
    ips_copp_queue_t   copp_waitlist;      /* Pending PT list            */
    ips_scb_queue_t    scb_activelist;     /* Active SCB list            */
    IPS_IO_CMD        *dummy;              /* dummy command              */
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index b44c313..5203258 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -1422,7 +1422,7 @@ static void fc_rport_recv_rtv_req(struct fc_rport_priv *rdata,
 	fp = fc_frame_alloc(lport, sizeof(*rtv));
 	if (!fp) {
 		rjt_data.reason = ELS_RJT_UNAB;
-		rjt_data.reason = ELS_EXPL_INSUF_RES;
+		rjt_data.explan = ELS_EXPL_INSUF_RES;
 		fc_seq_els_rsp_send(in_fp, ELS_LS_RJT, &rjt_data);
 		goto drop;
 	}
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 6d7840b..f2c0ba6c 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -141,6 +141,13 @@ struct lpfc_dmabuf {
 	uint32_t   buffer_tag;	/* used for tagged queue ring */
 };
 
+struct lpfc_nvmet_ctxbuf {
+	struct list_head list;
+	struct lpfc_nvmet_rcv_ctx *context;
+	struct lpfc_iocbq *iocbq;
+	struct lpfc_sglq *sglq;
+};
+
 struct lpfc_dma_pool {
 	struct lpfc_dmabuf   *elements;
 	uint32_t    max_count;
@@ -163,9 +170,7 @@ struct rqb_dmabuf {
 	struct lpfc_dmabuf dbuf;
 	uint16_t total_size;
 	uint16_t bytes_recv;
-	void *context;
-	struct lpfc_iocbq *iocbq;
-	struct lpfc_sglq *sglq;
+	uint16_t idx;
 	struct lpfc_queue *hrq;	  /* ptr to associated Header RQ */
 	struct lpfc_queue *drq;	  /* ptr to associated Data RQ */
 };
@@ -670,6 +675,8 @@ struct lpfc_hba {
 					/* INIT_LINK mailbox command */
 #define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
 #define LS_IGNORE_ERATT       0x4	/* intr handler should ignore ERATT */
+#define LS_MDS_LINK_DOWN      0x8	/* MDS Diagnostics Link Down */
+#define LS_MDS_LOOPBACK      0x16	/* MDS Diagnostics Link Up (Loopback) */
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */
@@ -777,7 +784,6 @@ struct lpfc_hba {
 	uint32_t cfg_nvme_oas;
 	uint32_t cfg_nvme_io_channel;
 	uint32_t cfg_nvmet_mrq;
-	uint32_t cfg_nvmet_mrq_post;
 	uint32_t cfg_enable_nvmet;
 	uint32_t cfg_nvme_enable_fb;
 	uint32_t cfg_nvmet_fb_size;
@@ -943,6 +949,7 @@ struct lpfc_hba {
 	struct pci_pool *lpfc_mbuf_pool;
 	struct pci_pool *lpfc_hrb_pool;	/* header receive buffer pool */
 	struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */
+	struct pci_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
 	struct pci_pool *lpfc_hbq_pool;	/* SLI3 hbq buffer pool */
 	struct pci_pool *txrdy_payload_pool;
 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
@@ -1228,7 +1235,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba)
 static inline struct lpfc_sli_ring *
 lpfc_phba_elsring(struct lpfc_hba *phba)
 {
-	if (phba->sli_rev == LPFC_SLI_REV4)
-		return phba->sli4_hba.els_wq->pring;
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		if (phba->sli4_hba.els_wq)
+			return phba->sli4_hba.els_wq->pring;
+		else
+			return NULL;
+	}
 	return &phba->sli.sli3_ring[LPFC_ELS_RING];
 }
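lpfc_phba_elsring() can now return NULL on SLI4 when the ELS work queue has not been created (for example during error recovery), and later hunks in this diff add the matching checks. The expected caller shape, sketched against the driver's own headers:

static void example_flush_els(struct lpfc_hba *phba)
{
	struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba);

	if (unlikely(!pring))
		return;		/* no ELS WQ yet; nothing to flush */

	/* ...safe to dereference pring from here on... */
}
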
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4830370..bb2d9e2 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -60,9 +60,9 @@
 #define LPFC_MIN_DEVLOSS_TMO	1
 #define LPFC_MAX_DEVLOSS_TMO	255
 
-#define LPFC_DEF_MRQ_POST	256
-#define LPFC_MIN_MRQ_POST	32
-#define LPFC_MAX_MRQ_POST	512
+#define LPFC_DEF_MRQ_POST	512
+#define LPFC_MIN_MRQ_POST	512
+#define LPFC_MAX_MRQ_POST	2048
 
 /*
  * Write key size should be multiple of 4. If write key is changed
@@ -205,8 +205,9 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP: Rcv %08x Drop %08x\n",
+				"FCP: Rcv %08x Release %08x Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->xmt_fcp_release),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
 		if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
@@ -218,15 +219,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 		}
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n",
+				"FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x "
+				"drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_read),
 				atomic_read(&tgtp->xmt_fcp_read_rsp),
 				atomic_read(&tgtp->xmt_fcp_write),
-				atomic_read(&tgtp->xmt_fcp_rsp));
-
-		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_rsp),
 				atomic_read(&tgtp->xmt_fcp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
@@ -236,10 +234,22 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
+
+		len += snprintf(buf + len, PAGE_SIZE - len,
+				"IO_CTX: %08x outstanding %08x total %x",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
 
 		len +=  snprintf(buf+len, PAGE_SIZE-len, "\n");
 		return len;
@@ -3312,14 +3322,6 @@ LPFC_ATTR_R(nvmet_mrq,
 	    "Specify number of RQ pairs for processing NVMET cmds");
 
 /*
- * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ
- *
- */
-LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST,
-	    LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST,
-	    "Specify number of buffers to post on every MRQ");
-
-/*
  * lpfc_enable_fc4_type: Defines what FC4 types are supported.
  * Supported Values:  1 - register just FCP
  *                    3 - register both FCP and NVME
@@ -5154,7 +5156,6 @@ struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_suppress_rsp,
 	&dev_attr_lpfc_nvme_io_channel,
 	&dev_attr_lpfc_nvmet_mrq,
-	&dev_attr_lpfc_nvmet_mrq_post,
 	&dev_attr_lpfc_nvme_enable_fb,
 	&dev_attr_lpfc_nvmet_fb_size,
 	&dev_attr_lpfc_enable_bg,
@@ -6194,7 +6195,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
 
 	lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type);
 	lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq);
-	lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post);
 
 	/* Initialize first burst. Target vs Initiator are different. */
 	lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb);
@@ -6291,7 +6291,6 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba)
 		/* Not NVME Target mode.  Turn off Target parameters. */
 		phba->nvmet_support = 0;
 		phba->cfg_nvmet_mrq = 0;
-		phba->cfg_nvmet_mrq_post = 0;
 		phba->cfg_nvmet_fb_size = 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 1c55408..da669dc 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -75,6 +75,10 @@ void lpfc_init_vpi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_cancel_all_vport_retry_delay_timer(struct lpfc_hba *);
 void lpfc_retry_pport_discovery(struct lpfc_hba *);
 void lpfc_release_rpi(struct lpfc_hba *, struct lpfc_vport *, uint16_t);
+int lpfc_init_iocb_list(struct lpfc_hba *phba, int cnt);
+void lpfc_free_iocb_list(struct lpfc_hba *phba);
+int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+			struct lpfc_queue *drq, int count, int idx);
 
 void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
@@ -123,7 +127,7 @@ int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *,
 void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *);
 int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *,
 		     struct serv_parm *, uint32_t, int);
-int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
+void lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
 void lpfc_more_plogi(struct lpfc_vport *);
 void lpfc_more_adisc(struct lpfc_vport *);
 void lpfc_end_rscn(struct lpfc_vport *);
@@ -246,16 +250,14 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *);
 void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *);
 struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba);
 void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab);
-void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-			struct lpfc_dmabuf *mp);
+void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
+			    struct lpfc_nvmet_ctxbuf *ctxp);
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
 			uint16_t);
 int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		     struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe);
-int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq,
-			struct lpfc_queue *dq, int count);
 int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq);
 void lpfc_unregister_fcf(struct lpfc_hba *);
 void lpfc_unregister_fcf_rescan(struct lpfc_hba *);
@@ -271,6 +273,7 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *, uint16_t);
 void lpfc_sli4_clear_fcf_rr_bmask(struct lpfc_hba *);
 
 int lpfc_mem_alloc(struct lpfc_hba *, int align);
+int lpfc_nvmet_mem_alloc(struct lpfc_hba *phba);
 int lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *);
 void lpfc_mem_free(struct lpfc_hba *);
 void lpfc_mem_free_all(struct lpfc_hba *);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index c7962da..24ce96d 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -978,9 +978,10 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 					 ndlp, did, ndlp->nlp_fc4_type,
 					 FC_TYPE_FCP, FC_TYPE_NVME);
 			ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+
+			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
+			lpfc_issue_els_prli(vport, ndlp, 0);
 		}
-		lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
-		lpfc_issue_els_prli(vport, ndlp, 0);
 	} else
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY,
 				 "3065 GFT_ID failed x%08x\n", irsp->ulpStatus);
@@ -2092,6 +2093,7 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport,
 
 	ae->un.AttrTypes[3] = 0x02; /* Type 1 - ELS */
 	ae->un.AttrTypes[2] = 0x01; /* Type 8 - FCP */
+	ae->un.AttrTypes[6] = 0x01; /* Type 40 - NVME */
 	ae->un.AttrTypes[7] = 0x01; /* Type 32 - CT */
 	size = FOURBYTES + 32;
 	ad->AttrLen = cpu_to_be16(size);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index fce549a..4bcb92c 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -798,21 +798,22 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				atomic_read(&tgtp->xmt_fcp_rsp));
 
 		len += snprintf(buf + len, size - len,
-				"FCP Rsp: abort %08x drop %08x\n",
-				atomic_read(&tgtp->xmt_fcp_abort),
-				atomic_read(&tgtp->xmt_fcp_drop));
-
-		len += snprintf(buf + len, size - len,
 				"FCP Rsp Cmpl: %08x err %08x drop %08x\n",
 				atomic_read(&tgtp->xmt_fcp_rsp_cmpl),
 				atomic_read(&tgtp->xmt_fcp_rsp_error),
 				atomic_read(&tgtp->xmt_fcp_rsp_drop));
 
 		len += snprintf(buf + len, size - len,
-				"ABORT: Xmt %08x Err %08x Cmpl %08x",
+				"ABORT: Xmt %08x Cmpl %08x\n",
+				atomic_read(&tgtp->xmt_fcp_abort),
+				atomic_read(&tgtp->xmt_fcp_abort_cmpl));
+
+		len += snprintf(buf + len, size - len,
+				"ABORT: Sol %08x  Usol %08x Err %08x Cmpl %08x",
+				atomic_read(&tgtp->xmt_abort_sol),
+				atomic_read(&tgtp->xmt_abort_unsol),
 				atomic_read(&tgtp->xmt_abort_rsp),
-				atomic_read(&tgtp->xmt_abort_rsp_error),
-				atomic_read(&tgtp->xmt_abort_cmpl));
+				atomic_read(&tgtp->xmt_abort_rsp_error));
 
 		len +=  snprintf(buf + len, size - len, "\n");
 
@@ -841,6 +842,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			}
 			spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		}
+
+		len += snprintf(buf + len, size - len,
+				"IO_CTX: %08x  outstanding %08x total %08x\n",
+				phba->sli4_hba.nvmet_ctx_cnt,
+				phba->sli4_hba.nvmet_io_wait_cnt,
+				phba->sli4_hba.nvmet_io_wait_total);
 	} else {
 		if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 			return len;
@@ -1959,6 +1966,7 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -1967,19 +1975,22 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
 
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return nbytes;
 }
@@ -3070,11 +3081,11 @@ __lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype,
 			qp->assoc_qid, qp->q_cnt_1,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\t\tWQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len,
 			LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 	return len;
@@ -3121,11 +3132,11 @@ __lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype,
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"\tCQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count,
 			qp->entry_size, qp->host_index,
-			qp->hba_index);
+			qp->hba_index, qp->entry_repost);
 
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
@@ -3143,20 +3154,20 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp,
 			"\t\t%s RQ info: ", rqtype);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
 			"AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x "
-			"trunc:x%x rcv:x%llx]\n",
+			"posted:x%x rcv:x%llx]\n",
 			qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2,
 			qp->q_cnt_3, (unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tHQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tHQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]\n",
+			"\t\tDQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]\n",
 			datqp->queue_id, datqp->entry_count,
 			datqp->entry_size, datqp->host_index,
-			datqp->hba_index);
+			datqp->hba_index, datqp->entry_repost);
 	return len;
 }
 
@@ -3242,10 +3253,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype,
 			eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3,
 			(unsigned long long)qp->q_cnt_4);
 	len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len,
-			"EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], "
-			"HOST-IDX[%04d], PORT-IDX[%04d]",
+			"EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], "
+			"HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]",
 			qp->queue_id, qp->entry_count, qp->entry_size,
-			qp->host_index, qp->hba_index);
+			qp->host_index, qp->hba_index, qp->entry_repost);
 	len +=  snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n");
 
 	return len;
@@ -5855,8 +5866,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 			atomic_dec(&lpfc_debugfs_hba_count);
 		}
 
-		debugfs_remove(lpfc_debugfs_root); /* lpfc */
-		lpfc_debugfs_root = NULL;
+		if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
+			debugfs_remove(lpfc_debugfs_root); /* lpfc */
+			lpfc_debugfs_root = NULL;
+		}
 	}
 #endif
 	return;
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 9d5a379..094c97b 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -90,6 +90,7 @@ struct lpfc_nodelist {
 #define NLP_FCP_INITIATOR  0x10			/* entry is an FCP Initiator */
 #define NLP_NVME_TARGET    0x20			/* entry is a NVME Target */
 #define NLP_NVME_INITIATOR 0x40			/* entry is a NVME Initiator */
+#define NLP_NVME_DISCOVERY 0x80                 /* entry has NVME disc srvc */
 
 	uint16_t	nlp_fc4_type;		/* FC types node supports. */
 						/* Assigned from GID_FF, only
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 67827e3..8e532b3 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1047,6 +1047,13 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 				 irsp->ulpStatus, irsp->un.ulpWord[4],
 				 irsp->ulpTimeout);
 
+
+		/* If this is not a loop open failure, bail out */
+		if (!(irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+		      ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) ==
+					IOERR_LOOP_OPEN_FAILURE)))
+			goto flogifail;
+
 		/* FLOGI failed, so there is no fabric */
 		spin_lock_irq(shost->host_lock);
 		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
@@ -2077,16 +2084,19 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
 	if (irsp->ulpStatus) {
 		/* Check for retry */
+		ndlp->fc4_prli_sent--;
 		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
 			/* ELS command is being retried */
-			ndlp->fc4_prli_sent--;
 			goto out;
 		}
+
 		/* PRLI failed */
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-				 "2754 PRLI failure DID:%06X Status:x%x/x%x\n",
+				 "2754 PRLI failure DID:%06X Status:x%x/x%x, "
+				 "data: x%x\n",
 				 ndlp->nlp_DID, irsp->ulpStatus,
-				 irsp->un.ulpWord[4]);
+				 irsp->un.ulpWord[4], ndlp->fc4_prli_sent);
+
 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
 		if (lpfc_error_lost_link(irsp))
 			goto out;
@@ -7441,6 +7451,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
 	 */
 	spin_lock_irq(&phba->hbalock);
 	pring = lpfc_phba_elsring(phba);
+
+	/* Bail out if we've no ELS wq, like in PCI error recovery case. */
+	if (unlikely(!pring)) {
+		spin_unlock_irq(&phba->hbalock);
+		return;
+	}
+
 	if (phba->sli_rev == LPFC_SLI_REV4)
 		spin_lock(&pring->ring_lock);
 
@@ -8667,7 +8684,8 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 		lpfc_do_scr_ns_plogi(phba, vport);
 	goto out;
 fdisc_failed:
-	if (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS)
+	if (vport->fc_vport &&
+	    (vport->fc_vport->vport_state != FC_VPORT_NO_FABRIC_RSCS))
 		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
 	/* Cancel discovery timer */
 	lpfc_can_disctmo(vport);
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0482c558..3ffcd92 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -693,15 +693,16 @@ lpfc_work_done(struct lpfc_hba *phba)
 	pring = lpfc_phba_elsring(phba);
 	status = (ha_copy & (HA_RXMASK  << (4*LPFC_ELS_RING)));
 	status >>= (4*LPFC_ELS_RING);
-	if ((status & HA_RXMASK) ||
-	    (pring->flag & LPFC_DEFERRED_RING_EVENT) ||
-	    (phba->hba_flag & HBA_SP_QUEUE_EVT)) {
+	if (pring && (status & HA_RXMASK ||
+		      pring->flag & LPFC_DEFERRED_RING_EVENT ||
+		      phba->hba_flag & HBA_SP_QUEUE_EVT)) {
 		if (pring->flag & LPFC_STOP_IOCB_EVENT) {
 			pring->flag |= LPFC_DEFERRED_RING_EVENT;
 			/* Set the lpfc data pending flag */
 			set_bit(LPFC_DATA_READY, &phba->data_flags);
 		} else {
-			if (phba->link_state >= LPFC_LINK_UP) {
+			if (phba->link_state >= LPFC_LINK_UP ||
+			    phba->link_flag & LS_MDS_LOOPBACK) {
 				pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
 				lpfc_sli_handle_slow_ring_event(phba, pring,
 								(status &
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 1d12f2b..e0a5fce 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -1356,6 +1356,7 @@ struct lpfc_mbx_wq_destroy {
 
 #define LPFC_HDR_BUF_SIZE 128
 #define LPFC_DATA_BUF_SIZE 2048
+#define LPFC_NVMET_DATA_BUF_SIZE 128
 struct rq_context {
 	uint32_t word0;
 #define lpfc_rq_context_rqe_count_SHIFT	16	/* Version 0 Only */
@@ -4420,6 +4421,19 @@ struct fcp_treceive64_wqe {
 };
 #define TXRDY_PAYLOAD_LEN      12
 
+#define CMD_SEND_FRAME	0xE1
+
+struct send_frame_wqe {
+	struct ulp_bde64 bde;          /* words 0-2 */
+	uint32_t frame_len;            /* word 3 */
+	uint32_t fc_hdr_wd0;           /* word 4 */
+	uint32_t fc_hdr_wd1;           /* word 5 */
+	struct wqe_common wqe_com;     /* words 6-11 */
+	uint32_t fc_hdr_wd2;           /* word 12 */
+	uint32_t fc_hdr_wd3;           /* word 13 */
+	uint32_t fc_hdr_wd4;           /* word 14 */
+	uint32_t fc_hdr_wd5;           /* word 15 */
+};
 
 union lpfc_wqe {
 	uint32_t words[16];
@@ -4438,7 +4452,7 @@ union lpfc_wqe {
 	struct fcp_trsp64_wqe fcp_trsp;
 	struct fcp_tsend64_wqe fcp_tsend;
 	struct fcp_treceive64_wqe fcp_treceive;
-
+	struct send_frame_wqe send_frame;
 };
 
 union lpfc_wqe128 {
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4b1eb98..9add947 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1099,7 +1099,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
 
 		list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
 			ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
-			lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 		}
 	}
 
@@ -3381,7 +3381,7 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 {
 	struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL;
 	uint16_t i, lxri, xri_cnt, els_xri_cnt;
-	uint16_t nvmet_xri_cnt, tot_cnt;
+	uint16_t nvmet_xri_cnt;
 	LIST_HEAD(nvmet_sgl_list);
 	int rc;
 
@@ -3389,15 +3389,9 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba)
 	 * update on pci function's nvmet xri-sgl list
 	 */
 	els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba);
-	nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
-	tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
-	if (nvmet_xri_cnt > tot_cnt) {
-		phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq;
-		nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post;
-		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
-				"6301 NVMET post-sgl count changed to %d\n",
-				phba->cfg_nvmet_mrq_post);
-	}
+
+	/* For NVMET, ALL remaining XRIs are dedicated for IO processing */
+	nvmet_xri_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt;
 
 	if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) {
 		/* els xri-sgl expanded */
@@ -4546,6 +4540,19 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
 	pmb->vport = phba->pport;
 
 	if (phba->sli4_hba.link_state.status != LPFC_FC_LA_TYPE_LINK_UP) {
+		phba->link_flag &= ~(LS_MDS_LINK_DOWN | LS_MDS_LOOPBACK);
+
+		switch (phba->sli4_hba.link_state.status) {
+		case LPFC_FC_LA_TYPE_MDS_LINK_DOWN:
+			phba->link_flag |= LS_MDS_LINK_DOWN;
+			break;
+		case LPFC_FC_LA_TYPE_MDS_LOOPBACK:
+			phba->link_flag |= LS_MDS_LOOPBACK;
+			break;
+		default:
+			break;
+		}
+
 		/* Parse and translate status field */
 		mb = &pmb->u.mb;
 		mb->mbxStatus = lpfc_sli4_parse_latt_fault(phba,
@@ -5830,6 +5837,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
 		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_ctx_list);
+		INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list);
+
 		/* Fast-path XRI aborted CQ Event work queue list */
 		INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
 	}
@@ -5837,6 +5847,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	/* This abort list used by worker thread */
 	spin_lock_init(&phba->sli4_hba.sgl_list_lock);
 	spin_lock_init(&phba->sli4_hba.nvmet_io_lock);
+	spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock);
 
 	/*
 	 * Initialize driver internal slow-path work queues
@@ -5951,16 +5962,21 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 		for (i = 0; i < lpfc_enable_nvmet_cnt; i++) {
 			if (wwn == lpfc_enable_nvmet[i]) {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+				if (lpfc_nvmet_mem_alloc(phba))
+					break;
+
+				phba->nvmet_support = 1; /* a match */
+
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6017 NVME Target %016llx\n",
 						wwn);
-				phba->nvmet_support = 1; /* a match */
 #else
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"6021 Can't enable NVME Target."
 						" NVME_TARGET_FC infrastructure"
 						" is not in kernel\n");
 #endif
+				break;
 			}
 		}
 	}
@@ -6269,7 +6285,7 @@ lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
  *
  * This routine is invoked to free the driver's IOCB list and memory.
  **/
-static void
+void
 lpfc_free_iocb_list(struct lpfc_hba *phba)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
@@ -6297,7 +6313,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba)
  *	0 - successful
  *	other values - error
  **/
-static int
+int
 lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count)
 {
 	struct lpfc_iocbq *iocbq_entry = NULL;
@@ -6525,7 +6541,6 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	uint16_t rpi_limit, curr_rpi_range;
 	struct lpfc_dmabuf *dmabuf;
 	struct lpfc_rpi_hdr *rpi_hdr;
-	uint32_t rpi_count;
 
 	/*
 	 * If the SLI4 port supports extents, posting the rpi header isn't
@@ -6538,8 +6553,7 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 		return NULL;
 
 	/* The limit on the logical index is just the max_rpi count. */
-	rpi_limit = phba->sli4_hba.max_cfg_param.rpi_base +
-	phba->sli4_hba.max_cfg_param.max_rpi - 1;
+	rpi_limit = phba->sli4_hba.max_cfg_param.max_rpi;
 
 	spin_lock_irq(&phba->hbalock);
 	/*
@@ -6550,18 +6564,10 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 	curr_rpi_range = phba->sli4_hba.next_rpi;
 	spin_unlock_irq(&phba->hbalock);
 
-	/*
-	 * The port has a limited number of rpis. The increment here
-	 * is LPFC_RPI_HDR_COUNT - 1 to account for the starting value
-	 * and to allow the full max_rpi range per port.
-	 */
-	if ((curr_rpi_range + (LPFC_RPI_HDR_COUNT - 1)) > rpi_limit)
-		rpi_count = rpi_limit - curr_rpi_range;
-	else
-		rpi_count = LPFC_RPI_HDR_COUNT;
-
-	if (!rpi_count)
+	/* Reached full RPI range */
+	if (curr_rpi_range == rpi_limit)
 		return NULL;
+
 	/*
 	 * First allocate the protocol header region for the port.  The
 	 * port expects a 4KB DMA-mapped memory region that is 4K aligned.
@@ -6595,13 +6601,9 @@ lpfc_sli4_create_rpi_hdr(struct lpfc_hba *phba)
 
 	/* The rpi_hdr stores the logical index only. */
 	rpi_hdr->start_rpi = curr_rpi_range;
+	rpi_hdr->next_rpi = phba->sli4_hba.next_rpi + LPFC_RPI_HDR_COUNT;
 	list_add_tail(&rpi_hdr->list, &phba->sli4_hba.lpfc_rpi_hdr_list);
 
-	/*
-	 * The next_rpi stores the next logical module-64 rpi value used
-	 * to post physical rpis in subsequent rpi postings.
-	 */
-	phba->sli4_hba.next_rpi += rpi_count;
 	spin_unlock_irq(&phba->hbalock);
 	return rpi_hdr;
 
@@ -8172,7 +8174,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for header */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3146 Failed allocate "
@@ -8194,7 +8196,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			/* Create NVMET Receive Queue for data */
 			qdesc = lpfc_sli4_queue_alloc(phba,
 						      phba->sli4_hba.rq_esize,
-						      phba->sli4_hba.rq_ecount);
+						      LPFC_NVMET_RQE_DEF_COUNT);
 			if (!qdesc) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 						"3156 Failed allocate "
@@ -8326,46 +8328,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 }
 
 int
-lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
-		    struct lpfc_queue *drq, int count)
-{
-	int rc, i;
-	struct lpfc_rqe hrqe;
-	struct lpfc_rqe drqe;
-	struct lpfc_rqb *rqbp;
-	struct rqb_dmabuf *rqb_buffer;
-	LIST_HEAD(rqb_buf_list);
-
-	rqbp = hrq->rqbp;
-	for (i = 0; i < count; i++) {
-		rqb_buffer = (rqbp->rqb_alloc_buffer)(phba);
-		if (!rqb_buffer)
-			break;
-		rqb_buffer->hrq = hrq;
-		rqb_buffer->drq = drq;
-		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
-	}
-	while (!list_empty(&rqb_buf_list)) {
-		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
-				 hbuf.list);
-
-		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
-		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
-		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
-		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
-		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
-		if (rc < 0) {
-			(rqbp->rqb_free_buffer)(phba, rqb_buffer);
-		} else {
-			list_add_tail(&rqb_buffer->hbuf.list,
-				      &rqbp->rqb_buffer_list);
-			rqbp->buffer_count++;
-		}
-	}
-	return 1;
-}
-
-int
 lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq)
 {
 	struct lpfc_rqb *rqbp;
@@ -8784,9 +8746,6 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
 		goto out_destroy;
 	}
 
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ);
-	lpfc_rq_adjust_repost(phba, phba->sli4_hba.dat_rq, LPFC_ELS_HBQ);
-
 	rc = lpfc_rq_create(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq,
 			    phba->sli4_hba.els_cq, LPFC_USOL);
 	if (rc) {
@@ -11110,7 +11069,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	struct lpfc_hba   *phba;
 	struct lpfc_vport *vport = NULL;
 	struct Scsi_Host  *shost = NULL;
-	int error, cnt;
+	int error;
 	uint32_t cfg_mode, intr_mode;
 
 	/* Allocate memory for HBA structure */
@@ -11144,22 +11103,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_unset_pci_mem_s4;
 	}
 
-	cnt = phba->cfg_iocb_cnt * 1024;
-	if (phba->nvmet_support)
-		cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq;
-
-	/* Initialize and populate the iocb list per host */
-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-			"2821 initialize iocb list %d total %d\n",
-			phba->cfg_iocb_cnt, cnt);
-	error = lpfc_init_iocb_list(phba, cnt);
-
-	if (error) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"1413 Failed to initialize iocb list.\n");
-		goto out_unset_driver_resource_s4;
-	}
-
 	INIT_LIST_HEAD(&phba->active_rrq_list);
 	INIT_LIST_HEAD(&phba->fcf.fcf_pri_list);
 
@@ -11168,7 +11111,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
 				"1414 Failed to set up driver resource.\n");
-		goto out_free_iocb_list;
+		goto out_unset_driver_resource_s4;
 	}
 
 	/* Get the default values for Model Name and Description */
@@ -11268,8 +11211,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
 	lpfc_destroy_shost(phba);
 out_unset_driver_resource:
 	lpfc_unset_driver_resource_phase2(phba);
-out_free_iocb_list:
-	lpfc_free_iocb_list(phba);
 out_unset_driver_resource_s4:
 	lpfc_sli4_driver_resource_unset(phba);
 out_unset_pci_mem_s4:
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 5986c79..fcc05a1 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -214,6 +214,21 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align)
 	return -ENOMEM;
 }
 
+int
+lpfc_nvmet_mem_alloc(struct lpfc_hba *phba)
+{
+	phba->lpfc_nvmet_drb_pool =
+		pci_pool_create("lpfc_nvmet_drb_pool",
+				phba->pcidev, LPFC_NVMET_DATA_BUF_SIZE,
+				SGL_ALIGN_SZ, 0);
+	if (!phba->lpfc_nvmet_drb_pool) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6024 Can't enable NVME Target - no memory\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * lpfc_mem_free - Frees memory allocated by lpfc_mem_alloc
  * @phba: HBA to free memory for
@@ -232,6 +247,9 @@ lpfc_mem_free(struct lpfc_hba *phba)
 
 	/* Free HBQ pools */
 	lpfc_sli_hbqbuf_free_all(phba);
+	if (phba->lpfc_nvmet_drb_pool)
+		pci_pool_destroy(phba->lpfc_nvmet_drb_pool);
+	phba->lpfc_nvmet_drb_pool = NULL;
 	if (phba->lpfc_drb_pool)
 		pci_pool_destroy(phba->lpfc_drb_pool);
 	phba->lpfc_drb_pool = NULL;
@@ -611,8 +629,6 @@ struct rqb_dmabuf *
 lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 {
 	struct rqb_dmabuf *dma_buf;
-	struct lpfc_iocbq *nvmewqe;
-	union lpfc_wqe128 *wqe;
 
 	dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL);
 	if (!dma_buf)
@@ -624,69 +640,15 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
-					    &dma_buf->dbuf.phys);
+	dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_nvmet_drb_pool,
+					    GFP_KERNEL, &dma_buf->dbuf.phys);
 	if (!dma_buf->dbuf.virt) {
 		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
 			      dma_buf->hbuf.phys);
 		kfree(dma_buf);
 		return NULL;
 	}
-	dma_buf->total_size = LPFC_DATA_BUF_SIZE;
-
-	dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx),
-				   GFP_KERNEL);
-	if (!dma_buf->context) {
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		return NULL;
-	}
-
-	dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-	if (!dma_buf->iocbq) {
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"2621 Ran out of nvmet iocb/WQEs\n");
-		return NULL;
-	}
-	dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
-	nvmewqe = dma_buf->iocbq;
-	wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
-	/* Initialize WQE */
-	memset(wqe, 0, sizeof(union lpfc_wqe));
-	/* Word 7 */
-	bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
-	bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
-	bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
-	/* Word 10 */
-	bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
-	bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
-	bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
-
-	dma_buf->iocbq->context1 = NULL;
-	spin_lock(&phba->sli4_hba.sgl_list_lock);
-	dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq);
-	spin_unlock(&phba->sli4_hba.sgl_list_lock);
-	if (!dma_buf->sglq) {
-		lpfc_sli_release_iocbq(phba, dma_buf->iocbq);
-		kfree(dma_buf->context);
-		pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
-			      dma_buf->dbuf.phys);
-		pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt,
-			      dma_buf->hbuf.phys);
-		kfree(dma_buf);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
-				"6132 Ran out of nvmet XRIs\n");
-		return NULL;
-	}
+	dma_buf->total_size = LPFC_NVMET_DATA_BUF_SIZE;
 	return dma_buf;
 }
 
@@ -705,20 +667,9 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
 void
 lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab)
 {
-	unsigned long flags;
-
-	__lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag);
-	dmab->sglq->state = SGL_FREED;
-	dmab->sglq->ndlp = NULL;
-
-	spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
-	list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list);
-	spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags);
-
-	lpfc_sli_release_iocbq(phba, dmab->iocbq);
-	kfree(dmab->context);
 	pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys);
-	pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys);
+	pci_pool_free(phba->lpfc_nvmet_drb_pool,
+		      dmab->dbuf.virt, dmab->dbuf.phys);
 	kfree(dmab);
 }
 
@@ -803,6 +754,11 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
 	rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
 	if (rc < 0) {
 		(rqbp->rqb_free_buffer)(phba, rqb_entry);
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"6409 Cannot post to RQ %d: %x %x\n",
+				rqb_entry->hrq->queue_id,
+				rqb_entry->hrq->host_index,
+				rqb_entry->hrq->hba_index);
 	} else {
 		list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
 		rqbp->buffer_count++;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 8777c2d..f74cb01 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -206,7 +206,7 @@ lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
  * associated with a LPFC_NODELIST entry. This
  * routine effectively results in a "software abort".
  */
-int
+void
 lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
 	LIST_HEAD(abort_list);
@@ -215,6 +215,10 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 
 	pring = lpfc_phba_elsring(phba);
 
+	/* In case of error recovery path, we might have a NULL pring here */
+	if (!pring)
+		return;
+
 	/* Abort outstanding I/O on NPort <nlp_DID> */
 	lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_DISCOVERY,
 			 "2819 Abort outstanding I/O on NPort x%x "
@@ -273,7 +277,6 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 			      IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED);
 
 	lpfc_cancel_retry_delay_tmo(phba->pport, ndlp);
-	return 0;
 }
 
 static int
@@ -1944,7 +1947,13 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
 		/* Target driver cannot solicit NVME FB. */
 		if (bf_get_be32(prli_tgt, nvpr)) {
+			/* Complete the nvme target roles.  The transport
+			 * needs to know if the rport is capable of
+			 * discovery in addition to its role.
+			 */
 			ndlp->nlp_type |= NLP_NVME_TARGET;
+			if (bf_get_be32(prli_disc, nvpr))
+				ndlp->nlp_type |= NLP_NVME_DISCOVERY;
 			if ((bf_get_be32(prli_fba, nvpr) == 1) &&
 			    (bf_get_be32(prli_fb_sz, nvpr) > 0) &&
 			    (phba->cfg_nvme_enable_fb) &&
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 0488580..518b15e 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -142,7 +142,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 }
 
 /**
- * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context
+ * lpfc_nvmet_ctxbuf_post - Repost a NVMET RQ DMA buffer and clean up context
  * @phba: HBA buffer is associated with
  * @ctxp: context to clean up
  * @mp: Buffer to free
@@ -155,24 +155,113 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
  * Returns: None
  **/
 void
-lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp,
-		   struct lpfc_dmabuf *mp)
+lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
 {
-	if (ctxp) {
-		if (ctxp->flag)
-			lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-				"6314 rq_post ctx xri x%x flag x%x\n",
-				ctxp->oxid, ctxp->flag);
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+	struct lpfc_nvmet_tgtport *tgtp;
+	struct fc_frame_header *fc_hdr;
+	struct rqb_dmabuf *nvmebuf;
+	struct lpfc_dmabuf *hbufp;
+	uint32_t *payload;
+	uint32_t size, oxid, sid, rc;
+	unsigned long iflag;
 
-		if (ctxp->txrdy) {
-			pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
-				      ctxp->txrdy_phys);
-			ctxp->txrdy = NULL;
-			ctxp->txrdy_phys = 0;
-		}
-		ctxp->state = LPFC_NVMET_STE_FREE;
+	if (ctxp->txrdy) {
+		pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy,
+			      ctxp->txrdy_phys);
+		ctxp->txrdy = NULL;
+		ctxp->txrdy_phys = 0;
 	}
-	lpfc_rq_buf_free(phba, mp);
+	ctxp->state = LPFC_NVMET_STE_FREE;
+
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+	if (phba->sli4_hba.nvmet_io_wait_cnt) {
+		hbufp = &nvmebuf->hbuf;
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_io_wait_list,
+				 nvmebuf, struct rqb_dmabuf,
+				 hbuf.list);
+		phba->sli4_hba.nvmet_io_wait_cnt--;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
+		oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+		payload = (uint32_t *)(nvmebuf->dbuf.virt);
+		size = nvmebuf->bytes_recv;
+		sid = sli4_sid_from_fc_hdr(fc_hdr);
+
+		ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+		memset(ctxp, 0, sizeof(ctxp->ctx));
+		ctxp->wqeq = NULL;
+		ctxp->txrdy = NULL;
+		ctxp->offset = 0;
+		ctxp->phba = phba;
+		ctxp->size = size;
+		ctxp->oxid = oxid;
+		ctxp->sid = sid;
+		ctxp->state = LPFC_NVMET_STE_RCV;
+		ctxp->entry_cnt = 1;
+		ctxp->flag = 0;
+		ctxp->ctxbuf = ctx_buf;
+		spin_lock_init(&ctxp->ctxlock);
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+		if (phba->ktime_on) {
+			ctxp->ts_cmd_nvme = ktime_get_ns();
+			ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme;
+			ctxp->ts_nvme_data = 0;
+			ctxp->ts_data_wqput = 0;
+			ctxp->ts_isr_data = 0;
+			ctxp->ts_data_nvme = 0;
+			ctxp->ts_nvme_status = 0;
+			ctxp->ts_status_wqput = 0;
+			ctxp->ts_isr_status = 0;
+			ctxp->ts_status_nvme = 0;
+		}
+#endif
+		atomic_inc(&tgtp->rcv_fcp_cmd_in);
+		/*
+		 * The calling sequence should be:
+		 * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done
+		 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+		 * When we return from nvmet_fc_rcv_fcp_req, all relevant info
+		 * in the NVME command / FC header is stored.
+		 * A buffer has already been reposted for this IO, so just free
+		 * the nvmebuf.
+		 */
+		rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+					  payload, size);
+
+		/* Process FCP command */
+		if (rc == 0) {
+			atomic_inc(&tgtp->rcv_fcp_cmd_out);
+			nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+			return;
+		}
+
+		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
+		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+				"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+				ctxp->oxid, rc,
+				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->rcv_fcp_cmd_out),
+				atomic_read(&tgtp->xmt_fcp_release));
+
+		lpfc_nvmet_defer_release(phba, ctxp);
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
+		return;
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	list_add_tail(&ctx_buf->list,
+		      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+	phba->sli4_hba.nvmet_ctx_cnt++;
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
+#endif
 }
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -502,6 +591,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 				"6150 LS Drop IO x%x: Prep\n",
 				ctxp->oxid);
 		lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+		atomic_inc(&nvmep->xmt_ls_abort);
 		lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp,
 						ctxp->sid, ctxp->oxid);
 		return -ENOMEM;
@@ -545,6 +635,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
 	lpfc_nlp_put(nvmewqeq->context1);
 
 	lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+	atomic_inc(&nvmep->xmt_ls_abort);
 	lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
 	return -ENXIO;
 }
@@ -612,9 +703,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
 			 ctxp->oxid, rsp->op, rsp->rsplen);
 
+	ctxp->flag |= LPFC_NVMET_IO_INP;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
 	if (rc == WQE_SUCCESS) {
-		ctxp->flag |= LPFC_NVMET_IO_INP;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 		if (!phba->ktime_on)
 			return 0;
@@ -692,6 +783,7 @@ static void
 lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 			   struct nvmefc_tgt_fcp_req *rsp)
 {
+	struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
@@ -707,13 +799,15 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	}
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
 
-	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid,
-			 ctxp->state, 0);
+	lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d abt %d\n", ctxp->oxid,
+			 ctxp->state, aborting);
+
+	atomic_inc(&lpfc_nvmep->xmt_fcp_release);
 
 	if (aborting)
 		return;
 
-	lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+	lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 }
 
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
@@ -734,17 +828,128 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
 	.target_priv_sz = sizeof(struct lpfc_nvmet_tgtport),
 };
 
+void
+lpfc_nvmet_cleanup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf, *next_ctx_buf;
+	unsigned long flags;
+
+	list_for_each_entry_safe(
+		ctx_buf, next_ctx_buf,
+		&phba->sli4_hba.lpfc_nvmet_ctx_list, list) {
+		spin_lock_irqsave(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		list_del_init(&ctx_buf->list);
+		spin_unlock_irqrestore(
+			&phba->sli4_hba.abts_nvme_buf_list_lock, flags);
+		__lpfc_clear_active_sglq(phba,
+					 ctx_buf->sglq->sli4_lxritag);
+		ctx_buf->sglq->state = SGL_FREED;
+		ctx_buf->sglq->ndlp = NULL;
+
+		spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags);
+		list_add_tail(&ctx_buf->sglq->list,
+			      &phba->sli4_hba.lpfc_nvmet_sgl_list);
+		spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock,
+				       flags);
+
+		lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+		kfree(ctx_buf->context);
+	}
+}
+
+int
+lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
+{
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
+	struct lpfc_iocbq *nvmewqe;
+	union lpfc_wqe128 *wqe;
+	int i;
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
+			"6403 Allocate NVMET resources for %d XRIs\n",
+			phba->sli4_hba.nvmet_xri_cnt);
+
+	/* For all nvmet xris, allocate resources needed to process a
+	 * received command on a per xri basis.
+	 */
+	for (i = 0; i < phba->sli4_hba.nvmet_xri_cnt; i++) {
+		ctx_buf = kzalloc(sizeof(*ctx_buf), GFP_KERNEL);
+		if (!ctx_buf) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6404 Ran out of memory for NVMET\n");
+			return -ENOMEM;
+		}
+
+		ctx_buf->context = kzalloc(sizeof(*ctx_buf->context),
+					   GFP_KERNEL);
+		if (!ctx_buf->context) {
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6405 Ran out of NVMET "
+					"context memory\n");
+			return -ENOMEM;
+		}
+		ctx_buf->context->ctxbuf = ctx_buf;
+
+		ctx_buf->iocbq = lpfc_sli_get_iocbq(phba);
+		if (!ctx_buf->iocbq) {
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6406 Ran out of NVMET iocb/WQEs\n");
+			return -ENOMEM;
+		}
+		ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
+		nvmewqe = ctx_buf->iocbq;
+		wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
+		/* Initialize WQE */
+		memset(wqe, 0, sizeof(union lpfc_wqe));
+		/* Word 7 */
+		bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI);
+		bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3);
+		bf_set(wqe_pu, &wqe->generic.wqe_com, 1);
+		/* Word 10 */
+		bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1);
+		bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0);
+		bf_set(wqe_qosd, &wqe->generic.wqe_com, 0);
+
+		ctx_buf->iocbq->context1 = NULL;
+		spin_lock(&phba->sli4_hba.sgl_list_lock);
+		ctx_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, ctx_buf->iocbq);
+		spin_unlock(&phba->sli4_hba.sgl_list_lock);
+		if (!ctx_buf->sglq) {
+			lpfc_sli_release_iocbq(phba, ctx_buf->iocbq);
+			kfree(ctx_buf->context);
+			kfree(ctx_buf);
+			lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+					"6407 Ran out of NVMET XRIs\n");
+			return -ENOMEM;
+		}
+		spin_lock(&phba->sli4_hba.nvmet_io_lock);
+		list_add_tail(&ctx_buf->list,
+			      &phba->sli4_hba.lpfc_nvmet_ctx_list);
+		spin_unlock(&phba->sli4_hba.nvmet_io_lock);
+	}
+	phba->sli4_hba.nvmet_ctx_cnt = phba->sli4_hba.nvmet_xri_cnt;
+	return 0;
+}
+
 int
 lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 {
 	struct lpfc_vport  *vport = phba->pport;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct nvmet_fc_port_info pinfo;
-	int error = 0;
+	int error;
 
 	if (phba->targetport)
 		return 0;
 
+	error = lpfc_nvmet_setup_io_context(phba);
+	if (error)
+		return error;
+
 	memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info));
 	pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
 	pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
@@ -772,13 +977,16 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 					     &phba->pcidev->dev,
 					     &phba->targetport);
 #else
-	error = -ENOMEM;
+	error = -ENOENT;
 #endif
 	if (error) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
 				"6025 Cannot register NVME targetport "
 				"x%x\n", error);
 		phba->targetport = NULL;
+
+		lpfc_nvmet_cleanup_io_context(phba);
+
 	} else {
 		tgtp = (struct lpfc_nvmet_tgtport *)
 			phba->targetport->private;
@@ -795,6 +1003,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_ls_req_out, 0);
 		atomic_set(&tgtp->rcv_ls_req_drop, 0);
 		atomic_set(&tgtp->xmt_ls_abort, 0);
+		atomic_set(&tgtp->xmt_ls_abort_cmpl, 0);
 		atomic_set(&tgtp->xmt_ls_rsp, 0);
 		atomic_set(&tgtp->xmt_ls_drop, 0);
 		atomic_set(&tgtp->xmt_ls_rsp_error, 0);
@@ -802,18 +1011,21 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 		atomic_set(&tgtp->rcv_fcp_cmd_in, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_out, 0);
 		atomic_set(&tgtp->rcv_fcp_cmd_drop, 0);
-		atomic_set(&tgtp->xmt_fcp_abort, 0);
 		atomic_set(&tgtp->xmt_fcp_drop, 0);
 		atomic_set(&tgtp->xmt_fcp_read_rsp, 0);
 		atomic_set(&tgtp->xmt_fcp_read, 0);
 		atomic_set(&tgtp->xmt_fcp_write, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp, 0);
+		atomic_set(&tgtp->xmt_fcp_release, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_error, 0);
 		atomic_set(&tgtp->xmt_fcp_rsp_drop, 0);
+		atomic_set(&tgtp->xmt_fcp_abort, 0);
+		atomic_set(&tgtp->xmt_fcp_abort_cmpl, 0);
+		atomic_set(&tgtp->xmt_abort_unsol, 0);
+		atomic_set(&tgtp->xmt_abort_sol, 0);
 		atomic_set(&tgtp->xmt_abort_rsp, 0);
 		atomic_set(&tgtp->xmt_abort_rsp_error, 0);
-		atomic_set(&tgtp->xmt_abort_cmpl, 0);
 	}
 	return error;
 }
@@ -864,7 +1076,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		/* Check if we already received a free context call
@@ -885,7 +1097,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 		    (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
 		     ndlp->nlp_state == NLP_STE_MAPPED_NODE)) {
 			lpfc_set_rrq_active(phba, ndlp,
-				ctxp->rqb_buffer->sglq->sli4_lxritag,
+				ctxp->ctxbuf->sglq->sli4_lxritag,
 				rxid, 1);
 			lpfc_sli4_abts_err_handler(phba, ndlp, axri);
 		}
@@ -894,8 +1106,8 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
 				"6318 XB aborted %x flg x%x (%x)\n",
 				ctxp->oxid, ctxp->flag, released);
 		if (released)
-			lpfc_nvmet_rq_post(phba, ctxp,
-					   &ctxp->rqb_buffer->hbuf);
+			lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
+
 		if (rrq_empty)
 			lpfc_worker_wake_up(phba);
 		return;
@@ -923,7 +1135,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	list_for_each_entry_safe(ctxp, next_ctxp,
 				 &phba->sli4_hba.lpfc_abts_nvmet_ctx_list,
 				 list) {
-		if (ctxp->rqb_buffer->sglq->sli4_xritag != xri)
+		if (ctxp->ctxbuf->sglq->sli4_xritag != xri)
 			continue;
 
 		spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
@@ -975,6 +1187,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 		init_completion(&tgtp->tport_unreg_done);
 		nvmet_fc_unregister_targetport(phba->targetport);
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
+		lpfc_nvmet_cleanup_io_context(phba);
 	}
 	phba->targetport = NULL;
 #endif
@@ -1010,6 +1223,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
@@ -1104,39 +1318,71 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	struct lpfc_nvmet_rcv_ctx *ctxp;
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_ctxbuf *ctx_buf;
 	uint32_t *payload;
-	uint32_t size, oxid, sid, rc;
+	uint32_t size, oxid, sid, rc, qno;
+	unsigned long iflag;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint32_t id;
 #endif
 
+	ctx_buf = NULL;
 	if (!nvmebuf || !phba->targetport) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6157 FCP Drop IO\n");
+				"6157 NVMET FCP Drop IO\n");
 		oxid = 0;
 		size = 0;
 		sid = 0;
+		ctxp = NULL;
 		goto dropit;
 	}
 
+	spin_lock_irqsave(&phba->sli4_hba.nvmet_io_lock, iflag);
+	if (phba->sli4_hba.nvmet_ctx_cnt) {
+		list_remove_head(&phba->sli4_hba.lpfc_nvmet_ctx_list,
+				 ctx_buf, struct lpfc_nvmet_ctxbuf, list);
+		phba->sli4_hba.nvmet_ctx_cnt--;
+	}
+	spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_lock, iflag);
+
+	fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
+	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+	size = nvmebuf->bytes_recv;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
+		id = smp_processor_id();
+		if (id < LPFC_CHECK_CPU_CNT)
+			phba->cpucheck_rcv_io[id]++;
+	}
+#endif
+
+	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
+			 oxid, size, smp_processor_id());
+
+	if (!ctx_buf) {
+		/* Queue this NVME IO to process later */
+		spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
+		list_add_tail(&nvmebuf->hbuf.list,
+			      &phba->sli4_hba.lpfc_nvmet_io_wait_list);
+		phba->sli4_hba.nvmet_io_wait_cnt++;
+		phba->sli4_hba.nvmet_io_wait_total++;
+		spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock,
+				       iflag);
+
+		/* Post a brand new DMA buffer to RQ */
+		qno = nvmebuf->idx;
+		lpfc_post_rq_buffer(
+			phba, phba->sli4_hba.nvmet_mrq_hdr[qno],
+			phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
+		return;
+	}
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	payload = (uint32_t *)(nvmebuf->dbuf.virt);
-	fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
-	size = nvmebuf->bytes_recv;
-	oxid = be16_to_cpu(fc_hdr->fh_ox_id);
 	sid = sli4_sid_from_fc_hdr(fc_hdr);
 
-	ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context;
-	if (ctxp == NULL) {
-		atomic_inc(&tgtp->rcv_fcp_cmd_drop);
-		lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-				"6158 FCP Drop IO x%x: Alloc\n",
-				oxid);
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
-		/* Cannot send ABTS without context */
-		return;
-	}
+	ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
 	memset(ctxp, 0, sizeof(ctxp->ctx));
 	ctxp->wqeq = NULL;
 	ctxp->txrdy = NULL;
@@ -1146,9 +1392,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	ctxp->oxid = oxid;
 	ctxp->sid = sid;
 	ctxp->state = LPFC_NVMET_STE_RCV;
-	ctxp->rqb_buffer = nvmebuf;
 	ctxp->entry_cnt = 1;
 	ctxp->flag = 0;
+	ctxp->ctxbuf = ctx_buf;
 	spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1164,22 +1410,16 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		ctxp->ts_isr_status = 0;
 		ctxp->ts_status_nvme = 0;
 	}
-
-	if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) {
-		id = smp_processor_id();
-		if (id < LPFC_CHECK_CPU_CNT)
-			phba->cpucheck_rcv_io[id]++;
-	}
 #endif
 
-	lpfc_nvmeio_data(phba, "NVMET FCP  RCV: xri x%x sz %d CPU %02x\n",
-			 oxid, size, smp_processor_id());
-
 	atomic_inc(&tgtp->rcv_fcp_cmd_in);
 	/*
 	 * The calling sequence should be:
 	 * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done
 	 * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp.
+	 * When we return from nvmet_fc_rcv_fcp_req, all relevant info in
+	 * the NVME command / FC header is stored, so we are free to repost
+	 * the buffer.
 	 */
 	rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
 				  payload, size);
@@ -1187,26 +1427,32 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 	/* Process FCP command */
 	if (rc == 0) {
 		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
 	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-			"6159 FCP Drop IO x%x: err x%x\n",
-			ctxp->oxid, rc);
+			"6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
+			ctxp->oxid, rc,
+			atomic_read(&tgtp->rcv_fcp_cmd_in),
+			atomic_read(&tgtp->rcv_fcp_cmd_out),
+			atomic_read(&tgtp->xmt_fcp_release));
 dropit:
 	lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
 			 oxid, size, sid);
 	if (oxid) {
+		lpfc_nvmet_defer_release(phba, ctxp);
 		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 		return;
 	}
 
-	if (nvmebuf) {
-		nvmebuf->iocbq->hba_wqidx = 0;
-		/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
-	}
+	if (ctx_buf)
+		lpfc_nvmet_ctxbuf_post(phba, ctx_buf);
+
+	if (nvmebuf)
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
 #endif
 }
 
@@ -1258,7 +1504,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
 			   uint64_t isr_timestamp)
 {
 	if (phba->nvmet_support == 0) {
-		lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
+		lpfc_rq_buf_free(phba, &nvmebuf->hbuf);
 		return;
 	}
 	lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf,
@@ -1459,7 +1705,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 	nvmewqe = ctxp->wqeq;
 	if (nvmewqe == NULL) {
 		/* Allocate buffer for  command wqe */
-		nvmewqe = ctxp->rqb_buffer->iocbq;
+		nvmewqe = ctxp->ctxbuf->iocbq;
 		if (nvmewqe == NULL) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 					"6110 lpfc_nvmet_prep_fcp_wqe: No "
@@ -1486,7 +1732,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 		return NULL;
 	}
 
-	sgl  = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl;
+	sgl  = (struct sli4_sge *)ctxp->ctxbuf->sglq->sgl;
 	switch (rsp->op) {
 	case NVMET_FCOP_READDATA:
 	case NVMET_FCOP_READDATA_RSP:
@@ -1811,7 +2057,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	ctxp->state = LPFC_NVMET_STE_DONE;
 
@@ -1826,6 +2073,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
 			"6165 ABORT cmpl: xri x%x flg x%x (%d) "
@@ -1834,15 +2082,16 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
 
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+	/* This is the iocbq for the abort, not the command */
 	lpfc_sli_release_iocbq(phba, cmdwqe);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
@@ -1876,7 +2125,8 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+		atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
 
 	if (!ctxp) {
 		/* if context is clear, related io already complete */
@@ -1906,6 +2156,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	}
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	spin_unlock_irqrestore(&ctxp->ctxlock, flags);
+	atomic_inc(&tgtp->xmt_abort_rsp);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6316 ABTS cmpl xri x%x flg x%x (%x) "
@@ -1913,15 +2164,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 			ctxp->oxid, ctxp->flag, released,
 			wcqe->word0, wcqe->total_data_placed,
 			result, wcqe->word3);
+
+	cmdwqe->context2 = NULL;
+	cmdwqe->context3 = NULL;
 	/*
 	 * if transport has released ctx, then can reuse it. Otherwise,
 	 * will be recycled by transport release call.
 	 */
 	if (released)
-		lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf);
-
-	cmdwqe->context2 = NULL;
-	cmdwqe->context3 = NULL;
+		lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 
 	/* Since iaab/iaar are NOT set, there is no work left.
 	 * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
@@ -1952,7 +2203,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
 	result = wcqe->parameter;
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	atomic_inc(&tgtp->xmt_abort_cmpl);
+	atomic_inc(&tgtp->xmt_ls_abort_cmpl);
 
 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
 			"6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n",
@@ -1983,10 +2234,6 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
 			sid, xri, ctxp->wqeq->sli4_xritag);
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
-	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
-		ctxp->wqeq->hba_wqidx = 0;
-	}
 
 	ndlp = lpfc_findnode_did(phba->pport, sid);
 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
@@ -2082,7 +2329,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2103,6 +2350,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Issue ABTS for this WQE based on iotag */
 	ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba);
 	if (!ctxp->abort_wqeq) {
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
 				"6161 ABORT failed: No wqeqs: "
 				"xri: x%x\n", ctxp->oxid);
@@ -2127,6 +2375,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* driver queued commands are in process of being flushed */
 	if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6163 Driver in reset cleanup - flushing "
 				"NVME Req now. hba_flag x%x oxid x%x\n",
@@ -2139,6 +2388,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	/* Outstanding abort is in progress */
 	if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) {
 		spin_unlock_irqrestore(&phba->hbalock, flags);
+		atomic_inc(&tgtp->xmt_abort_rsp_error);
 		lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
 				"6164 Outstanding NVME I/O Abort Request "
 				"still pending on oxid x%x\n",
@@ -2189,9 +2439,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
 	abts_wqeq->context2 = ctxp;
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
-	if (rc == WQE_SUCCESS)
+	if (rc == WQE_SUCCESS) {
+		atomic_inc(&tgtp->xmt_abort_sol);
 		return 0;
+	}
 
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	lpfc_sli_release_iocbq(phba, abts_wqeq);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -2214,7 +2467,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 
 	tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 	if (!ctxp->wqeq) {
-		ctxp->wqeq = ctxp->rqb_buffer->iocbq;
+		ctxp->wqeq = ctxp->ctxbuf->iocbq;
 		ctxp->wqeq->hba_wqidx = 0;
 	}
 
@@ -2230,11 +2483,11 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
 		return 0;
 	}
 
 aerr:
+	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
 	atomic_inc(&tgtp->xmt_abort_rsp_error);
 	lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
@@ -2269,6 +2522,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	}
 	abts_wqeq = ctxp->wqeq;
 	wqe_abts = &abts_wqeq->wqe;
+
 	lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri);
 
 	spin_lock_irqsave(&phba->hbalock, flags);
@@ -2278,7 +2532,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
 	rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq);
 	spin_unlock_irqrestore(&phba->hbalock, flags);
 	if (rc == WQE_SUCCESS) {
-		atomic_inc(&tgtp->xmt_abort_rsp);
+		atomic_inc(&tgtp->xmt_abort_unsol);
 		return 0;
 	}
 
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 128759f..6eb2f5d 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -22,6 +22,7 @@
  ********************************************************************/
 
 #define LPFC_NVMET_DEFAULT_SEGS		(64 + 1)	/* 256K IOs */
+#define LPFC_NVMET_RQE_DEF_COUNT	512
 #define LPFC_NVMET_SUCCESS_LEN	12
 
 /* Used for NVME Target */
@@ -34,6 +35,7 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_ls_req_out;
 	atomic_t rcv_ls_req_drop;
 	atomic_t xmt_ls_abort;
+	atomic_t xmt_ls_abort_cmpl;
 
 	/* Stats counters - lpfc_nvmet_xmt_ls_rsp */
 	atomic_t xmt_ls_rsp;
@@ -47,9 +49,9 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_fcp_cmd_in;
 	atomic_t rcv_fcp_cmd_out;
 	atomic_t rcv_fcp_cmd_drop;
+	atomic_t xmt_fcp_release;
 
 	/* Stats counters - lpfc_nvmet_xmt_fcp_op */
-	atomic_t xmt_fcp_abort;
 	atomic_t xmt_fcp_drop;
 	atomic_t xmt_fcp_read_rsp;
 	atomic_t xmt_fcp_read;
@@ -62,12 +64,13 @@ struct lpfc_nvmet_tgtport {
 	atomic_t xmt_fcp_rsp_drop;
 
 
-	/* Stats counters - lpfc_nvmet_unsol_issue_abort */
+	/* Stats counters - lpfc_nvmet_xmt_fcp_abort */
+	atomic_t xmt_fcp_abort;
+	atomic_t xmt_fcp_abort_cmpl;
+	atomic_t xmt_abort_sol;
+	atomic_t xmt_abort_unsol;
 	atomic_t xmt_abort_rsp;
 	atomic_t xmt_abort_rsp_error;
-
-	/* Stats counters - lpfc_nvmet_xmt_abort_cmp */
-	atomic_t xmt_abort_cmpl;
 };
 
 struct lpfc_nvmet_rcv_ctx {
@@ -103,6 +106,7 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 	struct rqb_dmabuf *rqb_buffer;
+	struct lpfc_nvmet_ctxbuf *ctxbuf;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t ts_isr_cmd;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2a4fc00..d6b1848 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -74,6 +74,8 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *,
 							 struct lpfc_iocbq *);
 static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
+static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+					  struct hbq_dmabuf *dmabuf);
 static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
 				    struct lpfc_cqe *);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
@@ -479,22 +481,23 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	if (unlikely(!hq) || unlikely(!dq))
 		return -ENOMEM;
 	put_index = hq->host_index;
-	temp_hrqe = hq->qe[hq->host_index].rqe;
+	temp_hrqe = hq->qe[put_index].rqe;
 	temp_drqe = dq->qe[dq->host_index].rqe;
 
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
-	if (hq->host_index != dq->host_index)
+	if (put_index != dq->host_index)
 		return -EINVAL;
 	/* If the host has not yet processed the next entry then we are done */
-	if (((hq->host_index + 1) % hq->entry_count) == hq->hba_index)
+	if (((put_index + 1) % hq->entry_count) == hq->hba_index)
 		return -EBUSY;
 	lpfc_sli_pcimem_bcopy(hrqe, temp_hrqe, hq->entry_size);
 	lpfc_sli_pcimem_bcopy(drqe, temp_drqe, dq->entry_size);
 
 	/* Update the host index to point to the next slot */
-	hq->host_index = ((hq->host_index + 1) % hq->entry_count);
+	hq->host_index = ((put_index + 1) % hq->entry_count);
 	dq->host_index = ((dq->host_index + 1) % dq->entry_count);
+	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
 	if (!(hq->host_index % hq->entry_repost)) {
@@ -5906,7 +5909,7 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
 		bf_set(lpfc_mbx_set_feature_mds,
 		       &mbox->u.mqe.un.set_feature, 1);
 		bf_set(lpfc_mbx_set_feature_mds_deep_loopbk,
-		       &mbox->u.mqe.un.set_feature, 0);
+		       &mbox->u.mqe.un.set_feature, 1);
 		mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS;
 		mbox->u.mqe.un.set_feature.param_len = 8;
 		break;
@@ -6512,6 +6515,50 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 		 (phba->hba_flag & HBA_FCOE_MODE) ? "FCoE" : "FC");
 }
 
+int
+lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
+		    struct lpfc_queue *drq, int count, int idx)
+{
+	int rc, i;
+	struct lpfc_rqe hrqe;
+	struct lpfc_rqe drqe;
+	struct lpfc_rqb *rqbp;
+	struct rqb_dmabuf *rqb_buffer;
+	LIST_HEAD(rqb_buf_list);
+
+	rqbp = hrq->rqbp;
+	for (i = 0; i < count; i++) {
+		/* IF RQ is already full, don't bother */
+		if (rqbp->buffer_count + i >= rqbp->entry_count - 1)
+			break;
+		rqb_buffer = rqbp->rqb_alloc_buffer(phba);
+		if (!rqb_buffer)
+			break;
+		rqb_buffer->hrq = hrq;
+		rqb_buffer->drq = drq;
+		rqb_buffer->idx = idx;
+		list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list);
+	}
+	while (!list_empty(&rqb_buf_list)) {
+		list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf,
+				 hbuf.list);
+
+		hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys);
+		hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys);
+		drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys);
+		drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys);
+		rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe);
+		if (rc < 0) {
+			rqbp->rqb_free_buffer(phba, rqb_buffer);
+		} else {
+			list_add_tail(&rqb_buffer->hbuf.list,
+				      &rqbp->rqb_buffer_list);
+			rqbp->buffer_count++;
+		}
+	}
+	return 1;
+}
+
 /**
  * lpfc_sli4_hba_setup - SLI4 device initialization PCI function
  * @phba: Pointer to HBA context object.
@@ -6524,7 +6571,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 int
 lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 {
-	int rc, i;
+	int rc, i, cnt;
 	LPFC_MBOXQ_t *mboxq;
 	struct lpfc_mqe *mqe;
 	uint8_t *vpd;
@@ -6875,6 +6922,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			goto out_destroy_queue;
 		}
 		phba->sli4_hba.nvmet_xri_cnt = rc;
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* We need 1 iocbq for every SGL, for IO processing */
+		cnt += phba->sli4_hba.nvmet_xri_cnt;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2821 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"1413 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
+
 		lpfc_nvmet_create_targetport(phba);
 	} else {
 		/* update host scsi xri-sgl sizes and mappings */
@@ -6894,28 +6956,34 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 					"and mapping: %d\n", rc);
 			goto out_destroy_queue;
 		}
+
+		cnt = phba->cfg_iocb_cnt * 1024;
+		/* Initialize and populate the iocb list per host */
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+				"2820 initialize iocb list %d total %d\n",
+				phba->cfg_iocb_cnt, cnt);
+		rc = lpfc_init_iocb_list(phba, cnt);
+		if (rc) {
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"6301 Failed to init iocb list.\n");
+			goto out_destroy_queue;
+		}
 	}
 
 	if (phba->nvmet_support && phba->cfg_nvmet_mrq) {
-
 		/* Post initial buffers to all RQs created */
 		for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
 			rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp;
 			INIT_LIST_HEAD(&rqbp->rqb_buffer_list);
 			rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc;
 			rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free;
-			rqbp->entry_count = 256;
+			rqbp->entry_count = LPFC_NVMET_RQE_DEF_COUNT;
 			rqbp->buffer_count = 0;
 
-			/* Divide by 4 and round down to multiple of 16 */
-			rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8;
-			phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc;
-			phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc;
-
 			lpfc_post_rq_buffer(
 				phba, phba->sli4_hba.nvmet_mrq_hdr[i],
 				phba->sli4_hba.nvmet_mrq_data[i],
-				phba->cfg_nvmet_mrq_post);
+				LPFC_NVMET_RQE_DEF_COUNT, i);
 		}
 	}
 
@@ -7082,6 +7150,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	/* Unset all the queues set up in this routine when error out */
 	lpfc_sli4_queue_unset(phba);
 out_destroy_queue:
+	lpfc_free_iocb_list(phba);
 	lpfc_sli4_queue_destroy(phba);
 out_stop_timers:
 	lpfc_stop_hba_timers(phba);
@@ -8621,8 +8690,11 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		memset(wqe, 0, sizeof(union lpfc_wqe128));
 	/* Some of the fields are in the right position already */
 	memcpy(wqe, &iocbq->iocb, sizeof(union lpfc_wqe));
-	wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */
-	wqe->generic.wqe_com.word10 = 0;
+	if (iocbq->iocb.ulpCommand != CMD_SEND_FRAME) {
+		/* The ct field has moved so reset */
+		wqe->generic.wqe_com.word7 = 0;
+		wqe->generic.wqe_com.word10 = 0;
+	}
 
 	abort_tag = (uint32_t) iocbq->iotag;
 	xritag = iocbq->sli4_xritag;
@@ -9116,6 +9188,10 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		}
 
 		break;
+	case CMD_SEND_FRAME:
+		bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag);
+		bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag);
+		return 0;
 	case CMD_XRI_ABORTED_CX:
 	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
 	case CMD_IOCB_FCP_IBIDIR64_CR: /* bidirectional xfer */
@@ -12788,6 +12864,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	struct fc_frame_header *fc_hdr;
 	struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq;
 	struct lpfc_queue *drq = phba->sli4_hba.dat_rq;
+	struct lpfc_nvmet_tgtport *tgtp;
 	struct hbq_dmabuf *dma_buf;
 	uint32_t status, rq_id;
 	unsigned long iflags;
@@ -12808,7 +12885,6 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"2537 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -12819,6 +12895,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 			goto out;
 		}
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe));
 
 		/* If a NVME LS event (type 0x28), treat it as Fast path */
@@ -12832,8 +12909,21 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		workposted = true;
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6402 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -12951,7 +13041,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 		while ((cqe = lpfc_sli4_cq_get(cq))) {
 			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 			cq->CQ_mbox++;
 		}
 		break;
@@ -12965,7 +13055,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
 								      cqe);
 			if (!(++ecount % cq->entry_repost))
-				lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+				break;
 		}
 
 		/* Track the max number of CQEs processed in 1 EQ */
@@ -13135,6 +13225,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	struct lpfc_queue *drq;
 	struct rqb_dmabuf *dma_buf;
 	struct fc_frame_header *fc_hdr;
+	struct lpfc_nvmet_tgtport *tgtp;
 	uint32_t status, rq_id;
 	unsigned long iflags;
 	uint32_t fctl, idx;
@@ -13165,8 +13256,6 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	case FC_STATUS_RQ_BUF_LEN_EXCEEDED:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"6126 Receive Frame Truncated!!\n");
-		hrq->RQ_buf_trunc++;
-		break;
 	case FC_STATUS_RQ_SUCCESS:
 		lpfc_sli4_rq_release(hrq, drq);
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -13178,6 +13267,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		}
 		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		hrq->RQ_rcv_buf++;
+		hrq->RQ_buf_posted--;
 		fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt;
 
 		/* Just some basic sanity checks on FCP Command frame */
@@ -13200,14 +13290,23 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 drop:
 		lpfc_in_buf_free(phba, &dma_buf->dbuf);
 		break;
-	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 	case FC_STATUS_INSUFF_BUF_FRM_DISC:
+		if (phba->nvmet_support) {
+			tgtp = phba->targetport->private;
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_NVME,
+					"6401 RQE Error x%x, posted %d err_cnt "
+					"%d: %x %x %x\n",
+					status, hrq->RQ_buf_posted,
+					hrq->RQ_no_posted_buf,
+					atomic_read(&tgtp->rcv_fcp_cmd_in),
+					atomic_read(&tgtp->rcv_fcp_cmd_out),
+					atomic_read(&tgtp->xmt_fcp_release));
+		}
+		/* fallthrough */
+
+	case FC_STATUS_INSUFF_BUF_NEED_BUF:
 		hrq->RQ_no_posted_buf++;
 		/* Post more buffers if possible */
-		spin_lock_irqsave(&phba->hbalock, iflags);
-		phba->hba_flag |= HBA_POST_RECEIVE_BUFFER;
-		spin_unlock_irqrestore(&phba->hbalock, iflags);
-		workposted = true;
 		break;
 	}
 out:
@@ -13361,7 +13460,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13452,7 +13551,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
 	while ((cqe = lpfc_sli4_cq_get(cq))) {
 		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
 		if (!(++ecount % cq->entry_repost))
-			lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM);
+			break;
 	}
 
 	/* Track the max number of CQEs processed in 1 EQ */
@@ -13534,7 +13633,7 @@ lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
 	while ((eqe = lpfc_sli4_eq_get(eq))) {
 		lpfc_sli4_fof_handle_eqe(phba, eqe);
 		if (!(++ecount % eq->entry_repost))
-			lpfc_sli4_eq_release(eq, LPFC_QUEUE_NOARM);
+			break;
 		eq->EQ_processed++;
 	}
 
@@ -13651,7 +13750,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
 		if (!(++ecount % fpeq->entry_repost))
-			lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM);
+			break;
 		fpeq->EQ_processed++;
 	}
 
@@ -13832,17 +13931,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size,
 	}
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
-
-	/*
-	 * entry_repost is calculated based on the number of entries in the
-	 * queue. This works out except for RQs. If buffers are NOT initially
-	 * posted for every RQE, entry_repost should be adjusted accordingly.
-	 */
-	queue->entry_repost = (entry_count >> 3);
-	if (queue->entry_repost < LPFC_QUEUE_MIN_REPOST)
-		queue->entry_repost = LPFC_QUEUE_MIN_REPOST;
 	queue->phba = phba;
 
+	/* entry_repost will be set during q creation */
+
 	return queue;
 out_fail:
 	lpfc_sli4_queue_free(queue);
@@ -14073,6 +14165,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 		status = -ENXIO;
 	eq->host_index = 0;
 	eq->hba_index = 0;
+	eq->entry_repost = LPFC_EQ_REPOST;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -14146,9 +14239,9 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0361 Unsupported CQ count: "
-				"entry cnt %d sz %d pg cnt %d repost %d\n",
+				"entry cnt %d sz %d pg cnt %d\n",
 				cq->entry_count, cq->entry_size,
-				cq->page_count, cq->entry_repost);
+				cq->page_count);
 		if (cq->entry_count < 256) {
 			status = -EINVAL;
 			goto out;
@@ -14201,6 +14294,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->assoc_qid = eq->queue_id;
 	cq->host_index = 0;
 	cq->hba_index = 0;
+	cq->entry_repost = LPFC_CQ_REPOST;
 
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
@@ -14392,6 +14486,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->assoc_qid = eq->queue_id;
 		cq->host_index = 0;
 		cq->hba_index = 0;
+		cq->entry_repost = LPFC_CQ_REPOST;
 
 		rc = 0;
 		list_for_each_entry(dmabuf, &cq->page_list, list) {
@@ -14640,6 +14735,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
+	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
@@ -14865,34 +14961,6 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 }
 
 /**
- * lpfc_rq_adjust_repost - Adjust entry_repost for an RQ
- * @phba: HBA structure that indicates port to create a queue on.
- * @rq:   The queue structure to use for the receive queue.
- * @qno:  The associated HBQ number
- *
- *
- * For SLI4 we need to adjust the RQ repost value based on
- * the number of buffers that are initially posted to the RQ.
- */
-void
-lpfc_rq_adjust_repost(struct lpfc_hba *phba, struct lpfc_queue *rq, int qno)
-{
-	uint32_t cnt;
-
-	/* sanity check on queue memory */
-	if (!rq)
-		return;
-	cnt = lpfc_hbq_defs[qno]->entry_count;
-
-	/* Recalc repost for RQs based on buffers initially posted */
-	cnt = (cnt >> 3);
-	if (cnt < LPFC_QUEUE_MIN_REPOST)
-		cnt = LPFC_QUEUE_MIN_REPOST;
-
-	rq->entry_repost = cnt;
-}
-
-/**
  * lpfc_rq_create - Create a Receive Queue on the HBA
  * @phba: HBA structure that indicates port to create a queue on.
  * @hrq: The queue structure to use to create the header receive queue.
@@ -15077,6 +15145,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
+	hrq->entry_repost = LPFC_RQ_REPOST;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -15087,7 +15156,12 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
 		bf_set(lpfc_rq_context_rqe_count_1,
 		       &rq_create->u.request.context, hrq->entry_count);
-		rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE;
+		if (subtype == LPFC_NVMET)
+			rq_create->u.request.context.buffer_size =
+				LPFC_NVMET_DATA_BUF_SIZE;
+		else
+			rq_create->u.request.context.buffer_size =
+				LPFC_DATA_BUF_SIZE;
 		bf_set(lpfc_rq_context_rqe_size, &rq_create->u.request.context,
 		       LPFC_RQE_SIZE_8);
 		bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context,
@@ -15124,8 +15198,14 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 			       LPFC_RQ_RING_SIZE_4096);
 			break;
 		}
-		bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
-		       LPFC_DATA_BUF_SIZE);
+		if (subtype == LPFC_NVMET)
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_NVMET_DATA_BUF_SIZE);
+		else
+			bf_set(lpfc_rq_context_buf_size,
+			       &rq_create->u.request.context,
+			       LPFC_DATA_BUF_SIZE);
 	}
 	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
 	       cq->queue_id);
@@ -15158,6 +15238,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
+	drq->entry_repost = LPFC_RQ_REPOST;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -15270,7 +15351,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			       cq->queue_id);
 			bf_set(lpfc_rq_context_data_size,
 			       &rq_create->u.request.context,
-			       LPFC_DATA_BUF_SIZE);
+			       LPFC_NVMET_DATA_BUF_SIZE);
 			bf_set(lpfc_rq_context_hdr_size,
 			       &rq_create->u.request.context,
 			       LPFC_HDR_BUF_SIZE);
@@ -15315,6 +15396,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
+		hrq->entry_repost = LPFC_RQ_REPOST;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -15323,6 +15405,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
+		drq->entry_repost = LPFC_RQ_REPOST;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
@@ -16063,6 +16146,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	struct fc_vft_header *fc_vft_hdr;
 	uint32_t *header = (uint32_t *) fc_hdr;
 
+#define FC_RCTL_MDS_DIAGS	0xF4
+
 	switch (fc_hdr->fh_r_ctl) {
 	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
 	case FC_RCTL_DD_SOL_DATA:	/* solicited data */
@@ -16090,6 +16175,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	case FC_RCTL_F_BSY:	/* fabric busy to data frame */
 	case FC_RCTL_F_BSYL:	/* fabric busy to link control frame */
 	case FC_RCTL_LCR:	/* link credit reset */
+	case FC_RCTL_MDS_DIAGS: /* MDS Diagnostics */
 	case FC_RCTL_END:	/* end */
 		break;
 	case FC_RCTL_VFTH:	/* Virtual Fabric tagging Header */
@@ -16099,12 +16185,16 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	default:
 		goto drop;
 	}
+
+#define FC_TYPE_VENDOR_UNIQUE	0xFF
+
 	switch (fc_hdr->fh_type) {
 	case FC_TYPE_BLS:
 	case FC_TYPE_ELS:
 	case FC_TYPE_FCP:
 	case FC_TYPE_CT:
 	case FC_TYPE_NVME:
+	case FC_TYPE_VENDOR_UNIQUE:
 		break;
 	case FC_TYPE_IP:
 	case FC_TYPE_ILS:
@@ -16115,12 +16205,14 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr)
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
 			"2538 Received frame rctl:%s (x%x), type:%s (x%x), "
 			"frame Data:%08x %08x %08x %08x %08x %08x %08x\n",
+			(fc_hdr->fh_r_ctl == FC_RCTL_MDS_DIAGS) ? "MDS Diags" :
 			lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl,
-			lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type,
-			be32_to_cpu(header[0]), be32_to_cpu(header[1]),
-			be32_to_cpu(header[2]), be32_to_cpu(header[3]),
-			be32_to_cpu(header[4]), be32_to_cpu(header[5]),
-			be32_to_cpu(header[6]));
+			(fc_hdr->fh_type == FC_TYPE_VENDOR_UNIQUE) ?
+			"Vendor Unique" : lpfc_type_names[fc_hdr->fh_type],
+			fc_hdr->fh_type, be32_to_cpu(header[0]),
+			be32_to_cpu(header[1]), be32_to_cpu(header[2]),
+			be32_to_cpu(header[3]), be32_to_cpu(header[4]),
+			be32_to_cpu(header[5]), be32_to_cpu(header[6]));
 	return 0;
 drop:
 	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
@@ -16926,6 +17018,96 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport,
 	lpfc_sli_release_iocbq(phba, iocbq);
 }
 
+static void
+lpfc_sli4_mds_loopback_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+			    struct lpfc_iocbq *rspiocb)
+{
+	struct lpfc_dmabuf *pcmd = cmdiocb->context2;
+
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, cmdiocb);
+}
+
+static void
+lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
+			      struct hbq_dmabuf *dmabuf)
+{
+	struct fc_frame_header *fc_hdr;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_iocbq *iocbq = NULL;
+	union  lpfc_wqe *wqe;
+	struct lpfc_dmabuf *pcmd = NULL;
+	uint32_t frame_len;
+	int rc;
+
+	fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt;
+	frame_len = bf_get(lpfc_rcqe_length, &dmabuf->cq_event.cqe.rcqe_cmpl);
+
+	/* Send the received frame back */
+	iocbq = lpfc_sli_get_iocbq(phba);
+	if (!iocbq)
+		goto exit;
+
+	/* Allocate buffer for command payload */
+	pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+	if (pcmd)
+		pcmd->virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL,
+					    &pcmd->phys);
+	if (!pcmd || !pcmd->virt)
+		goto exit;
+
+	INIT_LIST_HEAD(&pcmd->list);
+
+	/* copyin the payload */
+	memcpy(pcmd->virt, dmabuf->dbuf.virt, frame_len);
+
+	/* fill in BDE's for command */
+	iocbq->iocb.un.xseq64.bdl.addrHigh = putPaddrHigh(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.addrLow = putPaddrLow(pcmd->phys);
+	iocbq->iocb.un.xseq64.bdl.bdeFlags = BUFF_TYPE_BDE_64;
+	iocbq->iocb.un.xseq64.bdl.bdeSize = frame_len;
+
+	iocbq->context2 = pcmd;
+	iocbq->vport = vport;
+	iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK;
+	iocbq->iocb_flag |= LPFC_USE_FCPWQIDX;
+
+	/*
+	 * Setup rest of the iocb as though it were a WQE
+	 * Build the SEND_FRAME WQE
+	 */
+	wqe = (union lpfc_wqe *)&iocbq->iocb;
+
+	wqe->send_frame.frame_len = frame_len;
+	wqe->send_frame.fc_hdr_wd0 = be32_to_cpu(*((uint32_t *)fc_hdr));
+	wqe->send_frame.fc_hdr_wd1 = be32_to_cpu(*((uint32_t *)fc_hdr + 1));
+	wqe->send_frame.fc_hdr_wd2 = be32_to_cpu(*((uint32_t *)fc_hdr + 2));
+	wqe->send_frame.fc_hdr_wd3 = be32_to_cpu(*((uint32_t *)fc_hdr + 3));
+	wqe->send_frame.fc_hdr_wd4 = be32_to_cpu(*((uint32_t *)fc_hdr + 4));
+	wqe->send_frame.fc_hdr_wd5 = be32_to_cpu(*((uint32_t *)fc_hdr + 5));
+
+	iocbq->iocb.ulpCommand = CMD_SEND_FRAME;
+	iocbq->iocb.ulpLe = 1;
+	iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl;
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0);
+	if (rc == IOCB_ERROR)
+		goto exit;
+
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+	return;
+
+exit:
+	lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+			"2023 Unable to process MDS loopback frame\n");
+	if (pcmd && pcmd->virt)
+		pci_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
+	kfree(pcmd);
+	lpfc_sli_release_iocbq(phba, iocbq);
+	lpfc_in_buf_free(phba, &dmabuf->dbuf);
+}
+
 /**
  * lpfc_sli4_handle_received_buffer - Handle received buffers from firmware
  * @phba: Pointer to HBA context object.
@@ -16964,6 +17146,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
 		fcfi = bf_get(lpfc_rcqe_fcf_id,
 			      &dmabuf->cq_event.cqe.rcqe_cmpl);
 
+	if (fc_hdr->fh_r_ctl == 0xF4 && fc_hdr->fh_type == 0xFF) {
+		vport = phba->pport;
+		/* Handle MDS Loopback frames */
+		lpfc_sli4_handle_mds_loopback(vport, dmabuf);
+		return;
+	}
+
 	/* d_id this frame is directed to */
 	did = sli4_did_from_fc_hdr(fc_hdr);
 
@@ -17137,6 +17326,14 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page)
 				"status x%x add_status x%x, mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
 		rc = -ENXIO;
+	} else {
+		/*
+		 * The next_rpi stores the next logical module-64 rpi value used
+		 * to post physical rpis in subsequent rpi postings.
+		 */
+		spin_lock_irq(&phba->hbalock);
+		phba->sli4_hba.next_rpi = rpi_page->next_rpi;
+		spin_unlock_irq(&phba->hbalock);
 	}
 	return rc;
 }
@@ -18717,7 +18914,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		ctxp = pwqe->context2;
-		sglq = ctxp->rqb_buffer->sglq;
+		sglq = ctxp->ctxbuf->sglq;
 		if (pwqe->sli4_xritag ==  NO_XRI) {
 			pwqe->sli4_lxritag = sglq->sli4_lxritag;
 			pwqe->sli4_xritag = sglq->sli4_xritag;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index da46471..cf863db 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -24,7 +24,6 @@
 #define LPFC_XRI_EXCH_BUSY_WAIT_TMO		10000
 #define LPFC_XRI_EXCH_BUSY_WAIT_T1   		10
 #define LPFC_XRI_EXCH_BUSY_WAIT_T2              30000
-#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32
 #define LPFC_RPI_LOW_WATER_MARK			10
 
 #define LPFC_UNREG_FCF                          1
@@ -155,7 +154,11 @@ struct lpfc_queue {
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
 	uint32_t entry_repost;	/* Count of entries before doorbell is rung */
-#define LPFC_QUEUE_MIN_REPOST	8
+#define LPFC_EQ_REPOST		8
+#define LPFC_MQ_REPOST		8
+#define LPFC_CQ_REPOST		64
+#define LPFC_RQ_REPOST		64
+#define LPFC_RELEASE_NOTIFICATION_INTERVAL	32  /* For WQs */
 	uint32_t queue_id;	/* Queue ID assigned by the hardware */
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t page_count;	/* Number of pages allocated for this queue */
@@ -195,7 +198,7 @@ struct lpfc_queue {
 /* defines for RQ stats */
 #define	RQ_no_posted_buf	q_cnt_1
 #define	RQ_no_buf_found		q_cnt_2
-#define	RQ_buf_trunc		q_cnt_3
+#define	RQ_buf_posted		q_cnt_3
 #define	RQ_rcv_buf		q_cnt_4
 
 	uint64_t isr_timestamp;
@@ -617,12 +620,17 @@ struct lpfc_sli4_hba {
 	uint16_t scsi_xri_start;
 	uint16_t els_xri_cnt;
 	uint16_t nvmet_xri_cnt;
+	uint16_t nvmet_ctx_cnt;
+	uint16_t nvmet_io_wait_cnt;
+	uint16_t nvmet_io_wait_total;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	struct list_head lpfc_nvmet_sgl_list;
 	struct list_head lpfc_abts_nvmet_ctx_list;
 	struct list_head lpfc_abts_scsi_buf_list;
 	struct list_head lpfc_abts_nvme_buf_list;
+	struct list_head lpfc_nvmet_ctx_list;
+	struct list_head lpfc_nvmet_io_wait_list;
 	struct lpfc_sglq **lpfc_sglq_active_list;
 	struct list_head lpfc_rpi_hdr_list;
 	unsigned long *rpi_bmask;
@@ -654,6 +662,7 @@ struct lpfc_sli4_hba {
 	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */
 	spinlock_t sgl_list_lock; /* list of aborted els IOs */
 	spinlock_t nvmet_io_lock;
+	spinlock_t nvmet_io_wait_lock; /* IOs waiting for ctx resources */
 	uint32_t physical_port;
 
 	/* CPU to vector mapping information */
@@ -661,8 +670,6 @@ struct lpfc_sli4_hba {
 	uint16_t num_online_cpu;
 	uint16_t num_present_cpu;
 	uint16_t curr_disp_cpu;
-
-	uint16_t nvmet_mrq_post_idx;
 };
 
 enum lpfc_sge_type {
@@ -698,6 +705,7 @@ struct lpfc_rpi_hdr {
 	struct lpfc_dmabuf *dmabuf;
 	uint32_t page_count;
 	uint32_t start_rpi;
+	uint16_t next_rpi;
 };
 
 struct lpfc_rsrc_blks {
@@ -762,7 +770,6 @@ int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *,
 int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 			struct lpfc_queue **drqp, struct lpfc_queue **cqp,
 			uint32_t subtype);
-void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int);
 int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *);
 int lpfc_mq_destroy(struct lpfc_hba *, struct lpfc_queue *);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 1c26dc6..c265324 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.12"
+#define LPFC_DRIVER_VERSION "11.2.0.14"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b948..a4f28b7 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
 	 *       code paths.
 	 */
 	if (unlikely(rq->bio))
-		blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+		blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
 	else
 		blk_put_request(rq);
 }
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
 EXPORT_SYMBOL(osd_end_request);
 
 static void _set_error_resid(struct osd_request *or, struct request *req,
-			     int error)
+			     blk_status_t error)
 {
 	or->async_error = error;
-	or->req_errors = scsi_req(req)->result ? : error;
+	or->req_errors = scsi_req(req)->result;
 	or->sense_len = scsi_req(req)->sense_len;
 	if (or->sense_len)
 		memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 
 int osd_execute_request(struct osd_request *or)
 {
-	int error;
-
 	blk_execute_rq(or->request->q, NULL, or->request, 0);
-	error = scsi_req(or->request)->result ? -EIO : 0;
 
-	_set_error_resid(or, or->request, error);
-	return error;
+	if (scsi_req(or->request)->result) {
+		_set_error_resid(or, or->request, BLK_STS_IOERR);
+		return -EIO;
+	}
+
+	_set_error_resid(or, or->request, BLK_STS_OK);
+	return 0;
 }
 EXPORT_SYMBOL(osd_execute_request);
 
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
 {
 	struct osd_request *or = req->end_io_data;
 
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 			flags);
 	if (IS_ERR(req))
 		return req;
-	scsi_req_init(req);
 
 	for_each_bio(bio) {
-		struct bio *bounce_bio = bio;
-
-		blk_queue_bounce(req->q, &bounce_bio);
-		ret = blk_rq_append_bio(req, bounce_bio);
+		ret = blk_rq_append_bio(req, bio);
 		if (ret)
 			return ERR_PTR(ret);
 	}
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			scsi_req_init(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
@@ -1914,7 +1911,7 @@ int osd_req_decode_sense_full(struct osd_request *or,
 		/* scsi sense is Empty, the request was never issued to target
 		 * linux return code might tell us what happened.
 		 */
-		if (or->async_error == -ENOMEM)
+		if (or->async_error == BLK_STS_RESOURCE)
 			osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
 		else
 			osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed9..929ee7e 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
 
 
 /* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
 {
 	struct scsi_request *rq = scsi_req(req);
 	struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 		return DRIVER_ERROR << 24;
 
 	rq = scsi_req(req);
-	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	SRpnt->bio = NULL;
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 5ca3e8c..32632c9 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -38,7 +38,7 @@ struct qedi_endpoint;
 #define QEDI_MAX_ISCSI_TASK		4096
 #define QEDI_MAX_TASK_NUM		0x0FFF
 #define QEDI_MAX_ISCSI_CONNS_PER_HBA	1024
-#define QEDI_ISCSI_MAX_BDS_PER_CMD	256	/* Firmware max BDs is 256 */
+#define QEDI_ISCSI_MAX_BDS_PER_CMD	255	/* Firmware max BDs is 255 */
 #define MAX_OUSTANDING_TASKS_PER_CON	1024
 
 #define QEDI_MAX_BD_LEN		0xffff
@@ -63,6 +63,7 @@ struct qedi_endpoint;
 #define QEDI_PAGE_MASK		(~((QEDI_PAGE_SIZE) - 1))
 
 #define QEDI_PAGE_SIZE		4096
+#define QEDI_HW_DMA_BOUNDARY	0xfff
 #define QEDI_PATH_HANDLE	0xFE0000000UL
 
 struct qedi_uio_ctrl {
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index d6978cb..507512c 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
 			 protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-		WARN_ON(1);
 	}
 }
 
@@ -1494,6 +1493,8 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn,
 	tmf_hdr = (struct iscsi_tm *)mtask->hdr;
 	qedi_cmd = (struct qedi_cmd *)mtask->dd_data;
 	ep = qedi_conn->ep;
+	if (!ep)
+		return -ENODEV;
 
 	tid = qedi_get_task_idx(qedi);
 	if (tid == -1)
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 3548d46..87f0af3 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -59,6 +59,7 @@ struct scsi_host_template qedi_host_template = {
 	.this_id = -1,
 	.sg_tablesize = QEDI_ISCSI_MAX_BDS_PER_CMD,
 	.max_sectors = 0xffff,
+	.dma_boundary = QEDI_HW_DMA_BOUNDARY,
 	.cmd_per_lun = 128,
 	.use_clustering = ENABLE_CLUSTERING,
 	.shost_attrs = qedi_shost_attrs,
@@ -1223,8 +1224,12 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data)
 
 	iscsi_cid = (u32)path_data->handle;
 	qedi_ep = qedi->ep_tbl[iscsi_cid];
-	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN,
+	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
 		  "iscsi_cid=0x%x, qedi_ep=%p\n", iscsi_cid, qedi_ep);
+	if (!qedi_ep) {
+		ret = -EINVAL;
+		goto set_path_exit;
+	}
 
 	if (!is_valid_ether_addr(&path_data->mac_addr[0])) {
 		QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n");
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 92775a8..879d3b7 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -151,6 +151,11 @@ static int qedi_uio_close(struct uio_info *uinfo, struct inode *inode)
 
 static void __qedi_free_uio_rings(struct qedi_uio_dev *udev)
 {
+	if (udev->uctrl) {
+		free_page((unsigned long)udev->uctrl);
+		udev->uctrl = NULL;
+	}
+
 	if (udev->ll2_ring) {
 		free_page((unsigned long)udev->ll2_ring);
 		udev->ll2_ring = NULL;
@@ -169,7 +174,6 @@ static void __qedi_free_uio(struct qedi_uio_dev *udev)
 	__qedi_free_uio_rings(udev);
 
 	pci_dev_put(udev->pdev);
-	kfree(udev->uctrl);
 	kfree(udev);
 }
 
@@ -208,6 +212,11 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev)
 	if (udev->ll2_ring || udev->ll2_buf)
 		return rc;
 
+	/* Memory for control area.  */
+	udev->uctrl = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!udev->uctrl)
+		return -ENOMEM;
+
 	/* Allocating memory for LL2 ring  */
 	udev->ll2_ring_size = QEDI_PAGE_SIZE;
 	udev->ll2_ring = (void *)get_zeroed_page(GFP_KERNEL | __GFP_COMP);
@@ -237,7 +246,6 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev)
 static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 {
 	struct qedi_uio_dev *udev = NULL;
-	struct qedi_uio_ctrl *uctrl = NULL;
 	int rc = 0;
 
 	list_for_each_entry(udev, &qedi_udev_list, list) {
@@ -258,21 +266,14 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 		goto err_udev;
 	}
 
-	uctrl = kzalloc(sizeof(*uctrl), GFP_KERNEL);
-	if (!uctrl) {
-		rc = -ENOMEM;
-		goto err_uctrl;
-	}
-
 	udev->uio_dev = -1;
 
 	udev->qedi = qedi;
 	udev->pdev = qedi->pdev;
-	udev->uctrl = uctrl;
 
 	rc = __qedi_alloc_uio_rings(udev);
 	if (rc)
-		goto err_uio_rings;
+		goto err_uctrl;
 
 	list_add(&udev->list, &qedi_udev_list);
 
@@ -283,8 +284,6 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi)
 	udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE;
 	return 0;
 
- err_uio_rings:
-	kfree(uctrl);
  err_uctrl:
 	kfree(udev);
  err_udev:
@@ -828,6 +827,8 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
 	qedi->pf_params.iscsi_pf_params.num_uhq_pages_in_ring = num_sq_pages;
 	qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues;
 	qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug;
+	qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000;
+	qedi->pf_params.iscsi_pf_params.max_fin_rt = 2;
 
 	for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) {
 		if ((1 << log_page_size) == PAGE_SIZE)
@@ -1498,11 +1499,9 @@ int qedi_get_task_idx(struct qedi_ctx *qedi)
 
 void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
 {
-	if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+	if (!test_and_clear_bit(idx, qedi->task_idx_map))
 		QEDI_ERR(&qedi->dbg_ctx,
 			 "FW task context, already cleared, tid=0x%x\n", idx);
-		WARN_ON(1);
-	}
 }
 
 void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 16d1cd5..ca3420d 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -730,6 +730,8 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
 		return -EIO;
 	}
 
+	memset(&elreq, 0, sizeof(elreq));
+
 	elreq.req_sg_cnt = dma_map_sg(&ha->pdev->dev,
 		bsg_job->request_payload.sg_list, bsg_job->request_payload.sg_cnt,
 		DMA_TO_DEVICE);
@@ -795,10 +797,9 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
 
 	if (atomic_read(&vha->loop_state) == LOOP_READY &&
 	    (ha->current_topology == ISP_CFG_F ||
-	    ((IS_QLA81XX(ha) || IS_QLA8031(ha) || IS_QLA8044(ha)) &&
-	    le32_to_cpu(*(uint32_t *)req_data) == ELS_OPCODE_BYTE
-	    && req_data_len == MAX_ELS_FRAME_PAYLOAD)) &&
-		elreq.options == EXTERNAL_LOOPBACK) {
+	    (le32_to_cpu(*(uint32_t *)req_data) == ELS_OPCODE_BYTE &&
+	     req_data_len == MAX_ELS_FRAME_PAYLOAD)) &&
+	    elreq.options == EXTERNAL_LOOPBACK) {
 		type = "FC_BSG_HST_VENDOR_ECHO_DIAG";
 		ql_dbg(ql_dbg_user, vha, 0x701e,
 		    "BSG request type: %s.\n", type);
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 51b4179..88748a6 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -1131,7 +1131,7 @@ qla24xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked)
 
 	/* Mailbox registers. */
 	mbx_reg = &reg->mailbox0;
-	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, dmp_reg++)
+	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++)
 		fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg));
 
 	/* Transfer sequence registers. */
@@ -2090,7 +2090,7 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked)
 
 	/* Mailbox registers. */
 	mbx_reg = &reg->mailbox0;
-	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, dmp_reg++)
+	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++, mbx_reg++)
 		fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg));
 
 	/* Transfer sequence registers. */
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ae11901..eddbc12 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3425,6 +3425,7 @@ struct qla_hw_data {
 	uint8_t 	max_req_queues;
 	uint8_t 	max_rsp_queues;
 	uint8_t		max_qpairs;
+	uint8_t		num_qpairs;
 	struct qla_qpair *base_qpair;
 	struct qla_npiv_entry *npiv_info;
 	uint16_t	nvram_npiv_size;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 0347433..0391fc3 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -7543,12 +7543,13 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, int v
 		/* Assign available que pair id */
 		mutex_lock(&ha->mq_lock);
 		qpair_id = find_first_zero_bit(ha->qpair_qid_map, ha->max_qpairs);
-		if (qpair_id >= ha->max_qpairs) {
+		if (ha->num_qpairs >= ha->max_qpairs) {
 			mutex_unlock(&ha->mq_lock);
 			ql_log(ql_log_warn, vha, 0x0183,
 			    "No resources to create additional q pair.\n");
 			goto fail_qid_map;
 		}
+		ha->num_qpairs++;
 		set_bit(qpair_id, ha->qpair_qid_map);
 		ha->queue_pair_map[qpair_id] = qpair;
 		qpair->id = qpair_id;
@@ -7635,6 +7636,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, int v
 fail_msix:
 	ha->queue_pair_map[qpair_id] = NULL;
 	clear_bit(qpair_id, ha->qpair_qid_map);
+	ha->num_qpairs--;
 	mutex_unlock(&ha->mq_lock);
 fail_qid_map:
 	kfree(qpair);
@@ -7660,6 +7662,7 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair)
 	mutex_lock(&ha->mq_lock);
 	ha->queue_pair_map[qpair->id] = NULL;
 	clear_bit(qpair->id, ha->qpair_qid_map);
+	ha->num_qpairs--;
 	list_del(&qpair->qp_list_elem);
 	if (list_empty(&vha->qp_list))
 		vha->flags.qpairs_available = 0;
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 66df6ce..c61a6a8 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -129,28 +129,16 @@ qla2x00_clear_loop_id(fc_port_t *fcport) {
 }
 
 static inline void
-qla2x00_clean_dsd_pool(struct qla_hw_data *ha, srb_t *sp,
-	struct qla_tgt_cmd *tc)
+qla2x00_clean_dsd_pool(struct qla_hw_data *ha, struct crc_context *ctx)
 {
-	struct dsd_dma *dsd_ptr, *tdsd_ptr;
-	struct crc_context *ctx;
-
-	if (sp)
-		ctx = (struct crc_context *)GET_CMD_CTX_SP(sp);
-	else if (tc)
-		ctx = (struct crc_context *)tc->ctx;
-	else {
-		BUG();
-		return;
-	}
+	struct dsd_dma *dsd, *tdsd;
 
 	/* clean up allocated prev pool */
-	list_for_each_entry_safe(dsd_ptr, tdsd_ptr,
-	    &ctx->dsd_list, list) {
-		dma_pool_free(ha->dl_dma_pool, dsd_ptr->dsd_addr,
-		    dsd_ptr->dsd_list_dma);
-		list_del(&dsd_ptr->list);
-		kfree(dsd_ptr);
+	list_for_each_entry_safe(dsd, tdsd, &ctx->dsd_list, list) {
+		dma_pool_free(ha->dl_dma_pool, dsd->dsd_addr,
+		    dsd->dsd_list_dma);
+		list_del(&dsd->list);
+		kfree(dsd);
 	}
 	INIT_LIST_HEAD(&ctx->dsd_list);
 }
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index aac0350..2572121 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3282,7 +3282,7 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
 	}
 
 	/* Enable MSI-X vector for response queue update for queue 0 */
-	if (IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
+	if (IS_QLA25XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha)) {
 		if (ha->msixbase && ha->mqiobase &&
 		    (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
 		     ql2xmqsupport))
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index a113ab3..cba1fc5 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3676,15 +3676,6 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
 				qlt_update_host_map(vha, id);
 			}
 
-			fc_host_port_name(vha->host) =
-			    wwn_to_u64(vha->port_name);
-
-			if (qla_ini_mode_enabled(vha))
-				ql_dbg(ql_dbg_mbx, vha, 0x1018,
-				    "FA-WWN portname %016llx (%x)\n",
-				    fc_host_port_name(vha->host),
-				    rptid_entry->vp_status);
-
 			set_bit(REGISTER_FC4_NEEDED, &vha->dpc_flags);
 			set_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags);
 		} else {
@@ -4821,9 +4812,9 @@ qla2x00_echo_test(scsi_qla_host_t *vha, struct msg_echo_lb *mreq,
 
 	memset(mcp->mb, 0 , sizeof(mcp->mb));
 	mcp->mb[0] = MBC_DIAGNOSTIC_ECHO;
-	mcp->mb[1] = mreq->options | BIT_6;	/* BIT_6 specifies 64bit address */
+	/* BIT_6 specifies 64bit address */
+	mcp->mb[1] = mreq->options | BIT_15 | BIT_6;
 	if (IS_CNA_CAPABLE(ha)) {
-		mcp->mb[1] |= BIT_15;
 		mcp->mb[2] = vha->fcoe_fcf_idx;
 	}
 	mcp->mb[16] = LSW(mreq->rcv_dma);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 1c79579..79f0502 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -630,29 +630,34 @@ qla2x00_sp_free_dma(void *ptr)
 		sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
 	}
 
+	if (!ctx)
+		goto end;
+
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
-		qla2x00_clean_dsd_pool(ha, sp, NULL);
+		qla2x00_clean_dsd_pool(ha, ctx);
 		sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
 	}
 
 	if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-		dma_pool_free(ha->dl_dma_pool, ctx,
-		    ((struct crc_context *)ctx)->crc_ctx_dma);
+		struct crc_context *ctx0 = ctx;
+
+		dma_pool_free(ha->dl_dma_pool, ctx0, ctx0->crc_ctx_dma);
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = (struct ct6_dsd *)ctx;
+		struct ct6_dsd *ctx1 = ctx;
 
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
-			ctx1->fcp_cmnd_dma);
+		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
 		ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
 	}
 
+end:
 	CMD_SP(cmd) = NULL;
 	qla2x00_rel_sp(sp);
 }
@@ -699,21 +704,24 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 		sp->flags &= ~SRB_CRC_PROT_DMA_VALID;
 	}
 
+	if (!ctx)
+		goto end;
+
 	if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
 		/* List assured to be having elements */
-		qla2x00_clean_dsd_pool(ha, sp, NULL);
+		qla2x00_clean_dsd_pool(ha, ctx);
 		sp->flags &= ~SRB_CRC_CTX_DSD_VALID;
 	}
 
 	if (sp->flags & SRB_CRC_CTX_DMA_VALID) {
-		dma_pool_free(ha->dl_dma_pool, ctx,
-		    ((struct crc_context *)ctx)->crc_ctx_dma);
+		struct crc_context *ctx0 = ctx;
+
+		dma_pool_free(ha->dl_dma_pool, ctx, ctx0->crc_ctx_dma);
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = (struct ct6_dsd *)ctx;
-
+		struct ct6_dsd *ctx1 = ctx;
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
 		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
@@ -721,7 +729,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
 	}
-
+end:
 	CMD_SP(cmd) = NULL;
 	qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
@@ -1632,7 +1640,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 void
 qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 {
-	int que, cnt;
+	int que, cnt, status;
 	unsigned long flags;
 	srb_t *sp;
 	struct qla_hw_data *ha = vha->hw;
@@ -1662,8 +1670,12 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 					 */
 					sp_get(sp);
 					spin_unlock_irqrestore(&ha->hardware_lock, flags);
-					qla2xxx_eh_abort(GET_CMD_SP(sp));
+					status = qla2xxx_eh_abort(GET_CMD_SP(sp));
 					spin_lock_irqsave(&ha->hardware_lock, flags);
+					/* Get rid of extra reference if immediate exit
+					 * from qla2xxx_eh_abort */
+					if (status == FAILED && (qla2x00_isp_reg_stat(ha)))
+						atomic_dec(&sp->ref_count);
 				}
 				req->outstanding_cmds[cnt] = NULL;
 				sp->done(sp, res);
@@ -2623,10 +2635,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	if (mem_only) {
 		if (pci_enable_device_mem(pdev))
-			goto probe_out;
+			return ret;
 	} else {
 		if (pci_enable_device(pdev))
-			goto probe_out;
+			return ret;
 	}
 
 	/* This may fail but that's ok */
@@ -2636,7 +2648,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!ha) {
 		ql_log_pci(ql_log_fatal, pdev, 0x0009,
 		    "Unable to allocate memory for ha.\n");
-		goto probe_out;
+		goto disable_device;
 	}
 	ql_dbg_pci(ql_dbg_init, pdev, 0x000a,
 	    "Memory allocated for ha=%p.\n", ha);
@@ -3254,7 +3266,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_release_selected_regions(ha->pdev, ha->bars);
 	kfree(ha);
 
-probe_out:
+disable_device:
 	pci_disable_device(pdev);
 	return ret;
 }
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0e03ca2..e766d84 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -2245,11 +2245,13 @@ static void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd)
 		pci_unmap_sg(ha->pdev, cmd->prot_sg, cmd->prot_sg_cnt,
 			cmd->dma_data_direction);
 
-	if (cmd->ctx_dsd_alloced)
-		qla2x00_clean_dsd_pool(ha, NULL, cmd);
+	if (!cmd->ctx)
+		return;
 
-	if (cmd->ctx)
-		dma_pool_free(ha->dl_dma_pool, cmd->ctx, cmd->ctx->crc_ctx_dma);
+	if (cmd->ctx_dsd_alloced)
+		qla2x00_clean_dsd_pool(ha, cmd->ctx);
+
+	dma_pool_free(ha->dl_dma_pool, cmd->ctx, cmd->ctx->crc_ctx_dma);
 }
 
 static int qlt_check_reserve_free_req(struct scsi_qla_host *vha,
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
index 8a58ef3..c197972 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.c
+++ b/drivers/scsi/qla2xxx/qla_tmpl.c
@@ -371,7 +371,7 @@ qla27xx_fwdt_entry_t262(struct scsi_qla_host *vha,
 		goto done;
 	}
 
-	if (end <= start || start == 0 || end == 0) {
+	if (end < start || start == 0 || end == 0) {
 		ql_dbg(ql_dbg_misc, vha, 0xd023,
 		    "%s: unusable range (start=%x end=%x)\n", __func__,
 		    ent->t262.end_addr, ent->t262.start_addr);
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 17249c3..3be980d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -245,7 +245,7 @@ struct sdebug_dev_info {
 	unsigned int channel;
 	unsigned int target;
 	u64 lun;
-	uuid_be lu_name;
+	uuid_t lu_name;
 	struct sdebug_host_info *sdbg_host;
 	unsigned long uas_bm[1];
 	atomic_t num_in_q;
@@ -965,7 +965,7 @@ static const u64 naa3_comp_c = 0x3111111000000000ULL;
 static int inquiry_vpd_83(unsigned char *arr, int port_group_id,
 			  int target_dev_id, int dev_id_num,
 			  const char *dev_id_str, int dev_id_str_len,
-			  const uuid_be *lu_name)
+			  const uuid_t *lu_name)
 {
 	int num, port_a;
 	char b[32];
@@ -1404,7 +1404,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	arr[4] = SDEBUG_LONG_INQ_SZ - 5;
 	arr[5] = (int)have_dif_prot;	/* PROTECT bit */
 	if (sdebug_vpd_use_hostno == 0)
-		arr[5] = 0x10; /* claim: implicit TGPS */
+		arr[5] |= 0x10; /* claim: implicit TPGS */
 	arr[6] = 0x10; /* claim: MultiP */
 	/* arr[6] |= 0x40; ... claim: EncServ (enclosure services) */
 	arr[7] = 0xa; /* claim: LINKED + CMDQUE */
@@ -3568,7 +3568,7 @@ static void sdebug_q_cmd_wq_complete(struct work_struct *work)
 }
 
 static bool got_shared_uuid;
-static uuid_be shared_uuid;
+static uuid_t shared_uuid;
 
 static struct sdebug_dev_info *sdebug_device_create(
 			struct sdebug_host_info *sdbg_host, gfp_t flags)
@@ -3578,12 +3578,12 @@ static struct sdebug_dev_info *sdebug_device_create(
 	devip = kzalloc(sizeof(*devip), flags);
 	if (devip) {
 		if (sdebug_uuid_ctl == 1)
-			uuid_be_gen(&devip->lu_name);
+			uuid_gen(&devip->lu_name);
 		else if (sdebug_uuid_ctl == 2) {
 			if (got_shared_uuid)
 				devip->lu_name = shared_uuid;
 			else {
-				uuid_be_gen(&shared_uuid);
+				uuid_gen(&shared_uuid);
 				got_shared_uuid = true;
 				devip->lu_name = shared_uuid;
 			}
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07da..304a715 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	}
 }
 
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
 {
 	__blk_put_request(req->q, req);
 }
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	if (IS_ERR(req))
 		return;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 
 	rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e31f1cc..550e29f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	if (IS_ERR(req))
 		return ret;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 	cmd->request->next_rq->special = NULL;
 }
 
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
 		unsigned int bytes, unsigned int bidi_bytes)
 {
 	struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
  * @cmd:	SCSI command (unused)
  * @result:	scsi error code
  *
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK	temporary transport failure
- * -EREMOTEIO	permanent target failure, do not retry
- * -EBADE	permanent nexus failure, retry on other path
- * -ENOSPC	No write space available
- * -ENODATA	Medium error
- * -EIO		unspecified I/O error
+ * Translate SCSI error code into block errors.
  */
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+		int result)
 {
-	int error = 0;
-
-	switch(host_byte(result)) {
+	switch (host_byte(result)) {
 	case DID_TRANSPORT_FAILFAST:
-		error = -ENOLINK;
-		break;
+		return BLK_STS_TRANSPORT;
 	case DID_TARGET_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -EREMOTEIO;
-		break;
+		return BLK_STS_TARGET;
 	case DID_NEXUS_FAILURE:
-		set_host_byte(cmd, DID_OK);
-		error = -EBADE;
-		break;
+		return BLK_STS_NEXUS;
 	case DID_ALLOC_FAILURE:
 		set_host_byte(cmd, DID_OK);
-		error = -ENOSPC;
-		break;
+		return BLK_STS_NOSPC;
 	case DID_MEDIUM_ERROR:
 		set_host_byte(cmd, DID_OK);
-		error = -ENODATA;
-		break;
+		return BLK_STS_MEDIUM;
 	default:
-		error = -EIO;
-		break;
+		return BLK_STS_IOERR;
 	}
-
-	return error;
 }
 
 /*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	int result = cmd->result;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
-	int error = 0;
+	blk_status_t error = BLK_STS_OK;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid = false;
 	int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			 * both sides at once.
 			 */
 			scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
-			if (scsi_end_request(req, 0, blk_rq_bytes(req),
+			if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
 					blk_rq_bytes(req->next_rq)))
 				BUG();
 			return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_print_sense(cmd);
 		result = 0;
 		/* for passthrough error may be set */
-		error = 0;
+		error = BLK_STS_OK;
 	}
 
 	/*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 				action = ACTION_REPREP;
 			} else if (sshdr.asc == 0x10) /* DIX */ {
 				action = ACTION_FAIL;
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
 			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
 				action = ACTION_FAIL;
-				error = -EREMOTEIO;
+				error = BLK_STS_TARGET;
 			} else
 				action = ACTION_FAIL;
 			break;
 		case ABORTED_COMMAND:
 			action = ACTION_FAIL;
 			if (sshdr.asc == 0x10) /* DIF */
-				error = -EILSEQ;
+				error = BLK_STS_PROTECTION;
 			break;
 		case NOT_READY:
 			/* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ int scsi_init_io(struct scsi_cmnd *cmd)
 }
 EXPORT_SYMBOL(scsi_init_io);
 
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
 void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 {
 	void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ static void scsi_request_fn(struct request_queue *q)
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
 {
 	switch (ret) {
 	case BLKPREP_OK:
-		return BLK_MQ_RQ_QUEUE_OK;
+		return BLK_STS_OK;
 	case BLKPREP_DEFER:
-		return BLK_MQ_RQ_QUEUE_BUSY;
+		return BLK_STS_RESOURCE;
 	default:
-		return BLK_MQ_RQ_QUEUE_ERROR;
+		return BLK_STS_IOERR;
 	}
 }
 
@@ -1851,7 +1847,7 @@ static int scsi_mq_prep_fn(struct request *req)
 
 	/* zero out the cmd, except for the embedded scsi_request */
 	memset((char *)cmd + sizeof(cmd->req), 0,
-		sizeof(*cmd) - sizeof(cmd->req));
+		sizeof(*cmd) - sizeof(cmd->req) + shost->hostt->cmd_size);
 
 	req->special = cmd;
 
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
-	int ret;
+	blk_status_t ret;
 	int reason;
 
 	ret = prep_to_mq(scsi_prep_state_check(sdev, req));
-	if (ret != BLK_MQ_RQ_QUEUE_OK)
+	if (ret != BLK_STS_OK)
 		goto out;
 
-	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	ret = BLK_STS_RESOURCE;
 	if (!get_device(&sdev->sdev_gendev))
 		goto out;
 
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		ret = prep_to_mq(scsi_mq_prep_fn(req));
-		if (ret != BLK_MQ_RQ_QUEUE_OK)
+		if (ret != BLK_STS_OK)
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	reason = scsi_dispatch_cmd(cmd);
 	if (reason) {
 		scsi_set_blocked(cmd, reason);
-		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		ret = BLK_STS_RESOURCE;
 		goto out_dec_host_busy;
 	}
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return BLK_STS_OK;
 
 out_dec_host_busy:
 	atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	put_device(&sdev->sdev_gendev);
 out:
 	switch (ret) {
-	case BLK_MQ_RQ_QUEUE_BUSY:
+	case BLK_STS_OK:
+		break;
+	case BLK_STS_RESOURCE:
 		if (atomic_read(&sdev->device_busy) == 0 &&
 		    !scsi_device_blocked(sdev))
 			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 		break;
-	case BLK_MQ_RQ_QUEUE_ERROR:
+	default:
 		/*
 		 * Make sure to release all allocated resources when
 		 * we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		if (req->rq_flags & RQF_DONTPREP)
 			scsi_mq_uninit_cmd(cmd);
 		break;
-	default:
-		break;
 	}
 	return ret;
 }
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
 	struct device *dev = shost->dma_dev;
 
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	q->request_fn = scsi_request_fn;
 	q->init_rq_fn = scsi_init_rq;
 	q->exit_rq_fn = scsi_exit_rq;
+	q->initialize_rq_fn = scsi_initialize_rq;
 
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
 #endif
 	.init_request	= scsi_init_request,
 	.exit_request	= scsi_exit_request,
+	.initialize_rq_fn = scsi_initialize_rq,
 	.map_queues	= scsi_map_queues,
 };
 
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
 		if (wait)
 			blk_mq_quiesce_queue(q);
 		else
-			blk_mq_stop_hw_queues(q);
+			blk_mq_quiesce_queue_nowait(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
 		return -EINVAL;
 
 	if (q->mq_ops) {
-		blk_mq_start_stopped_hw_queues(q, false);
+		blk_mq_unquiesce_queue(q);
 	} else {
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_start_queue(q);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1..5006a65 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
 #include <linux/bsg.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_request.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 			    struct sas_rphy *rphy)
 {
 	struct request *req;
-	int ret;
+	blk_status_t ret;
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
+	q->initialize_rq_fn = scsi_initialize_rq;
 	q->cmd_size = sizeof(struct scsi_request);
 
 	if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 	if (error)
 		goto out_cleanup_queue;
 
+	/*
+	 * by default assume old behaviour and bounce for any highmem page
+	 */
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
 	error = bsg_register_queue(q, dev, name, release);
 	if (error)
 		goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 		q->queuedata = shost;
 
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	return 0;
 
 out_cleanup_queue:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f9d1432..b6bb4e0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -827,21 +827,32 @@ static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
 	u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+	int ret;
 
 	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
 		switch (sdkp->zeroing_mode) {
 		case SD_ZERO_WS16_UNMAP:
-			return sd_setup_write_same16_cmnd(cmd, true);
+			ret = sd_setup_write_same16_cmnd(cmd, true);
+			goto out;
 		case SD_ZERO_WS10_UNMAP:
-			return sd_setup_write_same10_cmnd(cmd, true);
+			ret = sd_setup_write_same10_cmnd(cmd, true);
+			goto out;
 		}
 	}
 
 	if (sdp->no_write_same)
 		return BLKPREP_INVALID;
+
 	if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
-		return sd_setup_write_same16_cmnd(cmd, false);
-	return sd_setup_write_same10_cmnd(cmd, false);
+		ret = sd_setup_write_same16_cmnd(cmd, false);
+	else
+		ret = sd_setup_write_same10_cmnd(cmd, false);
+
+out:
+	if (sd_is_zoned(sdkp) && ret == BLKPREP_OK)
+		return sd_zbc_write_lock_zone(cmd);
+
+	return ret;
 }
 
 static void sd_config_write_same(struct scsi_disk *sdkp)
@@ -948,6 +959,10 @@ static int sd_setup_write_same_cmnd(struct scsi_cmnd *cmd)
 	rq->__data_len = sdp->sector_size;
 	ret = scsi_init_io(cmd);
 	rq->__data_len = nr_bytes;
+
+	if (sd_is_zoned(sdkp) && ret != BLKPREP_OK)
+		sd_zbc_write_unlock_zone(cmd);
+
 	return ret;
 }
 
@@ -1567,17 +1582,21 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 	return retval;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp)
+static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 {
 	int retries, res;
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout
 		* SD_FLUSH_TIMEOUT_MULTIPLIER;
-	struct scsi_sense_hdr sshdr;
+	struct scsi_sense_hdr my_sshdr;
 
 	if (!scsi_device_online(sdp))
 		return -ENODEV;
 
+	/* caller might not be interested in sense, but we need it */
+	if (!sshdr)
+		sshdr = &my_sshdr;
+
 	for (retries = 3; retries > 0; --retries) {
 		unsigned char cmd[10] = { 0 };
 
@@ -1586,7 +1605,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 * Leave the rest of the command zero to indicate
 		 * flush everything.
 		 */
-		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
+		res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, sshdr,
 				timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL);
 		if (res == 0)
 			break;
@@ -1596,11 +1615,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		sd_print_result(sdkp, "Synchronize Cache(10) failed", res);
 
 		if (driver_byte(res) & DRIVER_SENSE)
-			sd_print_sense_hdr(sdkp, &sshdr);
+			sd_print_sense_hdr(sdkp, sshdr);
+
 		/* we need to evaluate the error return  */
-		if (scsi_sense_valid(&sshdr) &&
-			(sshdr.asc == 0x3a ||	/* medium not present */
-			 sshdr.asc == 0x20))	/* invalid command */
+		if (scsi_sense_valid(sshdr) &&
+			(sshdr->asc == 0x3a ||	/* medium not present */
+			 sshdr->asc == 0x20))	/* invalid command */
 				/* this is no error here */
 				return 0;
 
@@ -3444,7 +3464,7 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		sd_sync_cache(sdkp, NULL);
 	}
 
 	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
@@ -3456,6 +3476,7 @@ static void sd_shutdown(struct device *dev)
 static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	struct scsi_sense_hdr sshdr;
 	int ret = 0;
 
 	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
@@ -3463,12 +3484,23 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		ret = sd_sync_cache(sdkp);
+		ret = sd_sync_cache(sdkp, &sshdr);
+
 		if (ret) {
 			/* ignore OFFLINE device */
 			if (ret == -ENODEV)
-				ret = 0;
-			goto done;
+				return 0;
+
+			if (!scsi_sense_valid(&sshdr) ||
+			    sshdr.sense_key != ILLEGAL_REQUEST)
+				return ret;
+
+			/*
+			 * sshdr.sense_key == ILLEGAL_REQUEST means this drive
+			 * doesn't support sync. There's not much to do and
+			 * suspend shouldn't fail.
+			 */
+			ret = 0;
 		}
 	}
 
@@ -3480,7 +3512,6 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
 			ret = 0;
 	}
 
-done:
 	return ret;
 }
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0a38ba0..21225d6 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
 } Sg_device;
 
 /* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
 static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static int sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 	if (atomic_read(&sdp->detaching)) {
 		if (srp->bio) {
 			scsi_req_free_cmd(scsi_req(srp->rq));
-			blk_end_request_all(srp->rq, -EIO);
+			blk_end_request_all(srp->rq, BLK_STS_IOERR);
 			srp->rq = NULL;
 		}
 
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
  * level when a command is completed (or has failed).
  */
 static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
 {
 	struct sg_request *srp = rq->end_io_data;
 	struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 	}
 	req = scsi_req(rq);
 
-	scsi_req_init(rq);
-
 	if (hp->cmd_len > BLK_MAX_CDB)
 		req->cmd = long_cmdp;
 	memcpy(req->cmd, cmd, hp->cmd_len);
@@ -2074,11 +2072,12 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id)
 		if ((1 == resp->done) && (!resp->sg_io_owned) &&
 		    ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
 			resp->done = 2;	/* guard against other readers */
-			break;
+			write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+			return resp;
 		}
 	}
 	write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
-	return resp;
+	return NULL;
 }
 
 /* always adds to end of list */
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6..8e5013d 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 	atomic64_dec(&STp->stats->in_flight);
 }
 
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
 {
 	struct st_request *SRpnt = req->end_io_data;
 	struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
 	rq = scsi_req(req);
-	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	mdata->null_mapped = 1;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abc7e87..ffe8d86 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7698,6 +7698,12 @@ static inline void ufshcd_add_sysfs_nodes(struct ufs_hba *hba)
 	ufshcd_add_spm_lvl_sysfs_nodes(hba);
 }
 
+static inline void ufshcd_remove_sysfs_nodes(struct ufs_hba *hba)
+{
+	device_remove_file(hba->dev, &hba->rpm_lvl_attr);
+	device_remove_file(hba->dev, &hba->spm_lvl_attr);
+}
+
 /**
  * ufshcd_shutdown - shutdown routine
  * @hba: per adapter instance
@@ -7735,6 +7741,7 @@ EXPORT_SYMBOL(ufshcd_shutdown);
  */
 void ufshcd_remove(struct ufs_hba *hba)
 {
+	ufshcd_remove_sysfs_nodes(hba);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
diff --git a/drivers/staging/ccree/Kconfig b/drivers/staging/ccree/Kconfig
index ae62704..4be87f5 100644
--- a/drivers/staging/ccree/Kconfig
+++ b/drivers/staging/ccree/Kconfig
@@ -1,6 +1,6 @@
 config CRYPTO_DEV_CCREE
 	tristate "Support for ARM TrustZone CryptoCell C7XX family of Crypto accelerators"
-	depends on CRYPTO_HW && OF && HAS_DMA
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
 	default n
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
diff --git a/drivers/staging/ccree/ssi_buffer_mgr.c b/drivers/staging/ccree/ssi_buffer_mgr.c
index 038e2ff..6471d3d 100644
--- a/drivers/staging/ccree/ssi_buffer_mgr.c
+++ b/drivers/staging/ccree/ssi_buffer_mgr.c
@@ -216,7 +216,8 @@ void ssi_buffer_mgr_copy_scatterlist_portion(
 	uint32_t nents, lbytes;
 
 	nents = ssi_buffer_mgr_get_sgl_nents(sg, end, &lbytes, NULL);
-	sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip), 0, (direct == SSI_SG_TO_BUF));
+	sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip,
+		       (direct == SSI_SG_TO_BUF));
 }
 
 static inline int ssi_buffer_mgr_render_buff_to_mlli(
diff --git a/drivers/staging/iio/cdc/ad7152.c b/drivers/staging/iio/cdc/ad7152.c
index dc6ecd8..ff10d1f 100644
--- a/drivers/staging/iio/cdc/ad7152.c
+++ b/drivers/staging/iio/cdc/ad7152.c
@@ -231,16 +231,12 @@ static int ad7152_write_raw_samp_freq(struct device *dev, int val)
 	if (i >= ARRAY_SIZE(ad7152_filter_rate_table))
 		i = ARRAY_SIZE(ad7152_filter_rate_table) - 1;
 
-	mutex_lock(&chip->state_lock);
 	ret = i2c_smbus_write_byte_data(chip->client,
 					AD7152_REG_CFG2, AD7152_CFG2_OSR(i));
-	if (ret < 0) {
-		mutex_unlock(&chip->state_lock);
+	if (ret < 0)
 		return ret;
-	}
 
 	chip->filter_rate_setup = i;
-	mutex_unlock(&chip->state_lock);
 
 	return ret;
 }
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 0db662d..85b242e 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -3267,7 +3267,7 @@ int
 kiblnd_connd(void *arg)
 {
 	spinlock_t *lock = &kiblnd_data.kib_connd_lock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct kib_conn *conn;
 	int timeout;
@@ -3521,7 +3521,7 @@ kiblnd_scheduler(void *arg)
 	long id = (long)arg;
 	struct kib_sched_info *sched;
 	struct kib_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	struct ib_wc wc;
 	int did_something;
@@ -3656,7 +3656,7 @@ kiblnd_failover_thread(void *arg)
 {
 	rwlock_t *glock = &kiblnd_data.kib_global_lock;
 	struct kib_dev *dev;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned long flags;
 	int rc;
 
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 3ed3b08..6b38d5a 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -2166,7 +2166,7 @@ ksocknal_connd(void *arg)
 {
 	spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock;
 	struct ksock_connreq *cr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int nloops = 0;
 	int cons_retry = 0;
 
@@ -2554,7 +2554,7 @@ ksocknal_check_peer_timeouts(int idx)
 int
 ksocknal_reaper(void *arg)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct ksock_conn *conn;
 	struct ksock_sched *sched;
 	struct list_head enomem_conns;
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index c56e992..49deb44 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -361,7 +361,7 @@ static int libcfs_debug_dumplog_thread(void *arg)
 
 void libcfs_debug_dumplog(void)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct task_struct *dumper;
 
 	/* we're being careful to ensure that the kernel thread is
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index 9599b74..27082d2 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -990,7 +990,7 @@ static int tracefiled(void *arg)
 	complete(&tctl->tctl_start);
 
 	while (1) {
-		wait_queue_t __wait;
+		wait_queue_entry_t __wait;
 
 		pc.pc_want_daemon_pages = 0;
 		collect_pages(&pc);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c
index ce4b835..9ebba4e 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-eq.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-eq.c
@@ -312,7 +312,7 @@ __must_hold(&the_lnet.ln_eq_wait_lock)
 {
 	int tms = *timeout_ms;
 	int wait;
-	wait_queue_t wl;
+	wait_queue_entry_t wl;
 	unsigned long now;
 
 	if (!tms)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 9fca8d2..f075706 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -516,7 +516,7 @@ lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
 int
 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct socket *newsock;
 	int rc;
 
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 999f250..bf31bc2 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -192,7 +192,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
 }
 
 static int seq_fid_alloc_prep(struct lu_client_seq *seq,
-			      wait_queue_t *link)
+			      wait_queue_entry_t *link)
 {
 	if (seq->lcs_update) {
 		add_wait_queue(&seq->lcs_waitq, link);
@@ -223,7 +223,7 @@ static void seq_fid_alloc_fini(struct lu_client_seq *seq)
 int seq_client_alloc_fid(const struct lu_env *env,
 			 struct lu_client_seq *seq, struct lu_fid *fid)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 	int rc;
 
 	LASSERT(seq);
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(seq_client_alloc_fid);
  */
 void seq_client_flush(struct lu_client_seq *seq)
 {
-	wait_queue_t link;
+	wait_queue_entry_t link;
 
 	LASSERT(seq);
 	init_waitqueue_entry(&link, current);
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index b04d613..f24970d 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -201,7 +201,7 @@ struct l_wait_info {
 			   sigmask(SIGALRM))
 
 /**
- * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * wait_queue_entry_t of Linux (version < 2.6.34) is a FIFO list for exclusively
  * waiting threads, which is not always desirable because all threads will
  * be waken up again and again, even user only needs a few of them to be
  * active most time. This is not good for performance because cache can
@@ -228,7 +228,7 @@ struct l_wait_info {
  */
 #define __l_wait_event(wq, condition, info, ret, l_add_wait)		   \
 do {									   \
-	wait_queue_t __wait;						 \
+	wait_queue_entry_t __wait;						 \
 	long __timeout = info->lwi_timeout;			  \
 	sigset_t   __blocked;					      \
 	int   __allow_intr = info->lwi_allow_intr;			     \
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 8af6110..96515b8 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -207,7 +207,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
 static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
 {
 	struct lu_object_header *header = obj->co_lu.lo_header;
-	wait_queue_t	   waiter;
+	wait_queue_entry_t	   waiter;
 
 	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
 		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 391c632..e889d3a 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -370,7 +370,7 @@ struct lov_thread_info {
 	struct ost_lvb	  lti_lvb;
 	struct cl_2queue	lti_cl2q;
 	struct cl_page_list     lti_plist;
-	wait_queue_t	  lti_waiter;
+	wait_queue_entry_t	  lti_waiter;
 	struct cl_attr          lti_attr;
 };
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index ab3ecfe..eddabbe 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -371,7 +371,7 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
 	struct lov_layout_raid0 *r0;
 	struct lu_site	  *site;
 	struct lu_site_bkt_data *bkt;
-	wait_queue_t	  *waiter;
+	wait_queue_entry_t	  *waiter;
 
 	r0  = &lov->u.raid0;
 	LASSERT(r0->lo_sub[idx] == los);
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 2e1bd47..e6727ce 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -293,18 +293,10 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
 	size_t lmmk_size;
 	size_t lum_size;
 	int rc;
-	mm_segment_t seg;
 
 	if (!lsm)
 		return -ENODATA;
 
-	/*
-	 * "Switch to kernel segment" to allow copying from kernel space by
-	 * copy_{to,from}_user().
-	 */
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-
 	if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) {
 		CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
 		       lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
@@ -406,6 +398,5 @@ int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
 out_free:
 	kvfree(lmmk);
 out:
-	set_fs(seg);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index abcf951..76ae600 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -556,7 +556,7 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_t *waiter,
+				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
 	struct lu_site_bkt_data *bkt;
@@ -670,7 +670,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 					    struct lu_device *dev,
 					    const struct lu_fid *f,
 					    const struct lu_object_conf *conf,
-					    wait_queue_t *waiter)
+					    wait_queue_entry_t *waiter)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -750,7 +750,7 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 {
 	struct lu_site_bkt_data *bkt;
 	struct lu_object	*obj;
-	wait_queue_t	   wait;
+	wait_queue_entry_t	   wait;
 
 	while (1) {
 		obj = lu_object_find_try(env, dev, f, conf, &wait);
diff --git a/drivers/staging/media/atomisp/i2c/Makefile b/drivers/staging/media/atomisp/i2c/Makefile
index 8ea0190..466517c 100644
--- a/drivers/staging/media/atomisp/i2c/Makefile
+++ b/drivers/staging/media/atomisp/i2c/Makefile
@@ -19,5 +19,3 @@
 
 obj-$(CONFIG_VIDEO_LM3554) += lm3554.o
 
-ccflags-y += -Werror
-
diff --git a/drivers/staging/media/atomisp/i2c/imx/Makefile b/drivers/staging/media/atomisp/i2c/imx/Makefile
index 1d7f7ab..6b13a3a 100644
--- a/drivers/staging/media/atomisp/i2c/imx/Makefile
+++ b/drivers/staging/media/atomisp/i2c/imx/Makefile
@@ -4,5 +4,3 @@
 
 ov8858_driver-objs := ../ov8858.o dw9718.o vcm.o
 obj-$(CONFIG_VIDEO_OV8858)     += ov8858_driver.o
-
-ccflags-y += -Werror
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/Makefile b/drivers/staging/media/atomisp/i2c/ov5693/Makefile
index fceb9e9..c9c0e12 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/Makefile
+++ b/drivers/staging/media/atomisp/i2c/ov5693/Makefile
@@ -1,3 +1 @@
 obj-$(CONFIG_VIDEO_OV5693) += ov5693.o
-
-ccflags-y += -Werror
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/Makefile b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
index 3fa7c1c..f126a89 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/Makefile
+++ b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
@@ -351,5 +351,5 @@
 DEFINES += -DATOMISP_POSTFIX=\"css2400b0_v21\" -DISP2400B0
 DEFINES += -DSYSTEM_hive_isp_css_2400_system -DISP2400
 
-ccflags-y += $(INCLUDES) $(DEFINES) -fno-common -Werror
+ccflags-y += $(INCLUDES) $(DEFINES) -fno-common
 
diff --git a/drivers/staging/rtl8188eu/os_dep/mon.c b/drivers/staging/rtl8188eu/os_dep/mon.c
index cfe37eb..859d0d6 100644
--- a/drivers/staging/rtl8188eu/os_dep/mon.c
+++ b/drivers/staging/rtl8188eu/os_dep/mon.c
@@ -152,7 +152,7 @@ static const struct net_device_ops mon_netdev_ops = {
 static void mon_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &mon_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	ether_setup(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->type = ARPHRD_IEEE80211;
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index 36c3189..bd4352f 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -2667,7 +2667,8 @@ static int rtw_cfg80211_add_monitor_if (struct adapter *padapter, char *name, st
 	mon_ndev->type = ARPHRD_IEEE80211_RADIOTAP;
 	strncpy(mon_ndev->name, name, IFNAMSIZ);
 	mon_ndev->name[IFNAMSIZ - 1] = 0;
-	mon_ndev->destructor = rtw_ndev_destructor;
+	mon_ndev->needs_free_netdev = true;
+	mon_ndev->priv_destructor = rtw_ndev_destructor;
 
 	mon_ndev->netdev_ops = &rtw_cfg80211_monitor_if_ops;
 
diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index f83cfc7..0215899 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
@@ -1207,8 +1207,6 @@ void rtw_ndev_destructor(struct net_device *ndev)
 
 	if (ndev->ieee80211_ptr)
 		kfree((u8 *)ndev->ieee80211_ptr);
-
-	free_netdev(ndev);
 }
 
 void rtw_dev_unload(struct adapter *padapter)
diff --git a/drivers/staging/rtl8723bs/os_dep/osdep_service.c b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
index 02db59e..aa16d1a 100644
--- a/drivers/staging/rtl8723bs/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
@@ -160,7 +160,7 @@ static int isFileReadable(char *path)
 		oldfs = get_fs(); set_fs(get_ds());
 
 		if (1!=readFile(fp, &buf, 1))
-			ret = PTR_ERR(fp);
+			ret = -EINVAL;
 
 		set_fs(oldfs);
 		filp_close(fp, NULL);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 26a9bcd..3fdca2c 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
 	 */
 	if (dump_payload)
 		goto after_immediate_data;
+	/*
+	 * Check for underflow case where both EDTL and immediate data payload
+	 * exceed what is presented by the CDB's TRANSFER LENGTH, and what has
+	 * already been set in target_cmd_size_check() as se_cmd->data_length.
+	 *
+	 * For this special case, fail the command and dump the immediate data
+	 * payload.
+	 */
+	if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+		cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+		goto after_immediate_data;
+	}
 
 	immed_ret = iscsit_handle_immediate_data(cmd, hdr,
 					cmd->first_burst_len);
@@ -3790,6 +3802,8 @@ int iscsi_target_tx_thread(void *arg)
 {
 	int ret = 0;
 	struct iscsi_conn *conn = arg;
+	bool conn_freed = false;
+
 	/*
 	 * Allow ourselves to be interrupted by SIGINT so that a
 	 * connection recovery / failure event can be triggered externally.
@@ -3815,12 +3829,14 @@ int iscsi_target_tx_thread(void *arg)
 			goto transport_err;
 
 		ret = iscsit_handle_response_queue(conn);
-		if (ret == 1)
+		if (ret == 1) {
 			goto get_immediate;
-		else if (ret == -ECONNRESET)
+		} else if (ret == -ECONNRESET) {
+			conn_freed = true;
 			goto out;
-		else if (ret < 0)
+		} else if (ret < 0) {
 			goto transport_err;
+		}
 	}
 
 transport_err:
@@ -3830,8 +3846,13 @@ int iscsi_target_tx_thread(void *arg)
 	 * responsible for cleaning up the early connection failure.
 	 */
 	if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN)
-		iscsit_take_action_for_connection_exit(conn);
+		iscsit_take_action_for_connection_exit(conn, &conn_freed);
 out:
+	if (!conn_freed) {
+		while (!kthread_should_stop()) {
+			msleep(100);
+		}
+	}
 	return 0;
 }
 
@@ -4004,6 +4025,7 @@ int iscsi_target_rx_thread(void *arg)
 {
 	int rc;
 	struct iscsi_conn *conn = arg;
+	bool conn_freed = false;
 
 	/*
 	 * Allow ourselves to be interrupted by SIGINT so that a
@@ -4016,7 +4038,7 @@ int iscsi_target_rx_thread(void *arg)
 	 */
 	rc = wait_for_completion_interruptible(&conn->rx_login_comp);
 	if (rc < 0 || iscsi_target_check_conn_state(conn))
-		return 0;
+		goto out;
 
 	if (!conn->conn_transport->iscsit_get_rx_pdu)
 		return 0;
@@ -4025,7 +4047,15 @@ int iscsi_target_rx_thread(void *arg)
 
 	if (!signal_pending(current))
 		atomic_set(&conn->transport_failed, 1);
-	iscsit_take_action_for_connection_exit(conn);
+	iscsit_take_action_for_connection_exit(conn, &conn_freed);
+
+out:
+	if (!conn_freed) {
+		while (!kthread_should_stop()) {
+			msleep(100);
+		}
+	}
+
 	return 0;
 }
 
@@ -4405,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
 	 * always sleep waiting for RX/TX thread shutdown to complete
 	 * within iscsit_close_connection().
 	 */
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
@@ -4422,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
 {
 	int sleep = 1;
 
-	if (!conn->conn_transport->rdma_shutdown)
+	if (!conn->conn_transport->rdma_shutdown) {
 		sleep = cmpxchg(&conn->tx_thread_active, true, false);
+		if (!sleep)
+			return;
+	}
 
 	atomic_set(&conn->conn_logout_remove, 0);
 	complete(&conn->conn_logout_comp);
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
index 9a96e17..7fe2aa7 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn)
 	}
 }
 
-void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed)
 {
+	*conn_freed = false;
+
 	spin_lock_bh(&conn->state_lock);
 	if (atomic_read(&conn->connection_exit)) {
 		spin_unlock_bh(&conn->state_lock);
@@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
 	if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
 		spin_unlock_bh(&conn->state_lock);
 		iscsit_close_connection(conn);
+		*conn_freed = true;
 		return;
 	}
 
@@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
 	spin_unlock_bh(&conn->state_lock);
 
 	iscsit_handle_connection_cleanup(conn);
+	*conn_freed = true;
 }
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
index 60e69e2..3822d9c 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.h
+++ b/drivers/target/iscsi/iscsi_target_erl0.h
@@ -15,6 +15,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
 extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
 extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
 extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
-extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *);
 
 #endif   /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 6623847..92b96b5 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1464,5 +1464,9 @@ int iscsi_target_login_thread(void *arg)
 			break;
 	}
 
+	while (!kthread_should_stop()) {
+		msleep(100);
+	}
+
 	return 0;
 }
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 7ccc9c1..6f88b31 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -493,14 +493,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn)
 
 static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *);
 
-static bool iscsi_target_sk_state_check(struct sock *sk)
+static bool __iscsi_target_sk_check_close(struct sock *sk)
 {
 	if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) {
-		pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE,"
+		pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE,"
 			"returning FALSE\n");
-		return false;
+		return true;
 	}
-	return true;
+	return false;
+}
+
+static bool iscsi_target_sk_check_close(struct iscsi_conn *conn)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		read_lock_bh(&sk->sk_callback_lock);
+		state = (__iscsi_target_sk_check_close(sk) ||
+			 test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags));
+		read_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
+}
+
+static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		read_lock_bh(&sk->sk_callback_lock);
+		state = test_bit(flag, &conn->login_flags);
+		read_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
+}
+
+static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag)
+{
+	bool state = false;
+
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+		state = (__iscsi_target_sk_check_close(sk) ||
+			 test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags));
+		if (!state)
+			clear_bit(flag, &conn->login_flags);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	return state;
 }
 
 static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login)
@@ -540,6 +586,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 
 	pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n",
 			conn, current->comm, current->pid);
+	/*
+	 * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready()
+	 * before initial PDU processing in iscsi_target_start_negotiation()
+	 * has completed, go ahead and retry until it's cleared.
+	 *
+	 * Otherwise, if the TCP connection drops while this is occurring,
+	 * iscsi_target_start_negotiation() will detect the failure, call
+	 * cancel_delayed_work_sync(&conn->login_work), and clean up the
+	 * remaining iscsi connection resources from iscsi_np process context.
+	 */
+	if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) {
+		schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10));
+		return;
+	}
 
 	spin_lock(&tpg->tpg_state_lock);
 	state = (tpg->tpg_state == TPG_STATE_ACTIVE);
@@ -547,26 +607,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 
 	if (!state) {
 		pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n");
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
-		return;
+		goto err;
 	}
 
-	if (conn->sock) {
-		struct sock *sk = conn->sock->sk;
-
-		read_lock_bh(&sk->sk_callback_lock);
-		state = iscsi_target_sk_state_check(sk);
-		read_unlock_bh(&sk->sk_callback_lock);
-
-		if (!state) {
-			pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
-			iscsi_target_restore_sock_callbacks(conn);
-			iscsi_target_login_drop(conn, login);
-			iscsit_deaccess_np(np, tpg, tpg_np);
-			return;
-		}
+	if (iscsi_target_sk_check_close(conn)) {
+		pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
+		goto err;
 	}
 
 	conn->login_kworker = current;
@@ -584,34 +630,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 	flush_signals(current);
 	conn->login_kworker = NULL;
 
-	if (rc < 0) {
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
-		return;
-	}
+	if (rc < 0)
+		goto err;
 
 	pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n",
 			conn, current->comm, current->pid);
 
 	rc = iscsi_target_do_login(conn, login);
 	if (rc < 0) {
-		iscsi_target_restore_sock_callbacks(conn);
-		iscsi_target_login_drop(conn, login);
-		iscsit_deaccess_np(np, tpg, tpg_np);
+		goto err;
 	} else if (!rc) {
-		if (conn->sock) {
-			struct sock *sk = conn->sock->sk;
-
-			write_lock_bh(&sk->sk_callback_lock);
-			clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
-			write_unlock_bh(&sk->sk_callback_lock);
-		}
+		if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE))
+			goto err;
 	} else if (rc == 1) {
 		iscsi_target_nego_release(conn);
 		iscsi_post_login_handler(np, conn, zero_tsih);
 		iscsit_deaccess_np(np, tpg, tpg_np);
 	}
+	return;
+
+err:
+	iscsi_target_restore_sock_callbacks(conn);
+	iscsi_target_login_drop(conn, login);
+	iscsit_deaccess_np(np, tpg, tpg_np);
 }
 
 static void iscsi_target_do_cleanup(struct work_struct *work)
@@ -659,31 +700,54 @@ static void iscsi_target_sk_state_change(struct sock *sk)
 		orig_state_change(sk);
 		return;
 	}
+	state = __iscsi_target_sk_check_close(sk);
+	pr_debug("__iscsi_target_sk_close_change: state: %d\n", state);
+
 	if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
 		pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change"
 			 " conn: %p\n", conn);
+		if (state)
+			set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
 		write_unlock_bh(&sk->sk_callback_lock);
 		orig_state_change(sk);
 		return;
 	}
-	if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
+	if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
 		pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n",
 			 conn);
 		write_unlock_bh(&sk->sk_callback_lock);
 		orig_state_change(sk);
 		return;
 	}
-
-	state = iscsi_target_sk_state_check(sk);
-	write_unlock_bh(&sk->sk_callback_lock);
-
-	pr_debug("iscsi_target_sk_state_change: state: %d\n", state);
-
-	if (!state) {
+	/*
+	 * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED,
+	 * but only queue conn->login_work -> iscsi_target_do_login_rx()
+	 * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared.
+	 *
+	 * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close()
+	 * will detect the dropped TCP connection from delayed workqueue context.
+	 *
+	 * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial
+	 * iscsi_target_start_negotiation() is running, iscsi_target_do_login()
+	 * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation()
+	 * via iscsi_target_sk_check_and_clear() is responsible for detecting the
+	 * dropped TCP connection in iscsi_np process context, and cleaning up
+	 * the remaining iscsi connection resources.
+	 */
+	if (state) {
 		pr_debug("iscsi_target_sk_state_change got failed state\n");
-		schedule_delayed_work(&conn->login_cleanup_work, 0);
+		set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags);
+		state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags);
+		write_unlock_bh(&sk->sk_callback_lock);
+
+		orig_state_change(sk);
+
+		if (!state)
+			schedule_delayed_work(&conn->login_work, 0);
 		return;
 	}
+	write_unlock_bh(&sk->sk_callback_lock);
+
 	orig_state_change(sk);
 }
 
@@ -946,6 +1010,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
 			if (iscsi_target_handle_csg_one(conn, login) < 0)
 				return -1;
 			if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+				/*
+				 * Check to make sure the TCP connection has not
+				 * dropped asynchronously while session reinstatement
+				 * was occurring in this kthread context, before
+				 * transitioning to full feature phase operation.
+				 */
+				if (iscsi_target_sk_check_close(conn))
+					return -1;
+
 				login->tsih = conn->sess->tsih;
 				login->login_complete = 1;
 				iscsi_target_restore_sock_callbacks(conn);
@@ -972,21 +1045,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
 		break;
 	}
 
-	if (conn->sock) {
-		struct sock *sk = conn->sock->sk;
-		bool state;
-
-		read_lock_bh(&sk->sk_callback_lock);
-		state = iscsi_target_sk_state_check(sk);
-		read_unlock_bh(&sk->sk_callback_lock);
-
-		if (!state) {
-			pr_debug("iscsi_target_do_login() failed state for"
-				 " conn: %p\n", conn);
-			return -1;
-		}
-	}
-
 	return 0;
 }
 
@@ -1255,10 +1313,22 @@ int iscsi_target_start_negotiation(
 
 		write_lock_bh(&sk->sk_callback_lock);
 		set_bit(LOGIN_FLAGS_READY, &conn->login_flags);
+		set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags);
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
-
+	/*
+	 * If iscsi_target_do_login returns zero to signal more PDU
+	 * exchanges are required to complete the login, go ahead and
+	 * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection
+	 * is still active.
+	 *
+	 * Otherwise, if the TCP connection dropped asynchronously, go ahead
+	 * and perform connection cleanup now.
+	 */
 	ret = iscsi_target_do_login(conn, login);
+	if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU))
+		ret = -1;
+
 	if (ret < 0) {
 		cancel_delayed_work_sync(&conn->login_work);
 		cancel_delayed_work_sync(&conn->login_cleanup_work);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069eb..c05d3801 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
 		return -EINVAL;
 	}
 
-	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 	if (!ib_dev->ibd_bio_set) {
 		pr_err("IBLOCK: Unable to create bioset\n");
 		goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
 	struct se_cmd *cmd = bio->bi_private;
 	struct iblock_req *ibr = cmd->priv;
 
-	if (bio->bi_error) {
-		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_error);
+	if (bio->bi_status) {
+		pr_err("bio error: %p,  err: %d\n", bio, bio->bi_status);
 		/*
 		 * Bump the ib_bio_err_cnt and release bio.
 		 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
 {
 	struct se_cmd *cmd = bio->bi_private;
 
-	if (bio->bi_error)
-		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+	if (bio->bi_status)
+		pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
 
 	if (cmd) {
-		if (bio->bi_error)
+		if (bio->bi_status)
 			target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
 		else
 			target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 9ab7090..0912de7 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -136,7 +136,7 @@ int	init_se_kmem_caches(void);
 void	release_se_kmem_caches(void);
 u32	scsi_get_new_index(scsi_index_t);
 void	transport_subsystem_check_init(void);
-void	transport_cmd_finish_abort(struct se_cmd *, int);
+int	transport_cmd_finish_abort(struct se_cmd *, int);
 unsigned char *transport_dump_cmd_direction(struct se_cmd *);
 void	transport_dump_dev_state(struct se_device *, char *, int *);
 void	transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb1..ceec021 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
 }
 
 static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
 
 /*	pscsi_attach_hba():
  *
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		goto fail;
 	}
 
-	scsi_req_init(req);
-
 	if (sgl) {
 		ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
 		if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
 	return 0;
 }
 
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
 {
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index dce1e1b..13f47bf 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 	kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
 	unsigned long flags;
 	bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 		transport_send_task_abort(cmd);
 	}
 
-	transport_cmd_finish_abort(cmd, remove);
+	return transport_cmd_finish_abort(cmd, remove);
 }
 
 static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
 		cancel_work_sync(&se_cmd->work);
 		transport_wait_for_tasks(se_cmd);
 
-		transport_cmd_finish_abort(se_cmd, true);
-		target_put_sess_cmd(se_cmd);
+		if (!transport_cmd_finish_abort(se_cmd, true))
+			target_put_sess_cmd(se_cmd);
 
 		printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
 				" ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		transport_cmd_finish_abort(cmd, 1);
-		target_put_sess_cmd(cmd);
+		if (!transport_cmd_finish_abort(cmd, 1))
+			target_put_sess_cmd(cmd);
 	}
 }
 
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
 		cancel_work_sync(&cmd->work);
 		transport_wait_for_tasks(cmd);
 
-		core_tmr_handle_tas_abort(cmd, tas);
-		target_put_sess_cmd(cmd);
+		if (!core_tmr_handle_tas_abort(cmd, tas))
+			target_put_sess_cmd(cmd);
 	}
 }
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 37f5735..f1b3a46 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
 		percpu_ref_put(&lun->lun_ref);
 }
 
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
 	bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+	int ret = 0;
 
 	if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
 		transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 		cmd->se_tfo->aborted_task(cmd);
 
 	if (transport_cmd_check_stop_to_fabric(cmd))
-		return;
+		return 1;
 	if (remove && ack_kref)
-		transport_put_cmd(cmd);
+		ret = transport_put_cmd(cmd);
+
+	return ret;
 }
 
 static void target_complete_failure_work(struct work_struct *work)
@@ -1160,15 +1163,28 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
 	if (cmd->unknown_data_length) {
 		cmd->data_length = size;
 	} else if (size != cmd->data_length) {
-		pr_warn("TARGET_CORE[%s]: Expected Transfer Length:"
+		pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:"
 			" %u does not match SCSI CDB Length: %u for SAM Opcode:"
 			" 0x%02x\n", cmd->se_tfo->get_fabric_name(),
 				cmd->data_length, size, cmd->t_task_cdb[0]);
 
-		if (cmd->data_direction == DMA_TO_DEVICE &&
-		    cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
-			pr_err("Rejecting underflow/overflow WRITE data\n");
-			return TCM_INVALID_CDB_FIELD;
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
+				pr_err_ratelimited("Rejecting underflow/overflow"
+						   " for WRITE data CDB\n");
+				return TCM_INVALID_CDB_FIELD;
+			}
+			/*
+			 * Some fabric drivers like iscsi-target still expect to
+			 * always reject overflow writes.  Reject this case until
+			 * full fabric driver level support for overflow writes
+			 * is introduced tree-wide.
+			 */
+			if (size > cmd->data_length) {
+				pr_err_ratelimited("Rejecting overflow for"
+						   " WRITE control CDB\n");
+				return TCM_INVALID_CDB_FIELD;
+			}
 		}
 		/*
 		 * Reject READ_* or WRITE_* with overflow/underflow for
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9045837..beb5f09 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -97,7 +97,7 @@ struct tcmu_hba {
 
 struct tcmu_dev {
 	struct list_head node;
-
+	struct kref kref;
 	struct se_device se_dev;
 
 	char *name;
@@ -969,6 +969,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
 	if (!udev)
 		return NULL;
+	kref_init(&udev->kref);
 
 	udev->name = kstrdup(name, GFP_KERNEL);
 	if (!udev->name) {
@@ -1145,6 +1146,24 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 	return 0;
 }
 
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+	struct se_device *dev = container_of(p, struct se_device, rcu_head);
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	kfree(udev->uio_info.name);
+	kfree(udev->name);
+	kfree(udev);
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+	struct se_device *dev = &udev->se_dev;
+
+	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -1152,7 +1171,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-
+	/* release ref from configure */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 	return 0;
 }
 
@@ -1272,6 +1292,12 @@ static int tcmu_configure_device(struct se_device *dev)
 		dev->dev_attrib.hw_max_sectors = 128;
 	dev->dev_attrib.hw_queue_depth = 128;
 
+	/*
+	 * Get a ref in case userspace does a close on the uio device before
+	 * LIO has initiated tcmu_free_device.
+	 */
+	kref_get(&udev->kref);
+
 	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
 				 udev->uio_info.uio_dev->minor);
 	if (ret)
@@ -1284,11 +1310,13 @@ static int tcmu_configure_device(struct se_device *dev)
 	return 0;
 
 err_netlink:
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 	uio_unregister_device(&udev->uio_info);
 err_register:
 	vfree(udev->mb_addr);
 err_vzalloc:
 	kfree(info->name);
+	info->name = NULL;
 
 	return ret;
 }
@@ -1302,14 +1330,6 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
 	return -EINVAL;
 }
 
-static void tcmu_dev_call_rcu(struct rcu_head *p)
-{
-	struct se_device *dev = container_of(p, struct se_device, rcu_head);
-	struct tcmu_dev *udev = TCMU_DEV(dev);
-
-	kfree(udev);
-}
-
 static bool tcmu_dev_configured(struct tcmu_dev *udev)
 {
 	return udev->uio_info.uio_dev ? true : false;
@@ -1364,10 +1384,10 @@ static void tcmu_free_device(struct se_device *dev)
 				   udev->uio_info.uio_dev->minor);
 
 		uio_unregister_device(&udev->uio_info);
-		kfree(udev->uio_info.name);
-		kfree(udev->name);
 	}
-	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+
+	/* release ref from init */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
 }
 
 enum {
diff --git a/drivers/thermal/broadcom/Kconfig b/drivers/thermal/broadcom/Kconfig
index ab08af4..42c098e 100644
--- a/drivers/thermal/broadcom/Kconfig
+++ b/drivers/thermal/broadcom/Kconfig
@@ -9,8 +9,9 @@
 config BCM_NS_THERMAL
 	tristate "Northstar thermal driver"
 	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	default y if ARCH_BCM_IPROC
 	help
-	  Northstar is a family of SoCs that includes e.g. BCM4708, BCM47081,
-	  BCM4709 and BCM47094. It contains DMU (Device Management Unit) block
-	  with a thermal sensor that allows checking CPU temperature. This
-	  driver provides support for it.
+	  Support for the Northstar and Northstar Plus family of SoCs (e.g.
+	  BCM4708, BCM4709, BCM5301x, BCM95852X, etc). It contains a DMU (Device
+	  Management Unit) block with a thermal sensor that allows checking the CPU
+	  temperature.
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
index 9413c4a..a9ec94e 100644
--- a/drivers/thermal/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -23,7 +23,7 @@ enum int3400_thermal_uuid {
 	INT3400_THERMAL_MAXIMUM_UUID,
 };
 
-static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
 	"42A441D6-AE6A-462b-A84B-4A8CE79027D3",
 	"3A95C389-E4B8-4629-A526-C52C88626BAE",
 	"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
@@ -141,10 +141,10 @@ static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
 		}
 
 		for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
-			u8 uuid[16];
+			guid_t guid;
 
-			acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
-			if (!strncmp(uuid, objb->buffer.pointer, 16)) {
+			guid_parse(int3400_thermal_uuids[j], &guid);
+			if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) {
 				priv->uuid_bitmap |= (1 << j);
 				break;
 			}
diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c
index 644ba52..4362a69 100644
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -195,7 +195,6 @@ static struct thermal_zone_of_device_ops tmu_tz_ops = {
 static int qoriq_tmu_probe(struct platform_device *pdev)
 {
 	int ret;
-	const struct thermal_trip *trip;
 	struct qoriq_tmu_data *data;
 	struct device_node *np = pdev->dev.of_node;
 	u32 site = 0;
@@ -243,8 +242,6 @@ static int qoriq_tmu_probe(struct platform_device *pdev)
 		goto err_tmu;
 	}
 
-	trip = of_thermal_get_trip_points(data->tz);
-
 	/* Enable monitoring */
 	site |= 0x1 << (15 - data->sensor_id);
 	tmu_write(data, site | TMR_ME | TMR_ALPF, &data->regs->tmr);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index b21b9cc..5a51c74 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -359,7 +359,7 @@ static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
  * This may be called from any critical situation to trigger a system shutdown
  * after a known period of time. By default this is not scheduled.
  */
-void thermal_emergency_poweroff(void)
+static void thermal_emergency_poweroff(void)
 {
 	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
 	/*
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index ba9c302..696ab30 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -1010,7 +1010,7 @@ ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id)
 }
 
 /**
- * ti_bandgap_set_continous_mode() - One time enabling of continuous mode
+ * ti_bandgap_set_continuous_mode() - One time enabling of continuous mode
  * @bgp: pointer to struct ti_bandgap
  *
  * Call this function only if HAS(MODE_CONFIG) is set. As this driver may
@@ -1214,22 +1214,18 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev)
 	}
 
 	bgp = devm_kzalloc(&pdev->dev, sizeof(*bgp), GFP_KERNEL);
-	if (!bgp) {
-		dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n");
+	if (!bgp)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	of_id = of_match_device(of_ti_bandgap_match, &pdev->dev);
 	if (of_id)
 		bgp->conf = of_id->data;
 
 	/* register shadow for context save and restore */
-	bgp->regval = devm_kzalloc(&pdev->dev, sizeof(*bgp->regval) *
-				   bgp->conf->sensor_count, GFP_KERNEL);
-	if (!bgp->regval) {
-		dev_err(&pdev->dev, "Unable to allocate mem for driver ref\n");
+	bgp->regval = devm_kcalloc(&pdev->dev, bgp->conf->sensor_count,
+				   sizeof(*bgp->regval), GFP_KERNEL);
+	if (!bgp->regval)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	i = 0;
 	do {
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 7ac9bcdf..61fe8d6 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -764,7 +764,7 @@ static int __init ehv_bc_init(void)
 	ehv_bc_driver = alloc_tty_driver(count);
 	if (!ehv_bc_driver) {
 		ret = -ENOMEM;
-		goto error;
+		goto err_free_bcs;
 	}
 
 	ehv_bc_driver->driver_name = "ehv-bc";
@@ -778,24 +778,23 @@ static int __init ehv_bc_init(void)
 	ret = tty_register_driver(ehv_bc_driver);
 	if (ret) {
 		pr_err("ehv-bc: could not register tty driver (ret=%i)\n", ret);
-		goto error;
+		goto err_put_tty_driver;
 	}
 
 	ret = platform_driver_register(&ehv_bc_tty_driver);
 	if (ret) {
 		pr_err("ehv-bc: could not register platform driver (ret=%i)\n",
 		       ret);
-		goto error;
+		goto err_deregister_tty_driver;
 	}
 
 	return 0;
 
-error:
-	if (ehv_bc_driver) {
-		tty_unregister_driver(ehv_bc_driver);
-		put_tty_driver(ehv_bc_driver);
-	}
-
+err_deregister_tty_driver:
+	tty_unregister_driver(ehv_bc_driver);
+err_put_tty_driver:
+	put_tty_driver(ehv_bc_driver);
+err_free_bcs:
 	kfree(bcs);
 
 	return ret;
diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 433de5e..f71b473 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -122,6 +122,18 @@ void serdev_device_write_wakeup(struct serdev_device *serdev)
 }
 EXPORT_SYMBOL_GPL(serdev_device_write_wakeup);
 
+int serdev_device_write_buf(struct serdev_device *serdev,
+			    const unsigned char *buf, size_t count)
+{
+	struct serdev_controller *ctrl = serdev->ctrl;
+
+	if (!ctrl || !ctrl->ops->write_buf)
+		return -EINVAL;
+
+	return ctrl->ops->write_buf(ctrl, buf, count);
+}
+EXPORT_SYMBOL_GPL(serdev_device_write_buf);
+
 int serdev_device_write(struct serdev_device *serdev,
 			const unsigned char *buf, size_t count,
 			unsigned long timeout)
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index 487c88f..d0a021c 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -102,9 +102,6 @@ static int ttyport_open(struct serdev_controller *ctrl)
 		return PTR_ERR(tty);
 	serport->tty = tty;
 
-	serport->port->client_ops = &client_ops;
-	serport->port->client_data = ctrl;
-
 	if (tty->ops->open)
 		tty->ops->open(serport->tty, NULL);
 	else
@@ -215,6 +212,7 @@ struct device *serdev_tty_port_register(struct tty_port *port,
 					struct device *parent,
 					struct tty_driver *drv, int idx)
 {
+	const struct tty_port_client_operations *old_ops;
 	struct serdev_controller *ctrl;
 	struct serport *serport;
 	int ret;
@@ -233,28 +231,37 @@ struct device *serdev_tty_port_register(struct tty_port *port,
 
 	ctrl->ops = &ctrl_ops;
 
+	old_ops = port->client_ops;
+	port->client_ops = &client_ops;
+	port->client_data = ctrl;
+
 	ret = serdev_controller_add(ctrl);
 	if (ret)
-		goto err_controller_put;
+		goto err_reset_data;
 
 	dev_info(&ctrl->dev, "tty port %s%d registered\n", drv->name, idx);
 	return &ctrl->dev;
 
-err_controller_put:
+err_reset_data:
+	port->client_data = NULL;
+	port->client_ops = old_ops;
 	serdev_controller_put(ctrl);
+
 	return ERR_PTR(ret);
 }
 
-void serdev_tty_port_unregister(struct tty_port *port)
+int serdev_tty_port_unregister(struct tty_port *port)
 {
 	struct serdev_controller *ctrl = port->client_data;
 	struct serport *serport = serdev_controller_get_drvdata(ctrl);
 
 	if (!serport)
-		return;
+		return -ENODEV;
 
 	serdev_controller_remove(ctrl);
 	port->client_ops = NULL;
 	port->client_data = NULL;
 	serdev_controller_put(ctrl);
+
+	return 0;
 }
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 09a65a3..68fd045 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -47,6 +47,7 @@
 /*
  * These are definitions for the Exar XR17V35X and XR17(C|D)15X
  */
+#define UART_EXAR_INT0		0x80
 #define UART_EXAR_SLEEP		0x8b	/* Sleep mode */
 #define UART_EXAR_DVID		0x8d	/* Device identification */
 
@@ -1337,7 +1338,7 @@ static void autoconfig(struct uart_8250_port *up)
 	/*
 	 * Check if the device is a Fintek F81216A
 	 */
-	if (port->type == PORT_16550A)
+	if (port->type == PORT_16550A && port->iotype == UPIO_PORT)
 		fintek_8250_probe(up);
 
 	if (up->capabilities != old_capabilities) {
@@ -1869,17 +1870,13 @@ static int serial8250_default_handle_irq(struct uart_port *port)
 static int exar_handle_irq(struct uart_port *port)
 {
 	unsigned int iir = serial_port_in(port, UART_IIR);
-	int ret;
+	int ret = 0;
 
-	ret = serial8250_handle_irq(port, iir);
+	if (((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) &&
+	    serial_port_in(port, UART_EXAR_INT0) != 0)
+		ret = 1;
 
-	if ((port->type == PORT_XR17V35X) ||
-	   (port->type == PORT_XR17D15X)) {
-		serial_port_in(port, 0x80);
-		serial_port_in(port, 0x81);
-		serial_port_in(port, 0x82);
-		serial_port_in(port, 0x83);
-	}
+	ret |= serial8250_handle_irq(port, iir);
 
 	return ret;
 }
@@ -2177,6 +2174,8 @@ int serial8250_do_startup(struct uart_port *port)
 	serial_port_in(port, UART_RX);
 	serial_port_in(port, UART_IIR);
 	serial_port_in(port, UART_MSR);
+	if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+		serial_port_in(port, UART_EXAR_INT0);
 
 	/*
 	 * At this point, there's no way the LSR could still be 0xff;
@@ -2335,6 +2334,8 @@ int serial8250_do_startup(struct uart_port *port)
 	serial_port_in(port, UART_RX);
 	serial_port_in(port, UART_IIR);
 	serial_port_in(port, UART_MSR);
+	if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X))
+		serial_port_in(port, UART_EXAR_INT0);
 	up->lsr_saved_flags = 0;
 	up->msr_saved_flags = 0;
 
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 18e3f83..0475f5d 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -478,6 +478,7 @@ static int altera_jtaguart_remove(struct platform_device *pdev)
 
 	port = &altera_jtaguart_ports[i].port;
 	uart_remove_one_port(&altera_jtaguart_driver, port);
+	iounmap(port->membase);
 
 	return 0;
 }
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 46d3438..3e4b717 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -615,6 +615,7 @@ static int altera_uart_remove(struct platform_device *pdev)
 	if (port) {
 		uart_remove_one_port(&altera_uart_driver, port);
 		port->mapbase = 0;
+		iounmap(port->membase);
 	}
 
 	return 0;
diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c
index ebd8569..9fff25b 100644
--- a/drivers/tty/serial/efm32-uart.c
+++ b/drivers/tty/serial/efm32-uart.c
@@ -27,6 +27,7 @@
 #define UARTn_FRAME		0x04
 #define UARTn_FRAME_DATABITS__MASK	0x000f
 #define UARTn_FRAME_DATABITS(n)		((n) - 3)
+#define UARTn_FRAME_PARITY__MASK	0x0300
 #define UARTn_FRAME_PARITY_NONE		0x0000
 #define UARTn_FRAME_PARITY_EVEN		0x0200
 #define UARTn_FRAME_PARITY_ODD		0x0300
@@ -572,12 +573,16 @@ static void efm32_uart_console_get_options(struct efm32_uart_port *efm_port,
 			16 * (4 + (clkdiv >> 6)));
 
 	frame = efm32_uart_read32(efm_port, UARTn_FRAME);
-	if (frame & UARTn_FRAME_PARITY_ODD)
+	switch (frame & UARTn_FRAME_PARITY__MASK) {
+	case UARTn_FRAME_PARITY_ODD:
 		*parity = 'o';
-	else if (frame & UARTn_FRAME_PARITY_EVEN)
+		break;
+	case UARTn_FRAME_PARITY_EVEN:
 		*parity = 'e';
-	else
+		break;
+	default:
 		*parity = 'n';
+	}
 
 	*bits = (frame & UARTn_FRAME_DATABITS__MASK) -
 			UARTn_FRAME_DATABITS(4) + 4;
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index 1578836..f190a84 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -1382,9 +1382,9 @@ static struct spi_driver ifx_spi_driver = {
 static void __exit ifx_spi_exit(void)
 {
 	/* unregister */
+	spi_unregister_driver(&ifx_spi_driver);
 	tty_unregister_driver(tty_drv);
 	put_tty_driver(tty_drv);
-	spi_unregister_driver(&ifx_spi_driver);
 	unregister_reboot_notifier(&ifx_modem_reboot_notifier_block);
 }
 
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 33509b4..bbefddd 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2184,7 +2184,9 @@ static int serial_imx_probe(struct platform_device *pdev)
 		 * and DCD (when they are outputs) or enables the respective
 		 * irqs. So set this bit early, i.e. before requesting irqs.
 		 */
-		writel(UFCR_DCEDTE, sport->port.membase + UFCR);
+		reg = readl(sport->port.membase + UFCR);
+		if (!(reg & UFCR_DCEDTE))
+			writel(reg | UFCR_DCEDTE, sport->port.membase + UFCR);
 
 		/*
 		 * Disable UCR3_RI and UCR3_DCD irqs. They are also not
@@ -2195,7 +2197,15 @@ static int serial_imx_probe(struct platform_device *pdev)
 		       sport->port.membase + UCR3);
 
 	} else {
-		writel(0, sport->port.membase + UFCR);
+		unsigned long ucr3 = UCR3_DSR;
+
+		reg = readl(sport->port.membase + UFCR);
+		if (reg & UFCR_DCEDTE)
+			writel(reg & ~UFCR_DCEDTE, sport->port.membase + UFCR);
+
+		if (!is_imx1_uart(sport))
+			ucr3 |= IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP;
+		writel(ucr3, sport->port.membase + UCR3);
 	}
 
 	clk_disable_unprepare(sport->clk_ipg);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0f45b78..13bfd5d 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2083,7 +2083,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
 	mutex_lock(&port->mutex);
 
 	tty_dev = device_find_child(uport->dev, &match, serial_match_port);
-	if (device_may_wakeup(tty_dev)) {
+	if (tty_dev && device_may_wakeup(tty_dev)) {
 		if (!enable_irq_wake(uport->irq))
 			uport->irq_wake = 1;
 		put_device(tty_dev);
@@ -2782,7 +2782,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 	 * Register the port whether it's detected or not.  This allows
 	 * setserial to be used to alter this port's parameters.
 	 */
-	tty_dev = tty_port_register_device_attr(port, drv->tty_driver,
+	tty_dev = tty_port_register_device_attr_serdev(port, drv->tty_driver,
 			uport->line, uport->dev, port, uport->tty_groups);
 	if (likely(!IS_ERR(tty_dev))) {
 		device_set_wakeup_capable(tty_dev, 1);
@@ -2845,7 +2845,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport)
 	/*
 	 * Remove the devices from the tty layer
 	 */
-	tty_unregister_device(drv->tty_driver, uport->line);
+	tty_port_unregister_device(port, drv->tty_driver, uport->line);
 
 	tty = tty_port_tty_get(port);
 	if (tty) {
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 31885f2..cc047de 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -184,7 +184,7 @@ static void hdlcdev_exit(struct slgt_info *info);
 struct cond_wait {
 	struct cond_wait *next;
 	wait_queue_head_t q;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	unsigned int data;
 };
 static void init_cond_wait(struct cond_wait *w, unsigned int data);
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 1d21a9c..6b13719 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -129,19 +129,85 @@ struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp)
 {
+	tty_port_link_device(port, driver, index);
+	return tty_register_device_attr(driver, index, device, drvdata,
+			attr_grp);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_attr);
+
+/**
+ * tty_port_register_device_attr_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ * @drvdata: driver data for the device
+ * @attr_grp: attribute group for the device
+ *
+ * Register a serdev or tty device, depending on whether the parent device
+ * has any serdev clients defined.
+ */
+struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp)
+{
 	struct device *dev;
 
 	tty_port_link_device(port, driver, index);
 
 	dev = serdev_tty_port_register(port, device, driver, index);
-	if (PTR_ERR(dev) != -ENODEV)
+	if (PTR_ERR(dev) != -ENODEV) {
 		/* Skip creating cdev if we registered a serdev device */
 		return dev;
+	}
 
 	return tty_register_device_attr(driver, index, device, drvdata,
 			attr_grp);
 }
-EXPORT_SYMBOL_GPL(tty_port_register_device_attr);
+EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev);
+
+/**
+ * tty_port_register_device_serdev - register tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ * @device: parent if exists, otherwise NULL
+ *
+ * Register a serdev or tty device, depending on whether the parent device
+ * has any serdev clients defined.
+ */
+struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device)
+{
+	return tty_port_register_device_attr_serdev(port, driver, index,
+			device, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(tty_port_register_device_serdev);
+
+/**
+ * tty_port_unregister_device - deregister a tty or serdev device
+ * @port: tty_port of the device
+ * @driver: tty_driver for this device
+ * @index: index of the tty
+ *
+ * If a tty or serdev device is registered with a call to
+ * tty_port_register_device_serdev() then this function must be called when
+ * the device is gone.
+ */
+void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index)
+{
+	int ret;
+
+	ret = serdev_tty_port_unregister(port);
+	if (ret == 0)
+		return;
+
+	tty_unregister_device(driver, index);
+}
+EXPORT_SYMBOL_GPL(tty_port_unregister_device);
 
 int tty_port_alloc_xmit_buf(struct tty_port *port)
 {
@@ -189,9 +255,6 @@ static void tty_port_destructor(struct kref *kref)
 	/* check if last port ref was dropped before tty release */
 	if (WARN_ON(port->itty))
 		return;
-
-	serdev_tty_port_unregister(port);
-
 	if (port->xmit_buf)
 		free_page((unsigned long)port->xmit_buf);
 	tty_port_destroy(port);
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 9e217b1..fe4fe24 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -843,7 +843,10 @@ static ssize_t ci_role_show(struct device *dev, struct device_attribute *attr,
 {
 	struct ci_hdrc *ci = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%s\n", ci_role(ci)->name);
+	if (ci->role != CI_ROLE_END)
+		return sprintf(buf, "%s\n", ci_role(ci)->name);
+
+	return 0;
 }
 
 static ssize_t ci_role_store(struct device *dev,
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 6d23eed..1c31e8a 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -294,7 +294,8 @@ static int ci_role_show(struct seq_file *s, void *data)
 {
 	struct ci_hdrc *ci = s->private;
 
-	seq_printf(s, "%s\n", ci_role(ci)->name);
+	if (ci->role != CI_ROLE_END)
+		seq_printf(s, "%s\n", ci_role(ci)->name);
 
 	return 0;
 }
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 56d2d32..d68b125 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1993,6 +1993,7 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci)
 int ci_hdrc_gadget_init(struct ci_hdrc *ci)
 {
 	struct ci_role_driver *rdrv;
+	int ret;
 
 	if (!hw_read(ci, CAP_DCCPARAMS, DCCPARAMS_DC))
 		return -ENXIO;
@@ -2005,7 +2006,10 @@ int ci_hdrc_gadget_init(struct ci_hdrc *ci)
 	rdrv->stop	= udc_id_switch_for_host;
 	rdrv->irq	= udc_irq;
 	rdrv->name	= "gadget";
-	ci->roles[CI_ROLE_GADGET] = rdrv;
 
-	return udc_start(ci);
+	ret = udc_start(ci);
+	if (!ret)
+		ci->roles[CI_ROLE_GADGET] = rdrv;
+
+	return ret;
 }
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index e77a4ed..9f4a018 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -108,6 +108,8 @@ struct imx_usbmisc {
 	const struct usbmisc_ops *ops;
 };
 
+static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data);
+
 static int usbmisc_imx25_init(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
@@ -242,10 +244,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data)
 			val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN
 				| MX53_USB_UHx_CTRL_ULPI_INT_EN;
 			writel(val, reg);
-			/* Disable internal 60Mhz clock */
-			reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET;
-			val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF;
-			writel(val, reg);
+			if (is_imx53_usbmisc(data)) {
+				/* Disable internal 60Mhz clock */
+				reg = usbmisc->base +
+					MX53_USB_CLKONOFF_CTRL_OFFSET;
+				val = readl(reg) |
+					MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF;
+				writel(val, reg);
+			}
+
 		}
 		if (data->disable_oc) {
 			reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET;
@@ -267,10 +274,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data)
 			val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN
 				| MX53_USB_UHx_CTRL_ULPI_INT_EN;
 			writel(val, reg);
-			/* Disable internal 60Mhz clock */
-			reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET;
-			val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF;
-			writel(val, reg);
+
+			if (is_imx53_usbmisc(data)) {
+				/* Disable internal 60Mhz clock */
+				reg = usbmisc->base +
+					MX53_USB_CLKONOFF_CTRL_OFFSET;
+				val = readl(reg) |
+					MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF;
+				writel(val, reg);
+			}
 		}
 		if (data->disable_oc) {
 			reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET;
@@ -456,6 +468,10 @@ static const struct usbmisc_ops imx27_usbmisc_ops = {
 	.init = usbmisc_imx27_init,
 };
 
+static const struct usbmisc_ops imx51_usbmisc_ops = {
+	.init = usbmisc_imx53_init,
+};
+
 static const struct usbmisc_ops imx53_usbmisc_ops = {
 	.init = usbmisc_imx53_init,
 };
@@ -479,6 +495,13 @@ static const struct usbmisc_ops imx7d_usbmisc_ops = {
 	.set_wakeup = usbmisc_imx7d_set_wakeup,
 };
 
+static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data)
+{
+	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
+
+	return usbmisc->ops == &imx53_usbmisc_ops;
+}
+
 int imx_usbmisc_init(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc;
@@ -536,7 +559,7 @@ static const struct of_device_id usbmisc_imx_dt_ids[] = {
 	},
 	{
 		.compatible = "fsl,imx51-usbmisc",
-		.data = &imx53_usbmisc_ops,
+		.data = &imx51_usbmisc_ops,
 	},
 	{
 		.compatible = "fsl,imx53-usbmisc",
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index 9cd8722..a3ffe97 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -144,6 +144,8 @@ const struct of_device_id dwc2_of_match_table[] = {
 	{ .compatible = "lantiq,xrx200-usb", .data = dwc2_set_ltq_params },
 	{ .compatible = "snps,dwc2" },
 	{ .compatible = "samsung,s3c6400-hsotg" },
+	{ .compatible = "amlogic,meson8-usb",
+	  .data = dwc2_set_amlogic_params },
 	{ .compatible = "amlogic,meson8b-usb",
 	  .data = dwc2_set_amlogic_params },
 	{ .compatible = "amlogic,meson-gxbb-usb",
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 84a2ceb..fe85154 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -42,7 +42,7 @@
 #define PCI_DEVICE_ID_INTEL_CNPLP		0x9dee
 #define PCI_DEVICE_ID_INTEL_CNPH		0xa36e
 
-#define PCI_INTEL_BXT_DSM_UUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+#define PCI_INTEL_BXT_DSM_GUID		"732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
 #define PCI_INTEL_BXT_FUNC_PMU_PWR	4
 #define PCI_INTEL_BXT_STATE_D0		0
 #define PCI_INTEL_BXT_STATE_D3		3
@@ -51,14 +51,14 @@
  * struct dwc3_pci - Driver private structure
  * @dwc3: child dwc3 platform_device
  * @pci: our link to PCI bus
- * @uuid: _DSM UUID
+ * @guid: _DSM GUID
  * @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM
  */
 struct dwc3_pci {
 	struct platform_device *dwc3;
 	struct pci_dev *pci;
 
-	u8 uuid[16];
+	guid_t guid;
 
 	unsigned int has_dsm_for_pm:1;
 };
@@ -120,7 +120,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
 
 		if (pdev->device == PCI_DEVICE_ID_INTEL_BXT ||
 				pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) {
-			acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid);
+			guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid);
 			dwc->has_dsm_for_pm = true;
 		}
 
@@ -292,7 +292,7 @@ static int dwc3_pci_dsm(struct dwc3_pci *dwc, int param)
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid,
 			1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4);
 	if (!obj) {
 		dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n");
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 49d685a..45b5540 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -315,6 +315,9 @@ void usb_remove_function(struct usb_configuration *c, struct usb_function *f)
 	list_del(&f->list);
 	if (f->unbind)
 		f->unbind(c, f);
+
+	if (f->bind_deactivated)
+		usb_function_activate(f);
 }
 EXPORT_SYMBOL_GPL(usb_remove_function);
 
@@ -956,12 +959,8 @@ static void remove_config(struct usb_composite_dev *cdev,
 
 		f = list_first_entry(&config->functions,
 				struct usb_function, list);
-		list_del(&f->list);
-		if (f->unbind) {
-			DBG(cdev, "unbind function '%s'/%p\n", f->name, f);
-			f->unbind(config, f);
-			/* may free memory for "f" */
-		}
+
+		usb_remove_function(config, f);
 	}
 	list_del(&config->list);
 	if (config->unbind) {
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 4c8aacc..74d57d6 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -396,7 +396,11 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep)
 /* Caller must hold fsg->lock */
 static void wakeup_thread(struct fsg_common *common)
 {
-	smp_wmb();	/* ensure the write of bh->state is complete */
+	/*
+	 * Ensure the reading of thread_wakeup_needed
+	 * and the writing of bh->state are completed
+	 */
+	smp_mb();
 	/* Tell the main thread that something has happened */
 	common->thread_wakeup_needed = 1;
 	if (common->thread_task)
@@ -627,7 +631,12 @@ static int sleep_thread(struct fsg_common *common, bool can_freeze)
 	}
 	__set_current_state(TASK_RUNNING);
 	common->thread_wakeup_needed = 0;
-	smp_rmb();	/* ensure the latest bh->state is visible */
+
+	/*
+	 * Ensure the writing of thread_wakeup_needed
+	 * and the reading of bh->state are completed
+	 */
+	smp_mb();
 	return rc;
 }
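The barrier change above follows the usual lost-wakeup pattern: each side stores its own flag and then loads the other side's, and ordering a store before a later load needs a full smp_mb() on both sides (smp_wmb()/smp_rmb() only order stores against stores and loads against loads). A generic sketch, with flag names and polarity that are illustrative rather than the driver's:

#include <linux/compiler.h>
#include <asm/barrier.h>

static int new_work;            /* written by the waker, read by the sleeper */
static int want_wakeup;         /* written by the sleeper, read by the waker */

static void waker_side(void)
{
        WRITE_ONCE(new_work, 1);
        smp_mb();       /* order the store above against the load below */
        if (READ_ONCE(want_wakeup))
                ;       /* sleeper may be asleep: wake it up here */
}

static void sleeper_side(void)
{
        WRITE_ONCE(want_wakeup, 1);
        smp_mb();       /* order the store above against the load below */
        if (READ_ONCE(new_work))
                ;       /* work already posted: skip the sleep */
}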
 
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index b4058f0..6a1ce6a 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev)
 	dev->tx_queue_len	= 1;
 
 	dev->netdev_ops		= &pn_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->header_ops		= &phonet_header_ops;
 }
 
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index b9ca0a2..684900f 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1183,8 +1183,10 @@ dev_release (struct inode *inode, struct file *fd)
 
 	/* closing ep0 === shutdown all */
 
-	if (dev->gadget_registered)
+	if (dev->gadget_registered) {
 		usb_gadget_unregister_driver (&gadgetfs_driver);
+		dev->gadget_registered = false;
+	}
 
 	/* at this point "good" hardware has disconnected the
 	 * device from USB; the host won't see it any more.
@@ -1677,9 +1679,10 @@ static void
 gadgetfs_suspend (struct usb_gadget *gadget)
 {
 	struct dev_data		*dev = get_gadget_data (gadget);
+	unsigned long		flags;
 
 	INFO (dev, "suspended from state %d\n", dev->state);
-	spin_lock (&dev->lock);
+	spin_lock_irqsave(&dev->lock, flags);
 	switch (dev->state) {
 	case STATE_DEV_SETUP:		// VERY odd... host died??
 	case STATE_DEV_CONNECTED:
@@ -1690,7 +1693,7 @@ gadgetfs_suspend (struct usb_gadget *gadget)
 	default:
 		break;
 	}
-	spin_unlock (&dev->lock);
+	spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 static struct usb_gadget_driver gadgetfs_driver = {
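A minimal sketch of why the hunk above switches to the IRQ-saving lock variant: a callback that may run with interrupts already disabled must restore the previous IRQ state rather than unconditionally re-enable interrupts. Names below are placeholders:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static int example_state;

/* Safe from both process context and hard-IRQ context. */
static void example_set_state(int new_state)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        example_state = new_state;
        spin_unlock_irqrestore(&example_lock, flags);
}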
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index ccabb51..7635fd7 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -442,23 +442,16 @@ static void set_link_state(struct dummy_hcd *dum_hcd)
 		/* Report reset and disconnect events to the driver */
 		if (dum->driver && (disconnect || reset)) {
 			stop_activity(dum);
-			spin_unlock(&dum->lock);
 			if (reset)
 				usb_gadget_udc_reset(&dum->gadget, dum->driver);
 			else
 				dum->driver->disconnect(&dum->gadget);
-			spin_lock(&dum->lock);
 		}
 	} else if (dum_hcd->active != dum_hcd->old_active) {
-		if (dum_hcd->old_active && dum->driver->suspend) {
-			spin_unlock(&dum->lock);
+		if (dum_hcd->old_active && dum->driver->suspend)
 			dum->driver->suspend(&dum->gadget);
-			spin_lock(&dum->lock);
-		} else if (!dum_hcd->old_active &&  dum->driver->resume) {
-			spin_unlock(&dum->lock);
+		else if (!dum_hcd->old_active &&  dum->driver->resume)
 			dum->driver->resume(&dum->gadget);
-			spin_lock(&dum->lock);
-		}
 	}
 
 	dum_hcd->old_status = dum_hcd->port_status;
@@ -983,7 +976,9 @@ static int dummy_udc_stop(struct usb_gadget *g)
 	struct dummy_hcd	*dum_hcd = gadget_to_dummy_hcd(g);
 	struct dummy		*dum = dum_hcd->dum;
 
+	spin_lock_irq(&dum->lock);
 	dum->driver = NULL;
+	spin_unlock_irq(&dum->lock);
 
 	return 0;
 }
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 6cf0785..f2cbd7f 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -2470,11 +2470,8 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver)
 		nuke(&dev->ep[i]);
 
 	/* report disconnect; the driver is already quiesced */
-	if (driver) {
-		spin_unlock(&dev->lock);
+	if (driver)
 		driver->disconnect(&dev->gadget);
-		spin_lock(&dev->lock);
-	}
 
 	usb_reinit(dev);
 }
@@ -3348,8 +3345,6 @@ static void handle_stat0_irqs(struct net2280 *dev, u32 stat)
 		BIT(PCI_RETRY_ABORT_INTERRUPT))
 
 static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
-__releases(dev->lock)
-__acquires(dev->lock)
 {
 	struct net2280_ep	*ep;
 	u32			tmp, num, mask, scratch;
@@ -3390,14 +3385,12 @@ __acquires(dev->lock)
 			if (disconnect || reset) {
 				stop_activity(dev, dev->driver);
 				ep0_start(dev);
-				spin_unlock(&dev->lock);
 				if (reset)
 					usb_gadget_udc_reset
 						(&dev->gadget, dev->driver);
 				else
 					(dev->driver->disconnect)
 						(&dev->gadget);
-				spin_lock(&dev->lock);
 				return;
 			}
 		}
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 5a2d845..cd4c885 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -623,7 +623,6 @@ static void renesas_usb3_stop_controller(struct renesas_usb3 *usb3)
 {
 	usb3_disconnect(usb3);
 	usb3_write(usb3, 0, USB3_P0_INT_ENA);
-	usb3_write(usb3, 0, USB3_PN_INT_ENA);
 	usb3_write(usb3, 0, USB3_USB_OTG_INT_ENA);
 	usb3_write(usb3, 0, USB3_USB_INT_ENA_1);
 	usb3_write(usb3, 0, USB3_USB_INT_ENA_2);
@@ -1475,7 +1474,13 @@ static void usb3_request_done_pipen(struct renesas_usb3 *usb3,
 				    struct renesas_usb3_request *usb3_req,
 				    int status)
 {
-	usb3_pn_stop(usb3);
+	unsigned long flags;
+
+	spin_lock_irqsave(&usb3->lock, flags);
+	if (usb3_pn_change(usb3, usb3_ep->num))
+		usb3_pn_stop(usb3);
+	spin_unlock_irqrestore(&usb3->lock, flags);
+
 	usb3_disable_pipe_irq(usb3, usb3_ep->num);
 	usb3_request_done(usb3_ep, usb3_req, status);
 
@@ -1504,30 +1509,46 @@ static void usb3_irq_epc_pipen_bfrdy(struct renesas_usb3 *usb3, int num)
 {
 	struct renesas_usb3_ep *usb3_ep = usb3_get_ep(usb3, num);
 	struct renesas_usb3_request *usb3_req = usb3_get_request(usb3_ep);
+	bool done = false;
 
 	if (!usb3_req)
 		return;
 
+	spin_lock(&usb3->lock);
+	if (usb3_pn_change(usb3, num))
+		goto out;
+
 	if (usb3_ep->dir_in) {
 		/* Do not stop the IN pipe here to detect LSTTR interrupt */
 		if (!usb3_write_pipe(usb3_ep, usb3_req, USB3_PN_WRITE))
 			usb3_clear_bit(usb3, PN_INT_BFRDY, USB3_PN_INT_ENA);
 	} else {
 		if (!usb3_read_pipe(usb3_ep, usb3_req, USB3_PN_READ))
-			usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0);
+			done = true;
 	}
+
+out:
+	/* need to unlock because usb3_request_done_pipen() locks it */
+	spin_unlock(&usb3->lock);
+
+	if (done)
+		usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0);
 }
 
 static void usb3_irq_epc_pipen(struct renesas_usb3 *usb3, int num)
 {
 	u32 pn_int_sta;
 
-	if (usb3_pn_change(usb3, num) < 0)
+	spin_lock(&usb3->lock);
+	if (usb3_pn_change(usb3, num) < 0) {
+		spin_unlock(&usb3->lock);
 		return;
+	}
 
 	pn_int_sta = usb3_read(usb3, USB3_PN_INT_STA);
 	pn_int_sta &= usb3_read(usb3, USB3_PN_INT_ENA);
 	usb3_write(usb3, pn_int_sta, USB3_PN_INT_STA);
+	spin_unlock(&usb3->lock);
 	if (pn_int_sta & PN_INT_LSTTR)
 		usb3_irq_epc_pipen_lsttr(usb3, num);
 	if (pn_int_sta & PN_INT_BFRDY)
@@ -1660,6 +1681,7 @@ static int usb3_disable_pipe_n(struct renesas_usb3_ep *usb3_ep)
 
 	spin_lock_irqsave(&usb3->lock, flags);
 	if (!usb3_pn_change(usb3, usb3_ep->num)) {
+		usb3_write(usb3, 0, USB3_PN_INT_ENA);
 		usb3_write(usb3, 0, USB3_PN_RAMMAP);
 		usb3_clear_bit(usb3, PN_CON_EN, USB3_PN_CON);
 	}
@@ -1799,6 +1821,9 @@ static int renesas_usb3_start(struct usb_gadget *gadget,
 	/* hook up the driver */
 	usb3->driver = driver;
 
+	pm_runtime_enable(usb3_to_dev(usb3));
+	pm_runtime_get_sync(usb3_to_dev(usb3));
+
 	renesas_usb3_init_controller(usb3);
 
 	return 0;
@@ -1807,14 +1832,14 @@ static int renesas_usb3_start(struct usb_gadget *gadget,
 static int renesas_usb3_stop(struct usb_gadget *gadget)
 {
 	struct renesas_usb3 *usb3 = gadget_to_renesas_usb3(gadget);
-	unsigned long flags;
 
-	spin_lock_irqsave(&usb3->lock, flags);
 	usb3->softconnect = false;
 	usb3->gadget.speed = USB_SPEED_UNKNOWN;
 	usb3->driver = NULL;
 	renesas_usb3_stop_controller(usb3);
-	spin_unlock_irqrestore(&usb3->lock, flags);
+
+	pm_runtime_put(usb3_to_dev(usb3));
+	pm_runtime_disable(usb3_to_dev(usb3));
 
 	return 0;
 }
@@ -1891,9 +1916,6 @@ static int renesas_usb3_remove(struct platform_device *pdev)
 
 	device_remove_file(&pdev->dev, &dev_attr_role);
 
-	pm_runtime_put(&pdev->dev);
-	pm_runtime_disable(&pdev->dev);
-
 	usb_del_gadget_udc(&usb3->gadget);
 
 	__renesas_usb3_ep_free_request(usb3->ep0_req);
@@ -2099,9 +2121,6 @@ static int renesas_usb3_probe(struct platform_device *pdev)
 
 	usb3->workaround_for_vbus = priv->workaround_for_vbus;
 
-	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(&pdev->dev);
-
 	dev_info(&pdev->dev, "probed\n");
 
 	return 0;
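The runtime-PM calls above simply move from probe/remove to the UDC start/stop callbacks; the pairing itself is unchanged. A minimal sketch with a placeholder device pointer:

#include <linux/pm_runtime.h>

static void example_udc_start(struct device *dev)
{
        pm_runtime_enable(dev);         /* allow runtime PM callbacks */
        pm_runtime_get_sync(dev);       /* power the controller up now */
}

static void example_udc_stop(struct device *dev)
{
        pm_runtime_put(dev);            /* drop the usage count */
        pm_runtime_disable(dev);        /* balance pm_runtime_enable() */
}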
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 1f1687e..fddf273 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -2119,11 +2119,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 {
 	u32 temp, port_offset, port_count;
 	int i;
-	u8 major_revision;
+	u8 major_revision, minor_revision;
 	struct xhci_hub *rhub;
 
 	temp = readl(addr);
 	major_revision = XHCI_EXT_PORT_MAJOR(temp);
+	minor_revision = XHCI_EXT_PORT_MINOR(temp);
 
 	if (major_revision == 0x03) {
 		rhub = &xhci->usb3_rhub;
@@ -2137,7 +2138,9 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 		return;
 	}
 	rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp);
-	rhub->min_rev = XHCI_EXT_PORT_MINOR(temp);
+
+	if (rhub->min_rev < minor_revision)
+		rhub->min_rev = minor_revision;
 
 	/* Port offset and count in the third dword, see section 7.2 */
 	temp = readl(addr + 2);
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index fcf1f3f..783e668 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -201,6 +201,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
 			pdev->device == 0x1042)
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
+	if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+			pdev->device == 0x1142)
+		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
 	if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
 		xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
@@ -213,13 +216,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 #ifdef CONFIG_ACPI
 static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
 {
-	static const u8 intel_dsm_uuid[] = {
-		0xb7, 0x0c, 0x34, 0xac,	0x01, 0xe9, 0xbf, 0x45,
-		0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
-	};
+	static const guid_t intel_dsm_guid =
+		GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
+			  0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1,
 				NULL);
 	ACPI_FREE(obj);
 }
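The GUID_INIT() form above encodes the same 16 bytes as the old open-coded array: guid_t stores its first three fields little-endian, so the bytes 0xb7 0x0c 0x34 0xac become the 32-bit field 0xac340cb7, and so on. A sketch with a placeholder name:

#include <linux/uuid.h>

/* Same byte sequence as {0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45,
 * 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23} on the wire. */
static const guid_t example_dsm_guid =
        GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
                  0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);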
diff --git a/drivers/usb/misc/ucsi.c b/drivers/usb/misc/ucsi.c
index 07397bd..81251aa 100644
--- a/drivers/usb/misc/ucsi.c
+++ b/drivers/usb/misc/ucsi.c
@@ -55,13 +55,13 @@ struct ucsi {
 
 static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl)
 {
-	uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4,
-			       0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
+	guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4,
+				0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
 	union acpi_object *obj;
 
 	ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL);
 	if (!obj) {
 		dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__);
 		return -EIO;
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 9c7ee26..bc6a9be 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -245,6 +245,11 @@ static int dsps_check_status(struct musb *musb, void *unused)
 		dsps_mod_timer_optional(glue);
 		break;
 	case OTG_STATE_A_WAIT_BCON:
+		/* keep VBUS on for host-only mode */
+		if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+			dsps_mod_timer_optional(glue);
+			break;
+		}
 		musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 		skip_session = 1;
 		/* fall */
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c
index d5a7b21..c2ce252 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/typec_wcove.c
@@ -105,8 +105,8 @@ enum wcove_typec_role {
 	WCOVE_ROLE_DEVICE,
 };
 
-static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49,
-			      0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
+static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49,
+			       0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
 
 static int wcove_typec_func(struct wcove_typec *wcove,
 			    enum wcove_typec_func func, int param)
@@ -118,7 +118,7 @@ static int wcove_typec_func(struct wcove_typec *wcove,
 	tmp.type = ACPI_TYPE_INTEGER;
 	tmp.integer.value = param;
 
-	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func,
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func,
 				&argv4);
 	if (!obj) {
 		dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__);
@@ -314,7 +314,7 @@ static int wcove_typec_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) {
+	if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) {
 		dev_err(&pdev->dev, "Missing _DSM functions\n");
 		return -ENODEV;
 	}
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 27c89cd..4797217 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -43,7 +43,7 @@ static void virqfd_deactivate(struct virqfd *virqfd)
 	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
 }
 
-static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
 	unsigned long flags = (unsigned long)key;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 042030e..e4613a3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -165,7 +165,7 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 	add_wait_queue(wqh, &poll->wait);
 }
 
-static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
 			     void *key)
 {
 	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index f55671d..f720958 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -31,7 +31,7 @@ struct vhost_work {
 struct vhost_poll {
 	poll_table                table;
 	wait_queue_head_t        *wqh;
-	wait_queue_t              wait;
+	wait_queue_entry_t              wait;
 	struct vhost_work	  work;
 	unsigned long		  mask;
 	struct vhost_dev	 *dev;
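The wait_queue_t to wait_queue_entry_t changes in this area are a pure rename; custom wake callbacks keep the same shape. A minimal sketch with placeholder names:

#include <linux/wait.h>

static int example_wake(wait_queue_entry_t *wait, unsigned mode,
                        int sync, void *key)
{
        /* driver-specific handling of the wakeup goes here */
        return 0;
}

static void example_register(wait_queue_head_t *wqh,
                             wait_queue_entry_t *wait)
{
        init_waitqueue_func_entry(wait, example_wake);
        add_wait_queue(wqh, wait);
}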
diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index 687ebb0..41d7979 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -1048,7 +1048,7 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 
 	for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE;
 	     i++, block += DETAILED_TIMING_DESCRIPTION_SIZE)
-		if (PIXEL_CLOCK)
+		if (PIXEL_CLOCK != 0)
 			edt[num++] = block - edid;
 
 	/* Yikes, EDID data is totally useless */
diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index ec2e7e3..449fcea 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1646,8 +1646,9 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev_dbg(dev->gdev, "%s %s - serial #%s\n",
 		usbdev->manufacturer, usbdev->product, usbdev->serial);
 	dev_dbg(dev->gdev, "vid_%04x&pid_%04x&rev_%04x driver's ufx_data struct at %p\n",
-		usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-		usbdev->descriptor.bcdDevice, dev);
+		le16_to_cpu(usbdev->descriptor.idVendor),
+		le16_to_cpu(usbdev->descriptor.idProduct),
+		le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
 	dev_dbg(dev->gdev, "console enable=%d\n", console);
 	dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 6a3c353..05ef657 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1105,8 +1105,8 @@ static int dlfb_ops_blank(int blank_mode, struct fb_info *info)
 	char *bufptr;
 	struct urb *urb;
 
-	pr_info("/dev/fb%d FB_BLANK mode %d --> %d\n",
-		info->node, dev->blank_mode, blank_mode);
+	pr_debug("/dev/fb%d FB_BLANK mode %d --> %d\n",
+		 info->node, dev->blank_mode, blank_mode);
 
 	if ((dev->blank_mode == FB_BLANK_POWERDOWN) &&
 	    (blank_mode != FB_BLANK_POWERDOWN)) {
@@ -1613,8 +1613,9 @@ static int dlfb_usb_probe(struct usb_interface *interface,
 	pr_info("%s %s - serial #%s\n",
 		usbdev->manufacturer, usbdev->product, usbdev->serial);
 	pr_info("vid_%04x&pid_%04x&rev_%04x driver's dlfb_data struct at %p\n",
-		usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-		usbdev->descriptor.bcdDevice, dev);
+		le16_to_cpu(usbdev->descriptor.idVendor),
+		le16_to_cpu(usbdev->descriptor.idProduct),
+		le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
 	pr_info("console enable=%d\n", console);
 	pr_info("fb_defio enable=%d\n", fb_defio);
 	pr_info("shadow enable=%d\n", shadow);
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index f9718f0..badee04 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1630,16 +1630,14 @@ static void viafb_init_proc(struct viafb_shared *shared)
 }
 static void viafb_remove_proc(struct viafb_shared *shared)
 {
-	struct proc_dir_entry *viafb_entry = shared->proc_entry,
-		*iga1_entry = shared->iga1_proc_entry,
-		*iga2_entry = shared->iga2_proc_entry;
+	struct proc_dir_entry *viafb_entry = shared->proc_entry;
 
 	if (!viafb_entry)
 		return;
 
-	remove_proc_entry("output_devices", iga2_entry);
+	remove_proc_entry("output_devices", shared->iga2_proc_entry);
 	remove_proc_entry("iga2", viafb_entry);
-	remove_proc_entry("output_devices", iga1_entry);
+	remove_proc_entry("output_devices", shared->iga1_proc_entry);
 	remove_proc_entry("iga1", viafb_entry);
 	remove_proc_entry("supported_output_devices", viafb_entry);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 408c174..22caf80 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -663,6 +663,12 @@ static int virtballoon_restore(struct virtio_device *vdev)
 }
 #endif
 
+static int virtballoon_validate(struct virtio_device *vdev)
+{
+	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
+	return 0;
+}
+
 static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
@@ -675,6 +681,7 @@ static struct virtio_driver virtio_balloon_driver = {
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
+	.validate =	virtballoon_validate,
 	.probe =	virtballoon_probe,
 	.remove =	virtballoon_remove,
 	.config_changed = virtballoon_changed,
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index c1ec8ee..9e35032 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -190,6 +190,7 @@ static void do_poweroff(void)
 {
 	switch (system_state) {
 	case SYSTEM_BOOTING:
+	case SYSTEM_SCHEDULING:
 		orderly_poweroff(true);
 		break;
 	case SYSTEM_RUNNING:
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 7a92a5e..feca75b 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state)
 				st->global_error = 1;
 		}
 	}
-	st->va += PAGE_SIZE * nr;
-	st->index += nr;
+	st->va += XEN_PAGE_SIZE * nr;
+	st->index += nr / XEN_PFN_PER_PAGE;
 
 	return 0;
 }
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 4ac2ca8..bf13d1e 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -233,12 +233,12 @@ static int tmem_cleancache_init_fs(size_t pagesize)
 	return xen_tmem_new_pool(uuid_private, 0, pagesize);
 }
 
-static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize)
 {
 	struct tmem_pool_uuid shared_uuid;
 
-	shared_uuid.uuid_lo = *(u64 *)uuid;
-	shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
+	shared_uuid.uuid_lo = *(u64 *)&uuid->b[0];
+	shared_uuid.uuid_hi = *(u64 *)&uuid->b[8];
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3062cce..782d4d0 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
 	struct sockaddr_rxrpc srx;
 	struct afs_server *server;
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
@@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
 	struct afs_call *call = container_of(work, struct afs_call, work);
-	struct uuid_v1 *r = call->request;
+	struct afs_uuid *r = call->request;
 
 	struct {
 		__be32	match;
@@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
  */
 static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
-	struct uuid_v1 *r;
+	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
@@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		}
 
 		_debug("unmarshall UUID");
-		call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
 
 		b = call->buffer;
 		r = call->request;
-		r->time_low			= b[0];
-		r->time_mid			= htons(ntohl(b[1]));
-		r->time_hi_and_version		= htons(ntohl(b[2]));
+		r->time_low			= ntohl(b[0]);
+		r->time_mid			= ntohl(b[1]);
+		r->time_hi_and_version		= ntohl(b[2]);
 		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
 		r->clock_seq_low		= ntohl(b[4]);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 3936729..4e25566 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -410,6 +410,15 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
+struct afs_uuid {
+	__be32		time_low;			/* low part of timestamp */
+	__be16		time_mid;			/* mid part of timestamp */
+	__be16		time_hi_and_version;		/* high part of timestamp and version  */
+	__u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+	__u8		clock_seq_low;			/* clock seq low */
+	__u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*****************************************************************************/
 /*
  * cache.c
@@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
 
 /*
  * misc.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 51d7d17..9944770 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,7 +31,7 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
 
 /*
diff --git a/fs/aio.c b/fs/aio.c
index f52d925..dcad3a6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+	if (unlikely(iocb->aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 	}
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->common.ki_pos = iocb->aio_offset;
 	req->common.ki_complete = aio_complete;
 	req->common.ki_flags = iocb_flags(req->common.ki_filp);
+	req->common.ki_hint = file_write_hint(file);
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
+	ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+	if (unlikely(ret)) {
+		pr_debug("EINVAL: aio_rw_flags\n");
+		goto out_put_req;
+	}
+
+	if ((req->common.ki_flags & IOCB_NOWAIT) &&
+			!(req->common.ki_flags & IOCB_DIRECT)) {
+		ret = -EOPNOTSUPP;
+		goto out_put_req;
+	}
+
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		pr_debug("EFAULT: aio_key\n");
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981..974f534 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -83,7 +83,7 @@ struct autofs_info {
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8..dd9f1be 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
 	int status;
 
 	token = (autofs_wqt_t) param->fail.token;
-	status = param->fail.status ? param->fail.status : -ENOENT;
+	status = param->fail.status < 0 ? param->fail.status : -ENOENT;
 	return autofs4_wait_release(sbi, token, status);
 }
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 24a58bf..7071895 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
-		 (unsigned long) wq->wait_queue_token,
+		 (unsigned long) wq->wait_queue_entry_token,
 		 wq->name.len, wq->name.name, type);
 
 	memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*mp);
 
-		mp->wait_queue_token = wq->wait_queue_token;
+		mp->wait_queue_entry_token = wq->wait_queue_entry_token;
 		mp->len = wq->name.len;
 		memcpy(mp->name, wq->name.name, wq->name.len);
 		mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*ep);
 
-		ep->wait_queue_token = wq->wait_queue_token;
+		ep->wait_queue_entry_token = wq->wait_queue_entry_token;
 		ep->len = wq->name.len;
 		memcpy(ep->name, wq->name.name, wq->name.len);
 		ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*packet);
 
-		packet->wait_queue_token = wq->wait_queue_token;
+		packet->wait_queue_entry_token = wq->wait_queue_entry_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
 		packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 			return -ENOMEM;
 		}
 
-		wq->wait_queue_token = autofs4_next_wait_queue;
+		wq->wait_queue_entry_token = autofs4_next_wait_queue;
 		if (++autofs4_next_wait_queue == 0)
 			autofs4_next_wait_queue = 1;
 		wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 		}
 
 		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 
 		/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	} else {
 		wq->wait_ctr++;
 		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
 			 wq->name.name, notify);
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 }
 
 
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
 {
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
 	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if (wq->wait_queue_token == wait_queue_token)
+		if (wq->wait_queue_entry_token == wait_queue_entry_token)
 			break;
 	}
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599d..a7df151 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	bio_init(&bio, vecs, nr_pages);
 	bio.bi_bdev = bdev;
 	bio.bi_iter.bi_sector = pos >> 9;
+	bio.bi_write_hint = iocb->ki_hint;
 	bio.bi_private = current;
 	bio.bi_end_io = blkdev_bio_end_io_simple;
 
@@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	if (vecs != inline_vecs)
 		kfree(vecs);
 
-	if (unlikely(bio.bi_error))
-		return bio.bi_error;
+	if (unlikely(bio.bi_status))
+		ret = blk_status_to_errno(bio.bi_status);
+
+	bio_uninit(&bio);
+
 	return ret;
 }
 
@@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
 	bool should_dirty = dio->should_dirty;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-		if (bio->bi_error && !dio->bio.bi_error)
-			dio->bio.bi_error = bio->bi_error;
+		if (bio->bi_status && !dio->bio.bi_status)
+			dio->bio.bi_status = bio->bi_status;
 	} else {
 		if (!dio->is_sync) {
 			struct kiocb *iocb = dio->iocb;
-			ssize_t ret = dio->bio.bi_error;
+			ssize_t ret;
 
-			if (likely(!ret)) {
+			if (likely(!dio->bio.bi_status)) {
 				ret = dio->size;
 				iocb->ki_pos += ret;
+			} else {
+				ret = blk_status_to_errno(dio->bio.bi_status);
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
@@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	loff_t pos = iocb->ki_pos;
 	blk_qc_t qc = BLK_QC_T_NONE;
-	int ret;
+	int ret = 0;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
@@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	for (;;) {
 		bio->bi_bdev = bdev;
 		bio->bi_iter.bi_sector = pos >> 9;
+		bio->bi_write_hint = iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = blkdev_bio_end_io;
 
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
-			bio->bi_error = ret;
+			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 			break;
 		}
@@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	}
 	__set_current_state(TASK_RUNNING);
 
-	ret = dio->bio.bi_error;
+	if (!ret)
+		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
 		ret = dio->size;
 
@@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static __init int blkdev_init(void)
 {
-	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
 	if (!blkdev_dio_pool)
 		return -ENOMEM;
 	return 0;
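With bi_error replaced by bi_status, completion paths in this area keep the block-layer status internally and convert to a negative errno only where callers expect one. A minimal sketch with placeholder names:

#include <linux/bio.h>

static void example_end_io(struct bio *bio)
{
        int err = blk_status_to_errno(bio->bi_status);

        if (err)
                pr_debug("I/O completed with error %d\n", err);
        bio_put(bio);
}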
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4..d87ac27 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
 	 * The original bio may be split to several sub-bios, this is
 	 * done during endio of sub-bios
 	 */
-	int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+	blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+			blk_status_t);
 };
 
 /*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e..4ded1c3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 	/* mutex is not held! This is not save if IO is not yet completed
 	 * on umount */
 	iodone_w_error = 0;
-	if (bp->bi_error)
+	if (bp->bi_status)
 		iodone_w_error = 1;
 
 	BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
 		if ((dev_state->state->print_mask &
 		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
 			pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
-			       bp->bi_error,
+			       bp->bi_status,
 			       btrfsic_get_block_type(dev_state->state, block),
 			       block->logical_bytenr, dev_state->name,
 			       block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b28..a2fad39 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	unsigned long index;
 	int ret;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	struct page *page;
 	unsigned long index;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
 					 cb->start,
 					 cb->start + cb->len - 1,
 					 NULL,
-					 bio->bi_error ? 0 : 1);
+					 bio->bi_status ? 0 : 1);
 	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ static void end_compressed_bio_write(struct bio *bio)
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				 unsigned long len, u64 disk_start,
 				 unsigned long compressed_len,
 				 struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct page *page;
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
-	int ret;
+	blk_status_t ret;
 	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 	refcount_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
 	if (!bio) {
 		kfree(cb);
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 	}
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	/* create and submit bios for the compressed pages */
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
-			ret = io_tree->ops->merge_bio_hook(page, 0,
+			submit = io_tree->ops->merge_bio_hook(page, 0,
 							   PAGE_SIZE,
 							   bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(bio);
 
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 			ret = btrfs_map_bio(fs_info, bio, 0, 1);
 			if (ret) {
-				bio->bi_error = ret;
+				bio->bi_status = ret;
 				bio_endio(bio);
 			}
 
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	ret = btrfs_map_bio(fs_info, bio, 0, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 
@@ -569,7 +569,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
  * After the compressed pages are read, we copy the bytes into the
  * bio we were passed and then call the bio end_io calls
  */
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret = -ENOMEM;
+	blk_status_t ret = BLK_STS_RESOURCE;
 	int faili = 0;
 	u32 *sums;
 
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				   PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 	if (!em)
-		return -EIO;
+		return BLK_STS_IOERR;
 
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 							      __GFP_HIGHMEM);
 		if (!cb->compressed_pages[pg_index]) {
 			faili = pg_index - 1;
-			ret = -ENOMEM;
+			ret = BLK_STS_RESOURCE;
 			goto fail2;
 		}
 	}
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	refcount_set(&cb->pending_bios, 1);
 
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+		int submit = 0;
+
 		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
-			ret = tree->ops->merge_bio_hook(page, 0,
+			submit = tree->ops->merge_bio_hook(page, 0,
 							PAGE_SIZE,
 							comp_bio, 0);
-		else
-			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
 			bio_get(comp_bio);
 
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 			if (ret) {
-				comp_bio->bi_error = ret;
+				comp_bio->bi_status = ret;
 				bio_endio(comp_bio);
 			}
 
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
 	if (ret) {
-		comp_bio->bi_error = ret;
+		comp_bio->bi_status = ret;
 		bio_endio(comp_bio);
 	}
 
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43a..680d426 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 			      unsigned long total_out, u64 disk_start,
 			      struct bio *bio);
 
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
 				  struct page **compressed_pages,
 				  unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
 enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 643c70d..a0d0c79 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2563,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
 }
 
 /*
@@ -2573,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
 static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
 						 unsigned num_items)
 {
-	return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
+	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
 }
 
 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
 			      u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 60a7506..c24d615 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
 
 	if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
 		btrfs_crit(fs_info, "invalid dir item name len: %u",
-		       (unsigned)btrfs_dir_data_len(leaf, dir_item));
+		       (unsigned)btrfs_dir_name_len(leaf, dir_item));
 		return 1;
 	}
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8685d67..6036d15 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
 	bio_end_io_t *end_io;
 	void *private;
 	struct btrfs_fs_info *info;
-	int error;
+	blk_status_t status;
 	enum btrfs_wq_endio_type metadata;
 	struct list_head list;
 	struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
 	 */
 	u64 bio_offset;
 	struct btrfs_work work;
-	int error;
+	blk_status_t status;
 };
 
 /*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_work_func_t func;
 
 	fs_info = end_io_wq->info;
-	end_io_wq->error = bio->bi_error;
+	end_io_wq->status = bio->bi_status;
 
 	if (bio_op(bio) == REQ_OP_WRITE) {
 		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
 	btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata)
 {
 	struct btrfs_end_io_wq *end_io_wq;
 
 	end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
 	if (!end_io_wq)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	end_io_wq->private = bio->bi_private;
 	end_io_wq->end_io = bio->bi_end_io;
 	end_io_wq->info = info;
-	end_io_wq->error = 0;
+	end_io_wq->status = 0;
 	end_io_wq->bio = bio;
 	end_io_wq->metadata = metadata;
 
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
-	int ret;
+	blk_status_t ret;
 
 	async = container_of(work, struct  async_submit_bio, work);
 	ret = async->submit_bio_start(async->inode, async->bio,
 				      async->mirror_num, async->bio_flags,
 				      async->bio_offset);
 	if (ret)
-		async->error = ret;
+		async->status = ret;
 }
 
 static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
 		wake_up(&fs_info->async_submit_wait);
 
 	/* If an error occurred we just want to clean up the bio and move on */
-	if (async->error) {
-		async->bio->bi_error = async->error;
+	if (async->status) {
+		async->bio->bi_status = async->status;
 		bio_endio(async->bio);
 		return;
 	}
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
 	kfree(async);
 }
 
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags,
-			u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done)
 {
 	struct async_submit_bio *async;
 
 	async = kmalloc(sizeof(*async), GFP_NOFS);
 	if (!async)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	async->inode = inode;
 	async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->bio_flags = bio_flags;
 	async->bio_offset = bio_offset;
 
-	async->error = 0;
+	async->status = 0;
 
 	atomic_inc(&fs_info->nr_async_submits);
 
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	return 0;
 }
 
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
 			break;
 	}
 
-	return ret;
+	return errno_to_blk_status(ret);
 }
 
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	/*
 	 * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
 	return btree_csum_one_bio(bio);
 }
 
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
-				 int mirror_num, unsigned long bio_flags,
-				 u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
 	 */
 	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
 	return 1;
 }
 
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int async = check_async_write(bio_flags);
-	int ret;
+	blk_status_t ret;
 
 	if (bio_op(bio) != REQ_OP_WRITE) {
 		/*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
 	return 0;
 
 out_w_error:
-	bio->bi_error = ret;
+	bio->bi_status = ret;
 	bio_endio(bio);
 	return ret;
 }
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 	end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
 	bio = end_io_wq->bio;
 
-	bio->bi_error = end_io_wq->error;
+	bio->bi_status = end_io_wq->status;
 	bio->bi_private = end_io_wq->private;
 	bio->bi_end_io = end_io_wq->end_io;
 	kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3467,10 +3466,12 @@ static int write_dev_supers(struct btrfs_device *device,
 		 * we fua the first super.  The others we allow
 		 * to go down lazy.
 		 */
-		if (i == 0)
-			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
-		else
+		if (i == 0) {
+			ret = btrfsic_submit_bh(REQ_OP_WRITE,
+						REQ_SYNC | REQ_FUA, bh);
+		} else {
 			ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+		}
 		if (ret)
 			errors++;
 	}
@@ -3495,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  * any device where the flush fails with eopnotsupp are flagged as not-barrier
  * capable
  */
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
 {
 	struct request_queue *q = bdev_get_queue(device->bdev);
 	struct bio *bio;
-	int ret = 0;
+	blk_status_t ret = 0;
 
 	if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 		return 0;
@@ -3511,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
 		wait_for_completion(&device->flush_wait);
 
-		if (bio->bi_error) {
-			ret = bio->bi_error;
+		if (bio->bi_status) {
+			ret = bio->bi_status;
 			btrfs_dev_stat_inc_and_print(device,
 				BTRFS_DEV_STAT_FLUSH_ERRS);
 		}
@@ -3531,11 +3532,11 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 	device->flush_bio = NULL;
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
 	if (!bio)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	bio->bi_end_io = btrfs_end_empty_barrier;
 	bio->bi_bdev = device->bdev;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
 	init_completion(&device->flush_wait);
 	bio->bi_private = &device->flush_wait;
 	device->flush_bio = bio;
@@ -3556,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 	struct btrfs_device *dev;
 	int errors_send = 0;
 	int errors_wait = 0;
-	int ret;
+	blk_status_t ret;
 
 	/* send down all the barriers */
 	head = &info->fs_devices->devices;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb..c581927 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-			struct bio *bio, int mirror_num,
-			unsigned long bio_flags, u64 bio_offset,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+		struct inode *inode, struct bio *bio, int mirror_num,
+		unsigned long bio_flags, u64 bio_offset,
+		extent_submit_bio_hook_t *submit_bio_start,
+		extent_submit_bio_hook_t *submit_bio_done);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e390451..33d979e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3993,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 				    info->space_info_kobj, "%s",
 				    alloc_name(found->flags));
 	if (ret) {
+		percpu_counter_destroy(&found->total_bytes_pinned);
 		kfree(found);
 		return ret;
 	}
@@ -4844,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	spin_unlock(&delayed_rsv->lock);
 
 commit:
-	trans = btrfs_join_transaction(fs_info->fs_root);
+	trans = btrfs_join_transaction(fs_info->extent_root);
 	if (IS_ERR(trans))
 		return -ENOSPC;
 
@@ -4862,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
 		       struct btrfs_space_info *space_info, u64 num_bytes,
 		       u64 orig_bytes, int state)
 {
-	struct btrfs_root *root = fs_info->fs_root;
+	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_trans_handle *trans;
 	int nr;
 	int ret = 0;
@@ -5062,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 	int flush_state = FLUSH_DELAYED_ITEMS_NR;
 
 	spin_lock(&space_info->lock);
-	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
 						      space_info);
 	if (!to_reclaim) {
 		spin_unlock(&space_info->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d8da3ed..d1cd601 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
 		goto free_state_cache;
 
 	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
-				     offsetof(struct btrfs_io_bio, bio));
+				     offsetof(struct btrfs_io_bio, bio),
+				     BIOSET_NEED_BVECS);
 	if (!btrfs_bioset)
 		goto free_buffer_cache;
 
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct bio *bio;
 	int read_mode = 0;
+	blk_status_t status;
 	int ret;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 		"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
 		read_mode, failrec->this_mirror, failrec->in_validation);
 
-	ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+	status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
 					 failrec->bio_flags, 0);
-	if (ret) {
+	if (status) {
 		free_io_failure(BTRFS_I(inode), failrec);
 		bio_put(bio);
+		ret = blk_status_to_errno(status);
 	}
 
 	return ret;
@@ -2458,7 +2461,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 	if (!uptodate) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		ret = ret < 0 ? ret : -EIO;
+		ret = err < 0 ? err : -EIO;
 		mapping_set_error(page->mapping, ret);
 	}
 }
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
  */
 static void end_bio_extent_writepage(struct bio *bio)
 {
+	int error = blk_status_to_errno(bio->bi_status);
 	struct bio_vec *bvec;
 	u64 start;
 	u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-		end_extent_writepage(page, bio->bi_error, start, end);
+		end_extent_writepage(page, error, start, end);
 		end_page_writeback(page);
 	}
 
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	int uptodate = !bio->bi_error;
+	int uptodate = !bio->bi_status;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
 	u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 		btrfs_debug(fs_info,
 			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
-			(u64)bio->bi_iter.bi_sector, bio->bi_error,
+			(u64)bio->bi_iter.bi_sector, bio->bi_status,
 			io_bio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 				ret = bio_readpage_error(bio, offset, page,
 							 start, end, mirror);
 				if (ret == 0) {
-					uptodate = !bio->bi_error;
+					uptodate = !bio->bi_status;
 					offset += len;
 					continue;
 				}
@@ -2673,7 +2677,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 		endio_readpage_release_extent(tree, extent_start, extent_len,
 					      uptodate);
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, bio->bi_error);
+		io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
 	bio_put(bio);
 }
 
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 				       unsigned long bio_flags)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 		btrfsic_submit_bio(bio);
 
 	bio_put(bio);
-	return ret;
+	return blk_status_to_errno(ret);
 }
 
 static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = tree;
+	bio->bi_write_hint = page->mapping->host->i_write_hint;
 	bio_set_op_attrs(bio, op, op_flags);
 	if (wbc) {
 		wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 		BUG_ON(!eb);
 		done = atomic_dec_and_test(&eb->io_pages);
 
-		if (bio->bi_error ||
+		if (bio->bi_status ||
 		    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
 			ClearPageUptodate(page);
 			set_btree_ioerr(page);
@@ -4377,6 +4382,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
 	return NULL;
 }
 
+/*
+ * Cache of the previous fiemap extent.
+ *
+ * Used for merging contiguous fiemap extents.
+ */
+struct fiemap_cache {
+	u64 offset;
+	u64 phys;
+	u64 len;
+	u32 flags;
+	bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge the current fiemap extent, specified by @offset,
+ * @phys, @len and @flags, with the cached one.
+ * Only when the merge fails is the cached extent submitted as a
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+				struct fiemap_cache *cache,
+				u64 offset, u64 phys, u64 len, u32 flags)
+{
+	int ret = 0;
+
+	if (!cache->cached)
+		goto assign;
+
+	/*
+	 * Sanity check: extent_fiemap() should have ensured that the new
+	 * fiemap extent won't overlap with the cached one.
+	 * Not recoverable.
+	 *
+	 * NOTE: Physical addresses can overlap, due to compression
+	 */
+	if (cache->offset + cache->len > offset) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	/*
+	 * Only merge fiemap extents if
+	 * 1) Their logical addresses are contiguous
+	 *
+	 * 2) Their physical addresses are contiguous
+	 *    So truly compressed (physical size smaller than logical size)
+	 *    extents won't get merged with each other
+	 *
+	 * 3) They share the same flags except FIEMAP_EXTENT_LAST
+	 *    So a regular extent won't get merged with a prealloc extent
+	 */
+	if (cache->offset + cache->len == offset &&
+	    cache->phys + cache->len == phys &&
+	    (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+			(flags & ~FIEMAP_EXTENT_LAST)) {
+		cache->len += len;
+		cache->flags |= flags;
+		goto try_submit_last;
+	}
+
+	/* Not mergeable, need to submit cached one */
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret)
+		return ret;
+assign:
+	cache->cached = true;
+	cache->offset = offset;
+	cache->phys = phys;
+	cache->len = len;
+	cache->flags = flags;
+try_submit_last:
+	if (cache->flags & FIEMAP_EXTENT_LAST) {
+		ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+				cache->phys, cache->len, cache->flags);
+		cache->cached = false;
+	}
+	return ret;
+}
+
+/*
+ * Sanity check for fiemap cache
+ *
+ * All cached fiemap extents should have been submitted by emit_fiemap_extent().
+ * Iteration should be terminated either by the last fiemap extent or
+ * by fieinfo->fi_extents_max.
+ * So no cached fiemap extent should remain.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+			       struct fiemap_extent_info *fieinfo,
+			       struct fiemap_cache *cache)
+{
+	int ret;
+
+	if (!cache->cached)
+		return 0;
+
+	/* Small and recoverable problem, only to inform the developer */
+#ifdef CONFIG_BTRFS_DEBUG
+	WARN_ON(1);
+#endif
+	btrfs_warn(fs_info,
+		   "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
+		   cache->offset, cache->phys, cache->len, cache->flags);
+	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+				      cache->len, cache->flags);
+	cache->cached = false;
+	if (ret > 0)
+		ret = 0;
+	return ret;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -4394,6 +4516,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct extent_state *cached_state = NULL;
 	struct btrfs_path *path;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct fiemap_cache cache = { 0 };
 	int end = 0;
 	u64 em_start = 0;
 	u64 em_len = 0;
@@ -4573,8 +4696,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
-		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-					      em_len, flags);
+		ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+					   em_len, flags);
 		if (ret) {
 			if (ret == 1)
 				ret = 0;
@@ -4582,6 +4705,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		}
 	}
 out_free:
+	if (!ret)
+		ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
 	free_extent_map(em);
 out:
 	btrfs_free_path(path);
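Aside: emit_fiemap_extent()/check_fiemap_cache() above keep a one-entry cache so that logically and physically contiguous extents are reported as a single fiemap record. A rough userspace sketch of the same merge rule, where report()/emit()/flush() are illustrative stand-ins for fiemap_fill_next_extent() and the cache handling, not btrfs functions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct extent_cache {
	uint64_t offset, phys, len;
	bool cached;
};

/* Stand-in for fiemap_fill_next_extent(): just print the extent. */
static void report(uint64_t offset, uint64_t phys, uint64_t len)
{
	printf("extent: logical=%llu phys=%llu len=%llu\n",
	       (unsigned long long)offset, (unsigned long long)phys,
	       (unsigned long long)len);
}

/* Merge with the cached extent when both the logical and the physical
 * ranges are contiguous; otherwise flush the cached one and restart. */
static void emit(struct extent_cache *c, uint64_t offset, uint64_t phys,
		 uint64_t len)
{
	if (c->cached && c->offset + c->len == offset &&
	    c->phys + c->len == phys) {
		c->len += len;
		return;
	}
	if (c->cached)
		report(c->offset, c->phys, c->len);
	c->offset = offset;
	c->phys = phys;
	c->len = len;
	c->cached = true;
}

static void flush(struct extent_cache *c)
{
	if (c->cached)
		report(c->offset, c->phys, c->len);
	c->cached = false;
}

int main(void)
{
	struct extent_cache c = { 0 };

	emit(&c, 0, 4096, 4096);	/* merged with the next one */
	emit(&c, 4096, 8192, 4096);
	emit(&c, 16384, 65536, 4096);	/* hole before it, not merged */
	flush(&c);			/* drain whatever is still cached */
	return 0;
}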
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f..487ca02 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
 struct btrfs_io_bio;
 struct io_failure_record;
 
-typedef	int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
-				       int mirror_num, unsigned long bio_flags,
-				       u64 bio_offset);
+typedef	blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset);
 struct extent_io_ops {
 	/*
 	 * The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31..5b1c709 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
 	kfree(bio->csum_allocated);
 }
 
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 				   u64 logical_offset, u32 *dst, int dio)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
 	if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 					csum_size, GFP_NOFS);
 			if (!btrfs_bio->csum_allocated) {
 				btrfs_free_path(path);
-				return -ENOMEM;
+				return BLK_STS_RESOURCE;
 			}
 			btrfs_bio->csum = btrfs_bio->csum_allocated;
 			btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 	return 0;
 }
 
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
 }
 
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
 {
 	return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
 }
@@ -433,7 +433,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 	return ret;
 }
 
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 		       u64 file_start, int contig)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 	sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
 		       GFP_NOFS);
 	if (!sums)
-		return -ENOMEM;
+		return BLK_STS_RESOURCE;
 
 	sums->len = bio->bi_iter.bi_size;
 	INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096e..59e2dcc 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	ssize_t err;
-	loff_t pos;
-	size_t count;
+	loff_t pos = iocb->ki_pos;
+	size_t count = iov_iter_count(from);
 	loff_t oldsize;
 	int clean_page = 0;
 
-	inode_lock(inode);
+	if ((iocb->ki_flags & IOCB_NOWAIT) &&
+			(iocb->ki_flags & IOCB_DIRECT)) {
+		/* Don't sleep on inode rwsem */
+		if (!inode_trylock(inode))
+			return -EAGAIN;
+		/*
+		 * We will have to allocate space if nodatacow is not
+		 * set, so bail out
+		 */
+		if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+					      BTRFS_INODE_PREALLOC)) ||
+		    check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+			inode_unlock(inode);
+			return -EAGAIN;
+		}
+	} else
+		inode_lock(inode);
+
 	err = generic_write_checks(iocb, from);
 	if (err <= 0) {
 		inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	 */
 	update_time_for_write(inode);
 
-	pos = iocb->ki_pos;
-	count = iov_iter_count(from);
 	start_pos = round_down(pos, fs_info->sectorsize);
 	oldsize = i_size_read(inode);
 	if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+	filp->f_mode |= FMODE_AIO_NOWAIT;
+	return generic_file_open(inode, filp);
+}
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= btrfs_file_llseek,
 	.read_iter      = generic_file_read_iter,
 	.splice_read	= generic_file_splice_read,
 	.write_iter	= btrfs_file_write_iter,
 	.mmap		= btrfs_file_mmap,
-	.open		= generic_file_open,
+	.open		= btrfs_file_open,
 	.release	= btrfs_release_file,
 	.fsync		= btrfs_sync_file,
 	.fallocate	= btrfs_fallocate,
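Aside: the btrfs_file_write_iter() hunk uses inode_trylock() so that IOCB_NOWAIT direct writes return -EAGAIN instead of sleeping on the inode lock. A small pthread sketch of the same try-lock-or-bail pattern; try_write() and its mutex are illustrative, not the VFS API (build with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;

/* Nowait path: give up immediately if the lock is contended, so the
 * caller can retry from a context where sleeping is acceptable. */
static int try_write(int nowait)
{
	if (nowait) {
		if (pthread_mutex_trylock(&inode_lock) != 0)
			return -EAGAIN;
	} else {
		pthread_mutex_lock(&inode_lock);
	}

	/* ... do the write under the lock ... */

	pthread_mutex_unlock(&inode_lock);
	return 0;
}

int main(void)
{
	printf("nowait write: %d\n", try_write(1));
	printf("blocking write: %d\n", try_write(0));
	return 0;
}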
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index a97fdc1..baacc18 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
 {
 	SHASH_DESC_ON_STACK(shash, tfm);
 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 retval;
 	int err;
 
 	shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	retval = *ctx;
+	barrier_data(ctx);
+	return retval;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17cbe93..556c930 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ static noinline void submit_compressed_extents(struct inode *inode,
 				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
 				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
 				PAGE_SET_WRITEBACK);
-		ret = btrfs_submit_compressed_write(inode,
+		if (btrfs_submit_compressed_write(inode,
 				    async_extent->start,
 				    async_extent->ram_size,
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
-				    async_extent->nr_pages);
-		if (ret) {
+				    async_extent->nr_pages)) {
 			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 			struct page *p = async_extent->pages[0];
 			const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
-				    u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
-	int ret = 0;
+	blk_status_t ret = 0;
 
 	ret = btrfs_csum_one_bio(inode, bio, 0, 0);
 	BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags,
-			  u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+		struct bio *bio, int mirror_num, unsigned long bio_flags,
+		u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	int ret;
+	blk_status_t ret;
 
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
 	if (ret) {
-		bio->bi_error = ret;
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
  * extent_io.c submission hook. This does the right thing for csum calculation
  * on write, or reading the csums from the tree before a read
  */
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
 			  int mirror_num, unsigned long bio_flags,
 			  u64 bio_offset)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-	int ret = 0;
+	blk_status_t ret = 0;
 	int skip_sum;
 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
@@ -1991,8 +1990,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
 	ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
 
 out:
-	if (ret < 0) {
-		bio->bi_error = ret;
+	if (ret) {
+		bio->bi_status = ret;
 		bio_endio(bio);
 	}
 	return ret;
@@ -2952,7 +2951,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
 	ret = test_range_bit(io_tree, ordered_extent->file_offset,
 			ordered_extent->file_offset + ordered_extent->len - 1,
-			EXTENT_DEFRAG, 1, cached_state);
+			EXTENT_DEFRAG, 0, cached_state);
 	if (ret) {
 		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
@@ -7483,8 +7482,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
 	int found = false;
 	void **pagep = NULL;
 	struct page *page = NULL;
-	int start_idx;
-	int end_idx;
+	unsigned long start_idx;
+	unsigned long end_idx;
 
 	start_idx = start >> PAGE_SHIFT;
 
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 	struct bio_vec *bvec;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	int ret;
 	int i;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		goto end;
 
 	uptodate = 1;
@@ -8141,8 +8140,8 @@ static void btrfs_retry_endio(struct bio *bio)
 	bio_put(bio);
 }
 
-static int __btrfs_subio_endio_read(struct inode *inode,
-				    struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ static int __btrfs_subio_endio_read(struct inode *inode,
 				io_bio->mirror_num,
 				btrfs_retry_endio, &done);
 		if (ret) {
-			err = ret;
+			err = errno_to_blk_status(ret);
 			goto next;
 		}
 
@@ -8211,8 +8210,8 @@ static int __btrfs_subio_endio_read(struct inode *inode,
 	return err;
 }
 
-static int btrfs_subio_endio_read(struct inode *inode,
-				  struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+		struct btrfs_io_bio *io_bio, blk_status_t err)
 {
 	bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	struct inode *inode = dip->inode;
 	struct bio *dio_bio;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
 		err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_bio->bi_status = bio->bi_status;
+	dio_end_io(dio_bio);
 
 	if (io_bio->end_io)
-		io_bio->end_io(io_bio, err);
+		io_bio->end_io(io_bio, blk_status_to_errno(err));
 	bio_put(bio);
 }
 
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	struct bio *dio_bio = dip->dio_bio;
 
 	__endio_write_update_ordered(dip->inode, dip->logical_offset,
-				     dip->bytes, !bio->bi_error);
+				     dip->bytes, !bio->bi_status);
 
 	kfree(dip);
 
-	dio_bio->bi_error = bio->bi_error;
-	dio_end_io(dio_bio, bio->bi_error);
+	dio_bio->bi_status = bio->bi_status;
+	dio_end_io(dio_bio);
 	bio_put(bio);
 }
 
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
 				    struct bio *bio, int mirror_num,
 				    unsigned long bio_flags, u64 offset)
 {
-	int ret;
+	blk_status_t ret;
 	ret = btrfs_csum_one_bio(inode, bio, offset, 1);
 	BUG_ON(ret); /* -ENOMEM */
 	return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
 	struct btrfs_dio_private *dip = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 
 	if (err)
 		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
 	if (dip->errors) {
 		bio_io_error(dip->orig_bio);
 	} else {
-		dip->dio_bio->bi_error = 0;
+		dip->dio_bio->bi_status = 0;
 		bio_endio(dip->orig_bio);
 	}
 out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 	return bio;
 }
 
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
 						 struct btrfs_dio_private *dip,
 						 struct bio *bio,
 						 u64 file_offset)
 {
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
-	int ret;
+	blk_status_t ret;
 
 	/*
 	 * We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
 	bool write = bio_op(bio) == REQ_OP_WRITE;
-	int ret;
+	blk_status_t ret;
 
 	if (async_submit)
 		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
 	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
 	if (io_bio && dip) {
-		io_bio->bi_error = -EIO;
+		io_bio->bi_status = BLK_STS_IOERR;
 		bio_endio(io_bio);
 		/*
 		 * The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 			      file_offset + dio_bio->bi_iter.bi_size - 1);
 
-		dio_bio->bi_error = -EIO;
+		dio_bio->bi_status = BLK_STS_IOERR;
 		/*
 		 * Releases and cleans up our dio_bio, no need to bio_put()
 		 * nor bio_endio()/bio_io_error() against dio_bio.
 		 */
-		dio_end_io(dio_bio, ret);
+		dio_end_io(dio_bio);
 	}
 	if (io_bio)
 		bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			dio_data.overwrite = 1;
 			inode_unlock(inode);
 			relock = true;
+		} else if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
 		}
 		ret = btrfs_delalloc_reserve_space(inode, offset, count);
 		if (ret)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb..f3d30d9e 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
  * this frees the rbio and runs through all the bios in the
  * bio_list and calls end_io on them
  */
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
 {
 	struct bio *cur = bio_list_get(&rbio->bio_list);
 	struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 	while (cur) {
 		next = cur->bi_next;
 		cur->bi_next = NULL;
-		cur->bi_error = err;
+		cur->bi_status = err;
 		bio_endio(cur);
 		cur = next;
 	}
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 static void raid_write_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
-	int err = bio->bi_error;
+	blk_status_t err = bio->bi_status;
 	int max_errors;
 
 	if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
 	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
 		     0 : rbio->bbio->max_errors;
 	if (atomic_read(&rbio->error) > max_errors)
-		err = -EIO;
+		err = BLK_STS_IOERR;
 
 	rbio_orig_end_io(rbio, err);
 }
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 		 * devices or if they are not contiguous
 		 */
 		if (last_end == disk_start && stripe->dev->bdev &&
-		    !last->bi_error &&
+		    !last->bi_status &&
 		    last->bi_bdev == stripe->dev->bdev) {
 			ret = bio_add_page(last, page, PAGE_SIZE, 0);
 			if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
 	 * we only read stripe pages off the disk, set them
 	 * up to date if there were no errors
 	 */
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
 {
 	struct btrfs_raid_bio *rbio = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		fail_bio_stripe(rbio, bio);
 	else
 		set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb..ba5595d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
 	struct scrub_ctx	*sctx;
 	struct btrfs_device	*dev;
 	struct bio		*bio;
-	int			err;
+	blk_status_t		status;
 	u64			logical;
 	u64			physical;
 #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 
 struct scrub_bio_ret {
 	struct completion event;
-	int error;
+	blk_status_t status;
 };
 
 static void scrub_bio_wait_endio(struct bio *bio)
 {
 	struct scrub_bio_ret *ret = bio->bi_private;
 
-	ret->error = bio->bi_error;
+	ret->status = bio->bi_status;
 	complete(&ret->event);
 }
 
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	int ret;
 
 	init_completion(&done.event);
-	done.error = 0;
+	done.status = 0;
 	bio->bi_iter.bi_sector = page->logical >> 9;
 	bio->bi_private = &done;
 	bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 		return ret;
 
 	wait_for_completion(&done.event);
-	if (done.error)
+	if (done.status)
 		return -EIO;
 
 	return 0;
@@ -1937,7 +1937,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		struct btrfs_dev_replace *dev_replace =
 			&sbio->sctx->fs_info->dev_replace;
 
@@ -2341,7 +2341,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 		bio->bi_bdev = sbio->dev->bdev;
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio_set_op_attrs(bio, REQ_OP_READ, 0);
-		sbio->err = 0;
+		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
 		   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
 	struct scrub_block *sblock = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		sblock->no_io_error_seen = 0;
 
 	bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-	sbio->err = bio->bi_error;
+	sbio->status = bio->bi_status;
 	sbio->bio = bio;
 
 	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
 	int i;
 
 	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
-	if (sbio->err) {
+	if (sbio->status) {
 		for (i = 0; i < sbio->page_count; i++) {
 			struct scrub_page *spage = sbio->pagev[i];
 
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
 	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
 	struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 			  sparity->nsectors);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67d..84a4959 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
 	struct btrfs_bio *bbio = bio->bi_private;
 	int is_orig_bio = 0;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		atomic_inc(&bbio->error);
-		if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+		if (bio->bi_status == BLK_STS_IOERR ||
+		    bio->bi_status == BLK_STS_TARGET) {
 			unsigned int stripe_index =
 				btrfs_io_bio(bio)->stripe_index;
 			struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
 		 * beyond the tolerance of the btrfs bio
 		 */
 		if (atomic_read(&bbio->error) > bbio->max_errors) {
-			bio->bi_error = -EIO;
+			bio->bi_status = BLK_STS_IOERR;
 		} else {
 			/*
 			 * this bio is actually up to date, we didn't
 			 * go over the max number of errors
 			 */
-			bio->bi_error = 0;
+			bio->bi_status = 0;
 		}
 
 		btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
 		bio->bi_iter.bi_sector = logical >> 9;
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 		btrfs_end_bbio(bbio, bio);
 	}
 }
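Aside: btrfs_end_bio() above marks the whole bio BLK_STS_IOERR only when the number of failed mirrors exceeds bbio->max_errors. A tiny C11 sketch of that per-mirror error accounting, with made-up names (multi_bio, mirror_done, overall_status):

#include <stdatomic.h>
#include <stdio.h>

struct multi_bio {
	atomic_int errors;	/* failed mirror submissions so far */
	int max_errors;		/* how many failures we can tolerate */
};

/* Called once per mirror completion; 'failed' says whether it failed. */
static void mirror_done(struct multi_bio *m, int failed)
{
	if (failed)
		atomic_fetch_add(&m->errors, 1);
}

/* Called after the last mirror completes. */
static int overall_status(struct multi_bio *m)
{
	return atomic_load(&m->errors) > m->max_errors ? -1 /* EIO */ : 0;
}

int main(void)
{
	struct multi_bio m = { .errors = 0, .max_errors = 1 };

	mirror_done(&m, 1);	/* one mirror failed ... */
	mirror_done(&m, 0);	/* ... but another copy made it to disk */
	printf("status: %d\n", overall_status(&m));
	return 0;
}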
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58..5c2cba8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 struct writeback_control *wbc);
+			 enum rw_hint hint, struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+					inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -1883,7 +1884,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+					inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
 	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
-	bh->b_end_io(bh, !bio->bi_error);
+	bh->b_end_io(bh, !bio->bi_status);
 	bio_put(bio);
 }
 
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-			 struct writeback_control *wbc)
+			 enum rw_hint write_hint, struct writeback_control *wbc)
 {
 	struct bio *bio;
 
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
+	bio->bi_write_hint = write_hint;
 
 	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
 	BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 int submit_bh(int op, int op_flags, struct buffer_head *bh)
 {
-	return submit_bh_wbc(op, op_flags, bh, NULL);
+	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
 
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9bf90bc..bb3a02c 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -18,7 +18,7 @@
 
 #include <linux/fscache-cache.h>
 #include <linux/timer.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
@@ -97,7 +97,7 @@ struct cachefiles_cache {
  * backing file read tracking
  */
 struct cachefiles_one_read {
-	wait_queue_t			monitor;	/* link into monitored waitqueue */
+	wait_queue_entry_t			monitor;	/* link into monitored waitqueue */
 	struct page			*back_page;	/* backing file page we're waiting for */
 	struct page			*netfs_page;	/* netfs page we're going to fill */
 	struct fscache_retrieval	*op;		/* retrieval op covering this */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 41df8a2..3978b32 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -204,7 +204,7 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
 		wait_queue_head_t *wq;
 
 		signed long timeout = 60 * HZ;
-		wait_queue_t wait;
+		wait_queue_entry_t wait;
 		bool requeue;
 
 		/* if the object we're waiting for is queued for processing,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index afbdc41..18d7aa6 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -21,7 +21,7 @@
  * - we use this to detect read completion of backing pages
  * - the caller holds the waitqueue lock
  */
-static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
 				  int sync, void *_key)
 {
 	struct cachefiles_one_read *monitor =
@@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 	}
 
 	/* remove from the waitqueue */
-	list_del(&wait->task_list);
+	list_del(&wait->entry);
 
 	/* move onto the action list and queue for FS-Cache thread pool */
 	ASSERT(monitor->op);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 987044b..59cb307 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	}
 
 	if (new_mode != old_mode) {
+		newattrs.ia_ctime = current_time(inode);
 		newattrs.ia_mode = new_mode;
 		newattrs.ia_valid = ATTR_MODE;
 		ret = __ceph_setattr(inode, &newattrs);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e8f11fa..7df550c 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 		ceph_mdsc_put_request(req);
 		if (!inode)
 			return ERR_PTR(-ESTALE);
+		if (inode->i_nlink == 0) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
 	}
 
 	return d_obtain_alias(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3fdde0b..29308a8 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1671,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode,
 	}
 
 	size = i_size_read(inode);
-	if (!(mode & FALLOC_FL_KEEP_SIZE))
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 		endoff = offset + length;
+		ret = inode_newsize_ok(inode, endoff);
+		if (ret)
+			goto unlock;
+	}
 
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index dcce79b..4de6cdd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2022,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		    attr->ia_size > inode->i_size) {
 			i_size_write(inode, attr->ia_size);
 			inode->i_blocks = calc_inode_blocks(attr->ia_size);
-			inode->i_ctime = attr->ia_ctime;
 			ci->i_reported_size = attr->ia_size;
 			dirtied |= CEPH_CAP_FILE_EXCL;
 		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
@@ -2044,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
 		     only ? "ctime only" : "ignored");
-		inode->i_ctime = attr->ia_ctime;
 		if (only) {
 			/*
 			 * if kernel wants to dirty ctime but nothing else,
@@ -2067,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 	if (dirtied) {
 		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
 							   &prealloc_cf);
-		inode->i_ctime = current_time(inode);
+		inode->i_ctime = attr->ia_ctime;
 	}
 
 	release &= issued;
@@ -2085,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		req->r_inode_drop = release;
 		req->r_args.setattr.mask = cpu_to_le32(mask);
 		req->r_num_caps = 1;
+		req->r_stamp = attr->ia_ctime;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 	}
 	dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f38e56f..0c05df4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1687,7 +1687,6 @@ struct ceph_mds_request *
 ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 {
 	struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
-	struct timespec ts;
 
 	if (!req)
 		return ERR_PTR(-ENOMEM);
@@ -1706,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
-	ktime_get_real_ts(&ts);
-	req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+	req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
 
 	req->r_op = op;
 	req->r_direct_mode = mode;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0fd081b..fcef706 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (to->type & ITER_IOVEC)
+	if (to->type == ITER_IOVEC)
 		ctx->should_dirty = true;
 
 	rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4d1fcd7..a869363 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/pagemap.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/wait_bit.h>
 
 #include <asm/div64.h>
 #include "cifsfs.h"
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b085319..3b147dc 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 
 	if (!pages) {
 		pages = vmalloc(max_pages * sizeof(struct page *));
-		if (!bv) {
+		if (!pages) {
 			kvfree(bv);
 			return -ENOMEM;
 		}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360..a723df3 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 		     struct cifs_fid *fid, __u16 search_flags,
 		     struct cifs_search_info *srch_inf)
 {
-	return CIFSFindFirst(xid, tcon, path, cifs_sb,
-			     &fid->netfid, search_flags, srch_inf, true);
+	int rc;
+
+	rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+			   &fid->netfid, search_flags, srch_inf, true);
+	if (rc)
+		cifs_dbg(FYI, "find first failed=%d\n", rc);
+	return rc;
 }
 
 static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c586918..7e48561 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
 	kfree(utf16_path);
 	if (rc) {
-		cifs_dbg(VFS, "open dir failed\n");
+		cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
 		return rc;
 	}
 
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
 				  fid->volatile_fid, 0, srch_inf);
 	if (rc) {
-		cifs_dbg(VFS, "query directory failed\n");
+		cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
 		SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 	}
 	return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 
 	sg = init_sg(rqst, sign);
 	if (!sg) {
-		cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+		cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+		rc = -ENOMEM;
 		goto free_req;
 	}
 
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 	iv = kzalloc(iv_len, GFP_KERNEL);
 	if (!iv) {
 		cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+		rc = -ENOMEM;
 		goto free_sg;
 	}
 	iv[0] = 3;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3cb5c9e..de50e74 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
 	pcreatetime = (__u64 *)value;
 	*pcreatetime = CIFS_I(inode)->createtime;
 	return sizeof(__u64);
-
-	return rc;
 }
 
 
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 8b2a994..a66f662 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
 }
 EXPORT_SYMBOL(config_item_get);
 
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+	if (item && kref_get_unless_zero(&item->ci_kref))
+		return item;
+	return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
 static void config_item_cleanup(struct config_item *item)
 {
 	struct config_item_type *t = item->ci_type;
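Aside: config_item_get_unless_zero() above is built on kref_get_unless_zero(), which takes a reference only while the count is still nonzero, so a dying object is never resurrected. A userspace sketch of that rule with C11 atomics; try_get()/put_ref() are illustrative names, not the kref API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	atomic_int refcount;
};

/* Take a reference only if the object is still alive (refcount > 0). */
static bool try_get(struct object *obj)
{
	int old = atomic_load(&obj->refcount);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return true;
		/* 'old' was reloaded by the failed CAS; loop and retry */
	}
	return false;		/* already zero: the object is being freed */
}

static void put_ref(struct object *obj)
{
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		printf("last reference dropped, free the object\n");
}

int main(void)
{
	struct object obj = { .refcount = 1 };

	printf("try_get on live object: %d\n", try_get(&obj));	/* 1 */
	put_ref(&obj);
	put_ref(&obj);		/* count reaches zero, "frees" it */
	printf("try_get on dead object: %d\n", try_get(&obj));	/* 0 */
	return 0;
}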
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index a6ab012..c8aabba 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
 	ret = -ENOMEM;
 	sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
 	if (sl) {
-		sl->sl_target = config_item_get(item);
 		spin_lock(&configfs_dirent_lock);
 		if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
 			spin_unlock(&configfs_dirent_lock);
-			config_item_put(item);
 			kfree(sl);
 			return -ENOENT;
 		}
+		sl->sl_target = config_item_get(item);
 		list_add(&sl->sl_list, &target_sd->s_links);
 		spin_unlock(&configfs_dirent_lock);
 		ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84..6181e95 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
 			goto errout;
 		}
 		err = submit_bio_wait(bio);
-		if ((err == 0) && bio->bi_error)
+		if (err == 0 && bio->bi_status)
 			err = -EIO;
 		bio_put(bio);
 		if (err)
diff --git a/fs/dax.c b/fs/dax.c
index c22eaf1..33d05aa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -84,7 +84,7 @@ struct exceptional_entry_key {
 };
 
 struct wait_exceptional_entry_queue {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct exceptional_entry_key key;
 };
 
@@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
 	return wait_table + hash;
 }
 
-static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
 				       int sync, void *keyp)
 {
 	struct exceptional_entry_key *key = keyp;
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 			if (ret < 0)
 				goto out;
 		}
+		start_index = indices[pvec.nr - 1] + 1;
 	}
 out:
 	put_dax(dax_dev);
@@ -1155,6 +1156,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
 	}
 
 	/*
+	 * It is possible, particularly with mixed reads & writes to private
+	 * mappings, that we have raced with a PMD fault that overlaps with
+	 * the PTE we need to set up.  If so just return and the fault will be
+	 * retried.
+	 */
+	if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+		vmf_ret = VM_FAULT_NOPAGE;
+		goto unlock_entry;
+	}
+
+	/*
 	 * Note that we don't bother to use iomap_apply here: DAX required
 	 * the file system block size to be equal the page size, which means
 	 * that we never have to deal with more than a single extent here.
@@ -1398,6 +1410,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 		goto fallback;
 
 	/*
+	 * It is possible, particularly with mixed reads & writes to private
+	 * mappings, that we have raced with a PTE fault that overlaps with
+	 * the PMD we need to set up.  If so just return and the fault will be
+	 * retried.
+	 */
+	if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
+			!pmd_devmap(*vmf->pmd)) {
+		result = 0;
+		goto unlock_entry;
+	}
+
+	/*
 	 * Note that we don't use iomap_apply here.  We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
 	 * to look up our filesystem block.
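Aside: the dax_writeback_mapping_range() hunk adds `start_index = indices[pvec.nr - 1] + 1` so each lookup resumes past the last entry it returned, which is what guarantees forward progress. The same iteration shape in plain C, with find_batch() standing in for the tagged radix-tree lookup:

#include <stddef.h>
#include <stdio.h>

#define BATCH 3

/* Stand-in for the tagged lookup: return up to BATCH entry indices in
 * [start, end], writing them to 'out'. */
static size_t find_batch(size_t start, size_t end, size_t *out)
{
	static const size_t entries[] = { 2, 5, 6, 9, 12, 15 };
	size_t n = 0;

	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		if (entries[i] >= start && entries[i] <= end && n < BATCH)
			out[n++] = entries[i];
	return n;
}

int main(void)
{
	size_t out[BATCH];
	size_t start = 0, end = 20;
	size_t n;

	while ((n = find_batch(start, end, out)) != 0) {
		for (size_t i = 0; i < n; i++)
			printf("flush entry %zu\n", out[i]);
		/* Resume after the last entry we handled, otherwise the
		 * next lookup would return the same batch forever. */
		start = out[n - 1] + 1;
	}
	return 0;
}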
diff --git a/fs/dcache.c b/fs/dcache.c
index cddf397..a9f995f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data)
 {
 	struct detach_data *data = _data;
 
-	if (!data->mountpoint && !data->select.found)
+	if (!data->mountpoint && list_empty(&data->select.dispose))
 		__d_drop(data->select.start);
 }
 
@@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry)
 
 		d_walk(dentry, &data, detach_and_collect, check_and_drop);
 
-		if (data.select.found)
+		if (!list_empty(&data.select.dispose))
 			shrink_dentry_list(&data.select.dispose);
+		else if (!data.mountpoint)
+			return;
 
 		if (data.mountpoint) {
 			detach_mounts(data.mountpoint);
 			dput(data.mountpoint);
 		}
-
-		if (!data.mountpoint && !data.select.found)
-			break;
-
 		cond_resched();
 	}
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea..08cf278 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
 	dio_complete(dio, 0, true);
 }
 
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
 
 /*
  * Asynchronous IO callback. 
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
 /**
  * dio_end_io - handle the end io action for the given bio
  * @bio: The direct io bio thats being completed
- * @error: Error if there was one
  *
  * This is meant to be called by any filesystem that uses their own dio_submit_t
  * so that the DIO specific endio actions are dealt with after the filesystem
  * has done it's completion work.
  */
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
 {
 	struct dio *dio = bio->bi_private;
 
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 	else
 		bio->bi_end_io = dio_bio_end_io;
 
+	bio->bi_write_hint = dio->iocb->ki_hint;
+
 	sdio->bio = bio;
 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
 /*
  * Process one completed BIO.  No locks are held.
  */
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 {
 	struct bio_vec *bvec;
 	unsigned i;
-	int err;
+	blk_status_t err = bio->bi_status;
 
-	if (bio->bi_error)
-		dio->io_error = -EIO;
+	if (err) {
+		if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+			dio->io_error = -EAGAIN;
+		else
+			dio->io_error = -EIO;
+	}
 
 	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
-		err = bio->bi_error;
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 				set_page_dirty_lock(page);
 			put_page(page);
 		}
-		err = bio->bi_error;
 		bio_put(bio);
 	}
 	return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 			bio = dio->bio_list;
 			dio->bio_list = bio->bi_private;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			ret2 = dio_bio_complete(dio, bio);
+			ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
 			if (ret == 0)
 				ret = ret2;
 		}
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	if (iov_iter_rw(iter) == WRITE) {
 		dio->op = REQ_OP_WRITE;
 		dio->op_flags = REQ_SYNC | REQ_IDLE;
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			dio->op_flags |= REQ_NOWAIT;
 	} else {
 		dio->op = REQ_OP_READ;
 	}
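Aside: dio_bio_complete() above reports -EAGAIN only when the bio failed with BLK_STS_AGAIN and was submitted with REQ_NOWAIT; everything else stays -EIO. A compact sketch of that decision, using illustrative names rather than the block layer API:

#include <errno.h>
#include <stdio.h>

enum sketch_status { STS_OK, STS_IOERR, STS_AGAIN };

/* Map a completion status to the errno reported to the submitter. */
static int completion_errno(enum sketch_status status, int nowait)
{
	if (status == STS_OK)
		return 0;
	if (status == STS_AGAIN && nowait)
		return -EAGAIN;	/* caller may retry on a blocking path */
	return -EIO;
}

int main(void)
{
	printf("nowait, device busy:   %d\n", completion_errno(STS_AGAIN, 1));
	printf("blocking, device busy: %d\n", completion_errno(STS_AGAIN, 0));
	printf("media error:           %d\n", completion_errno(STS_IOERR, 0));
	return 0;
}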
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 68b9fff..9736df2 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
  * This is used to atomically remove a wait queue entry from the eventfd wait
  * queue head, and read/reset the counter value.
  */
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt)
 {
 	unsigned long flags;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5420767..b1c8e23 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -244,7 +244,7 @@ struct eppoll_entry {
 	 * Wait queue item that will be linked to the target file wait
 	 * queue head.
 	 */
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* The wait queue head that linked the "wait" wait queue item */
 	wait_queue_head_t *whead;
@@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
-static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait);
 }
 
 /* Get the "struct epitem" from a wait queue pointer */
-static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
+static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
 {
 	return container_of(p, struct eppoll_entry, wait)->base;
 }
@@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
  * mechanism. It is called by the stored file descriptors when they
  * have events to report.
  */
-static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int pwake = 0;
 	unsigned long flags;
@@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		 * can't use __remove_wait_queue(). whead->lock is held by
 		 * the caller.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wait->entry);
 	}
 
 	spin_lock_irqsave(&ep->lock, flags);
@@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	int res = 0, eavail, timed_out = 0;
 	unsigned long flags;
 	u64 slack = 0;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
 	if (timeout > 0) {
diff --git a/fs/exec.c b/fs/exec.c
index 72934df..9041990 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
 	if (write) {
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+		unsigned long ptr_size;
 		struct rlimit *rlim;
 
+		/*
+		 * Since the stack will hold pointers to the strings, we
+		 * must account for them as well.
+		 *
+		 * The size calculation is the entire vma while each arg page is
+		 * built, so each time we get here it's calculating how far it
+		 * is currently (rather than each call being just the newly
+		 * added size from the arg page).  As a result, we need to
+		 * always add the entire size of the pointers, so that on the
+		 * last call to get_arg_page() we'll actually have the entire
+		 * correct size.
+		 */
+		ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+		if (ptr_size > ULONG_MAX - size)
+			goto fail;
+		size += ptr_size;
+
 		acct_arg_size(bprm, size / PAGE_SIZE);
 
 		/*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		rlim = current->signal->rlim;
-		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-			put_page(page);
-			return NULL;
-		}
+		if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+			goto fail;
 	}
 
 	return page;
+
+fail:
+	put_page(page);
+	return NULL;
 }
 
 static void put_arg_page(struct page *page)
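Aside: the get_arg_page() hunk charges the argv/envp pointer array against the stack limit and guards the addition with `ptr_size > ULONG_MAX - size`, the overflow-safe way to check `size + ptr_size`. A minimal illustration of that check:

#include <limits.h>
#include <stdio.h>

/* Add 'extra' to '*total' only if the sum cannot wrap around. */
static int add_checked(unsigned long *total, unsigned long extra)
{
	if (extra > ULONG_MAX - *total)
		return -1;	/* would overflow, reject */
	*total += extra;
	return 0;
}

int main(void)
{
	unsigned long size = 4096;
	int ret;

	ret = add_checked(&size, 128 * sizeof(void *));
	printf("small add: %d (size=%lu)\n", ret, size);

	ret = add_checked(&size, ULONG_MAX);
	printf("huge add:  %d (size unchanged: %lu)\n", ret, size);
	return 0;
}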
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index fd38993..3ec0e46 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  */
 
+#include <linux/quotaops.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
@@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	handle_t *handle;
 	int error, retries = 0;
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
 				    ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8e80461..3219154 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2523,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh,
 			   int buf_size,
 			   struct inode *dir,
 			   struct ext4_filename *fname,
-			   const struct qstr *d_name,
 			   unsigned int offset,
 			   struct ext4_dir_entry_2 **res_dir);
 extern int ext4_generic_delete_entry(handle_t *handle,
@@ -3007,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
 				   int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					struct ext4_filename *fname,
-					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
 					int *has_inline_data);
 extern int ext4_delete_inline_entry(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a97dff..3e36508 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
-	struct ext4_extent zero_ex;
+	struct ext4_extent zero_ex1, zero_ex2;
 	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
 	unsigned int ee_len, depth, map_len = map->m_len;
 	int allocated = 0, max_zeroout = 0;
 	int err = 0;
-	int split_flag = 0;
+	int split_flag = EXT4_EXT_DATA_VALID2;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
@@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	zero_ex.ee_len = 0;
+	zero_ex1.ee_len = 0;
+	zero_ex2.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
 
@@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (ext4_encrypted_inode(inode))
 		max_zeroout = 0;
 
-	/* If extent is less than s_max_zeroout_kb, zeroout directly */
-	if (max_zeroout && (ee_len <= max_zeroout)) {
-		err = ext4_ext_zeroout(inode, ex);
-		if (err)
-			goto out;
-		zero_ex.ee_block = ex->ee_block;
-		zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
-		ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
-
-		err = ext4_ext_get_access(handle, inode, path + depth);
-		if (err)
-			goto out;
-		ext4_ext_mark_initialized(ex);
-		ext4_ext_try_to_merge(handle, inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-		goto out;
-	}
-
 	/*
-	 * four cases:
+	 * five cases:
 	 * 1. split the extent into three extents.
-	 * 2. split the extent into two extents, zeroout the first half.
-	 * 3. split the extent into two extents, zeroout the second half.
+	 * 2. split the extent into two extents, zeroout the head of the first
+	 *    extent.
+	 * 3. split the extent into two extents, zeroout the tail of the second
+	 *    extent.
 	 * 4. split the extent into two extents without zeroout.
+	 * 5. no splitting needed, just possibly zeroout the head and / or the
+	 *    tail of the extent.
 	 */
 	split_map.m_lblk = map->m_lblk;
 	split_map.m_len = map->m_len;
 
-	if (max_zeroout && (allocated > map->m_len)) {
+	if (max_zeroout && (allocated > split_map.m_len)) {
 		if (allocated <= max_zeroout) {
-			/* case 3 */
-			zero_ex.ee_block =
-					 cpu_to_le32(map->m_lblk);
-			zero_ex.ee_len = cpu_to_le16(allocated);
-			ext4_ext_store_pblock(&zero_ex,
-				ext4_ext_pblock(ex) + map->m_lblk - ee_block);
-			err = ext4_ext_zeroout(inode, &zero_ex);
+			/* case 3 or 5 */
+			zero_ex1.ee_block =
+				 cpu_to_le32(split_map.m_lblk +
+					     split_map.m_len);
+			zero_ex1.ee_len =
+				cpu_to_le16(allocated - split_map.m_len);
+			ext4_ext_store_pblock(&zero_ex1,
+				ext4_ext_pblock(ex) + split_map.m_lblk +
+				split_map.m_len - ee_block);
+			err = ext4_ext_zeroout(inode, &zero_ex1);
 			if (err)
 				goto out;
-			split_map.m_lblk = map->m_lblk;
 			split_map.m_len = allocated;
-		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
-			/* case 2 */
-			if (map->m_lblk != ee_block) {
-				zero_ex.ee_block = ex->ee_block;
-				zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+		}
+		if (split_map.m_lblk - ee_block + split_map.m_len <
+								max_zeroout) {
+			/* case 2 or 5 */
+			if (split_map.m_lblk != ee_block) {
+				zero_ex2.ee_block = ex->ee_block;
+				zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
 							ee_block);
-				ext4_ext_store_pblock(&zero_ex,
+				ext4_ext_store_pblock(&zero_ex2,
 						      ext4_ext_pblock(ex));
-				err = ext4_ext_zeroout(inode, &zero_ex);
+				err = ext4_ext_zeroout(inode, &zero_ex2);
 				if (err)
 					goto out;
 			}
 
+			split_map.m_len += split_map.m_lblk - ee_block;
 			split_map.m_lblk = ee_block;
-			split_map.m_len = map->m_lblk - ee_block + map->m_len;
 			allocated = map->m_len;
 		}
 	}
@@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		err = 0;
 out:
 	/* If we have gotten a failure, don't zero out status tree */
-	if (!err)
-		err = ext4_zeroout_es(inode, &zero_ex);
+	if (!err) {
+		err = ext4_zeroout_es(inode, &zero_ex1);
+		if (!err)
+			err = ext4_zeroout_es(inode, &zero_ex2);
+	}
 	return err ? err : allocated;
 }
 
@@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
 	/* Zero out partial block at the edges of the range */
 	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 
 	if (file->f_flags & O_SYNC)
 		ext4_handle_sync(handle);
@@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		ext4_handle_sync(handle);
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
+	ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
 	ext4_journal_stop(handle);
@@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
 	ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 831fd6be..58e2eea 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
-	inode_lock_shared(inode);
+	if (!inode_trylock_shared(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock_shared(inode);
+	}
 	/*
 	 * Recheck under inode lock - at this point we are sure it cannot
 	 * change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
-	inode_lock(inode);
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock(inode);
+	}
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		return ext4_dax_write_iter(iocb, from);
 #endif
 
-	inode_lock(inode);
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		inode_lock(inode);
+	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	iocb->private = &overwrite;
 	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
-		overwrite = 1;
+	if (o_direct && !unaligned_aio) {
+		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+			if (ext4_should_dioread_nolock(inode))
+				overwrite = 1;
+		} else if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
 
 	ret = __generic_file_write_iter(iocb, from);
 	inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 		if (ret < 0)
 			return ret;
 	}
+
+	/* Set the flags to support nowait AIO */
+	filp->f_mode |= FMODE_AIO_NOWAIT;
+
 	return dquot_file_open(inode, filp);
 }
 
@@ -474,57 +497,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 	endoff = (loff_t)end_blk << blkbits;
 
 	index = startoff >> PAGE_SHIFT;
-	end = endoff >> PAGE_SHIFT;
+	end = (endoff - 1) >> PAGE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 	do {
 		int i, num;
 		unsigned long nr_pages;
 
-		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  (pgoff_t)num);
-		if (nr_pages == 0) {
-			if (whence == SEEK_DATA)
-				break;
-
-			BUG_ON(whence != SEEK_HOLE);
-			/*
-			 * If this is the first time to go into the loop and
-			 * offset is not beyond the end offset, it will be a
-			 * hole at this offset
-			 */
-			if (lastoff == startoff || lastoff < endoff)
-				found = 1;
+		if (nr_pages == 0)
 			break;
-		}
-
-		/*
-		 * If this is the first time to go into the loop and
-		 * offset is smaller than the first page offset, it will be a
-		 * hole at this offset.
-		 */
-		if (lastoff == startoff && whence == SEEK_HOLE &&
-		    lastoff < page_offset(pvec.pages[0])) {
-			found = 1;
-			break;
-		}
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 			struct buffer_head *bh, *head;
 
 			/*
-			 * If the current offset is not beyond the end of given
-			 * range, it will be a hole.
+			 * If current offset is smaller than the page offset,
+			 * there is a hole at this offset.
 			 */
-			if (lastoff < endoff && whence == SEEK_HOLE &&
-			    page->index > end) {
+			if (whence == SEEK_HOLE && lastoff < endoff &&
+			    lastoff < page_offset(pvec.pages[i])) {
 				found = 1;
 				*offset = lastoff;
 				goto out;
 			}
 
+			if (page->index > end)
+				goto out;
+
 			lock_page(page);
 
 			if (unlikely(page->mapping != inode->i_mapping)) {
@@ -564,20 +567,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 			unlock_page(page);
 		}
 
-		/*
-		 * The no. of pages is less than our desired, that would be a
-		 * hole in there.
-		 */
-		if (nr_pages < num && whence == SEEK_HOLE) {
-			found = 1;
-			*offset = lastoff;
+		/* Fewer pages than requested means we are done. */
+		if (nr_pages < num)
 			break;
-		}
 
 		index = pvec.pages[i - 1]->index + 1;
 		pagevec_release(&pvec);
 	} while (index <= end);
 
+	if (whence == SEEK_HOLE && lastoff < endoff) {
+		found = 1;
+		*offset = lastoff;
+	}
 out:
 	pagevec_release(&pvec);
 	return found;
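
Every entry point touched in the fs/ext4/file.c hunks above follows the same shape
for IOCB_NOWAIT support: try the lock, fail fast with -EAGAIN if the caller asked
not to block, otherwise fall back to the blocking lock.  A sketch of that pattern
in isolation (the helper name is made up for illustration):

/* Take the inode lock, or bail out early for nowait I/O. */
static int lock_inode_for_io(struct inode *inode, struct kiocb *iocb)
{
	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;	/* caller must not be blocked */
		inode_lock(inode);	/* blocking acquisition is allowed */
	}
	return 0;
}
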
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d5dea4c..8d141c0 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1627,7 +1627,6 @@ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
 
 struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					struct ext4_filename *fname,
-					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
 					int *has_inline_data)
 {
@@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 						EXT4_INLINE_DOTDOT_SIZE;
 	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
 	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
-			      dir, fname, d_name, 0, res_dir);
+			      dir, fname, 0, res_dir);
 	if (ret == 1)
 		goto out_find;
 	if (ret < 0)
@@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
 
 	ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
-			      dir, fname, d_name, 0, res_dir);
+			      dir, fname, 0, res_dir);
 	if (ret == 1)
 		goto out_find;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1bd0bfa..5cf82d0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2124,15 +2124,29 @@ static int ext4_writepage(struct page *page,
 static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 {
 	int len;
-	loff_t size = i_size_read(mpd->inode);
+	loff_t size;
 	int err;
 
 	BUG_ON(page->index != mpd->first_page);
+	clear_page_dirty_for_io(page);
+	/*
+	 * We have to be very careful here!  Nothing protects writeback path
+	 * against i_size changes and the page can be writeably mapped into
+	 * page tables. So an application can be growing i_size and writing
+	 * data through mmap while writeback runs. clear_page_dirty_for_io()
+	 * write-protects our page in page tables and the page cannot get
+	 * written to again until we release page lock. So only after
+	 * clear_page_dirty_for_io() we are safe to sample i_size for
+	 * ext4_bio_write_page() to zero-out tail of the written page. We rely
+	 * on the barrier provided by TestClearPageDirty in
+	 * clear_page_dirty_for_io() to make sure i_size is really sampled only
+	 * after page tables are updated.
+	 */
+	size = i_size_read(mpd->inode);
 	if (page->index == size >> PAGE_SHIFT)
 		len = size & ~PAGE_MASK;
 	else
 		len = PAGE_SIZE;
-	clear_page_dirty_for_io(page);
 	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
@@ -3629,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 		get_block_func = ext4_dio_get_block_unwritten_async;
 		dio_flags = DIO_LOCKING;
 	}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
-#endif
 	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
 				   get_block_func, ext4_end_io_dio, NULL,
 				   dio_flags);
@@ -3713,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 	 */
 	inode_lock_shared(inode);
 	ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-					   iocb->ki_pos + count);
+					   iocb->ki_pos + count - 1);
 	if (ret)
 		goto out_unlock;
 	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
@@ -4207,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);
 out_stop:
 	ext4_journal_stop(handle);
 out_dio:
@@ -5637,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
 	/* No extended attributes present */
 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
 	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
-		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
-			new_extra_isize);
+		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+		       EXT4_I(inode)->i_extra_isize, 0,
+		       new_extra_isize - EXT4_I(inode)->i_extra_isize);
 		EXT4_I(inode)->i_extra_isize = new_extra_isize;
 		return 0;
 	}
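
The ext4_expand_extra_isize() hunk above narrows the memset so that only the bytes
beyond the current i_extra_isize are cleared, instead of wiping the region already
in use.  A hedged sketch with illustrative numbers (32 bytes already in use,
expanding to 64):

/* Illustrative only: actual offsets depend on the on-disk inode layout. */
char *extra = (char *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE;
unsigned int old_isize = EXT4_I(inode)->i_extra_isize;	/* e.g. 32 */
unsigned int new_isize = new_extra_isize;		/* e.g. 64 */

/* Clear only the newly exposed 32 bytes; the first 32 stay intact. */
memset(extra + old_isize, 0, new_isize - old_isize);
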
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5083bce2..b7928cd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
 	err = ext4_mb_load_buddy(sb, group, &e4b);
 	if (err) {
-		ext4_error(sb, "Error loading buddy information for %u", group);
+		ext4_warning(sb, "Error %d loading buddy information for %u",
+			     err, group);
 		put_bh(bitmap_bh);
 		return 0;
 	}
@@ -4044,10 +4045,11 @@ void ext4_discard_preallocations(struct inode *inode)
 		BUG_ON(pa->pa_type != MB_INODE_PA);
 		group = ext4_get_group_number(sb, pa->pa_pstart);
 
-		err = ext4_mb_load_buddy(sb, group, &e4b);
+		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+					     GFP_NOFS|__GFP_NOFAIL);
 		if (err) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+			ext4_error(sb, "Error %d loading buddy information for %u",
+				   err, group);
 			continue;
 		}
 
@@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 	spin_unlock(&lg->lg_prealloc_lock);
 
 	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+		int err;
 
 		group = ext4_get_group_number(sb, pa->pa_pstart);
-		if (ext4_mb_load_buddy(sb, group, &e4b)) {
-			ext4_error(sb, "Error loading buddy information for %u",
-					group);
+		err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+					     GFP_NOFS|__GFP_NOFAIL);
+		if (err) {
+			ext4_error(sb, "Error %d loading buddy information for %u",
+				   err, group);
 			continue;
 		}
 		ext4_lock_group(sb, group);
@@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 
 	ret = ext4_mb_load_buddy(sb, group, &e4b);
 	if (ret) {
-		ext4_error(sb, "Error in loading buddy "
-				"information for %u", group);
+		ext4_warning(sb, "Error %d loading buddy information for %u",
+			     ret, group);
 		return ret;
 	}
 	bitmap = e4b.bd_bitmap;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b81f7d4..404256c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1155,12 +1155,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 static inline int search_dirblock(struct buffer_head *bh,
 				  struct inode *dir,
 				  struct ext4_filename *fname,
-				  const struct qstr *d_name,
 				  unsigned int offset,
 				  struct ext4_dir_entry_2 **res_dir)
 {
 	return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
-			       fname, d_name, offset, res_dir);
+			       fname, offset, res_dir);
 }
 
 /*
@@ -1262,7 +1261,6 @@ static inline bool ext4_match(const struct ext4_filename *fname,
  */
 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 		    struct inode *dir, struct ext4_filename *fname,
-		    const struct qstr *d_name,
 		    unsigned int offset, struct ext4_dir_entry_2 **res_dir)
 {
 	struct ext4_dir_entry_2 * de;
@@ -1355,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 
 	if (ext4_has_inline_data(dir)) {
 		int has_inline_data = 1;
-		ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
+		ret = ext4_find_inline_entry(dir, &fname, res_dir,
 					     &has_inline_data);
 		if (has_inline_data) {
 			if (inlined)
@@ -1447,7 +1445,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 			goto next;
 		}
 		set_buffer_verified(bh);
-		i = search_dirblock(bh, dir, &fname, d_name,
+		i = search_dirblock(bh, dir, &fname,
 			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
 		if (i == 1) {
 			EXT4_I(dir)->i_dir_start_lookup = block;
@@ -1488,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_frame frames[2], *frame;
-	const struct qstr *d_name = fname->usr_fname;
 	struct buffer_head *bh;
 	ext4_lblk_t block;
 	int retval;
@@ -1505,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		if (IS_ERR(bh))
 			goto errout;
 
-		retval = search_dirblock(bh, dir, fname, d_name,
+		retval = search_dirblock(bh, dir, fname,
 					 block << EXT4_BLOCK_SIZE_BITS(sb),
 					 res_dir);
 		if (retval == 1)
@@ -1530,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 
 	bh = NULL;
 errout:
-	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
+	dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
 success:
 	dx_release(frames);
 	return bh;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138..c2fce44 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
 		}
 #endif
 
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			SetPageError(page);
 			mapping_set_error(page->mapping, -EIO);
 		}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
 				continue;
 			}
 			clear_buffer_async_write(bh);
-			if (bio->bi_error)
+			if (bio->bi_status)
 				buffer_io_error(bh);
 		} while ((bh = bh->b_this_page) != head);
 		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
 		      bdevname(bio->bi_bdev, b),
 		      (long long) bio->bi_iter.bi_sector,
 		      (unsigned) bio_sectors(bio),
-		      bio->bi_error)) {
+		      bio->bi_status)) {
 		ext4_finish_bio(bio);
 		bio_put(bio);
 		return;
 	}
 	bio->bi_end_io = NULL;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct inode *inode = io_end->inode;
 
 		ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
 			     "(offset %llu size %ld starting block %llu)",
-			     bio->bi_error, inode->i_ino,
+			     bio->bi_status, inode->i_ino,
 			     (unsigned long long) io_end->offset,
 			     (long) io_end->size,
 			     (unsigned long long)
 			     bi_sector >> (inode->i_blkbits - 9));
-		mapping_set_error(inode->i_mapping, bio->bi_error);
+		mapping_set_error(inode->i_mapping,
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
 	if (bio) {
 		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
 				  REQ_SYNC : 0;
+		io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
 		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 		submit_bio(io->io_bio);
 	}
@@ -396,6 +398,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
 		ret = io_submit_init_bio(io, bh);
 		if (ret)
 			return ret;
+		io->io_bio->bi_write_hint = inode->i_write_hint;
 	}
 	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
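
The bi_error to bi_status conversions throughout this merge switch bio completion
handlers from a raw errno to a blk_status_t: the status is tested for truth directly
and converted with blk_status_to_errno() wherever an errno is still required.  A
generic, hedged end_io sketch (not taken from any one file here; the bi_private
cookie is assumed to be a page):

static void example_read_end_io(struct bio *bio)
{
	struct page *page = bio->bi_private;	/* assumed completion cookie */

	if (bio->bi_status) {
		/* Record a real errno, not the raw block-layer status. */
		mapping_set_error(page->mapping,
				  blk_status_to_errno(bio->bi_status));
		SetPageError(page);
	} else {
		SetPageUptodate(page);
	}
	unlock_page(page);
	bio_put(bio);
}
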
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829..40a5497 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
 	int i;
 
 	if (ext4_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			SetPageUptodate(page);
 		} else {
 			ClearPageUptodate(page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b177da..9006cb5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -848,14 +848,9 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
 {
 	int type;
 
-	if (ext4_has_feature_quota(sb)) {
-		dquot_disable(sb, -1,
-			      DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
-	} else {
-		/* Use our quota_off function to clear inode flags etc. */
-		for (type = 0; type < EXT4_MAXQUOTAS; type++)
-			ext4_quota_off(sb, type);
-	}
+	/* Use our quota_off function to clear inode flags etc. */
+	for (type = 0; type < EXT4_MAXQUOTAS; type++)
+		ext4_quota_off(sb, type);
 }
 #else
 static inline void ext4_quota_off_umount(struct super_block *sb)
@@ -1179,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
 		return res;
 	}
 
+	res = dquot_initialize(inode);
+	if (res)
+		return res;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_MISC,
 			ext4_jbd2_credits_xattr(inode));
@@ -3952,7 +3950,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_qcop = &ext4_qctl_operations;
 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
-	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
@@ -5485,7 +5483,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 		goto out;
 
 	err = dquot_quota_off(sb, type);
-	if (err)
+	if (err || ext4_has_feature_quota(sb))
 		goto out_put;
 
 	inode_lock(inode);
@@ -5505,6 +5503,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 out_unlock:
 	inode_unlock(inode);
 out_put:
+	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
 	iput(inode);
 	return err;
 out:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8fb7ce1..5d3c253 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -888,6 +888,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			else {
 				u32 ref;
 
+				WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 				/* The old block is released after updating
 				   the inode. */
 				error = dquot_alloc_block(inode,
@@ -954,6 +956,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			/* We need to allocate a new block */
 			ext4_fsblk_t goal, block;
 
+			WARN_ON_ONCE(dquot_initialize_needed(inode));
+
 			goal = ext4_group_first_block_no(sb,
 						EXT4_I(inode)->i_block_group);
 
@@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		return -EINVAL;
 	if (strlen(name) > 255)
 		return -ERANGE;
+
 	ext4_write_lock_xattr(inode, &no_expand);
 
 	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 	int credits = ext4_jbd2_credits_xattr(inode);
 
+	error = dquot_initialize(inode);
+	if (error)
+		return error;
 retry:
 	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
 	if (IS_ERR(handle)) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bd..36fe820 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
 		f2fs_show_injection_info(FAULT_IO);
-		bio->bi_error = -EIO;
+		bio->bi_status = BLK_STS_IOERR;
 	}
 #endif
 
 	if (f2fs_bio_encrypted(bio)) {
-		if (bio->bi_error) {
+		if (bio->bi_status) {
 			fscrypt_release_ctx(bio->bi_private);
 		} else {
 			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		if (!bio->bi_error) {
+		if (!bio->bi_status) {
 			if (!PageUptodate(page))
 				SetPageUptodate(page);
 		} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
 			unlock_page(page);
 			mempool_free(page, sbi->write_io_dummy);
 
-			if (unlikely(bio->bi_error))
+			if (unlikely(bio->bi_status))
 				f2fs_stop_checkpoint(sbi, true);
 			continue;
 		}
 
 		fscrypt_pullback_bio_page(&page, true);
 
-		if (unlikely(bio->bi_error)) {
+		if (unlikely(bio->bi_status)) {
 			mapping_set_error(page->mapping, -EIO);
 			f2fs_stop_checkpoint(sbi, true);
 		}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2185c7a..fd2e651 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
 {
 	SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
 	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 retval;
 	int err;
 
 	shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	retval = *ctx;
+	barrier_data(ctx);
+	return retval;
 }
 
 static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9684585..ea9f455 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 {
 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
-	dc->error = bio->bi_error;
+	dc->error = blk_status_to_errno(bio->bi_status);
 	dc->state = D_DONE;
 	complete(&dc->wait);
 	bio_put(bio);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83355ec..0b89b0b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1937,7 +1937,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
 		(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
-	memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+	memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
 
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267..ed051f8 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 }
 #endif
 
+static bool rw_hint_valid(enum rw_hint hint)
+{
+	switch (hint) {
+	case RWF_WRITE_LIFE_NOT_SET:
+	case RWH_WRITE_LIFE_NONE:
+	case RWH_WRITE_LIFE_SHORT:
+	case RWH_WRITE_LIFE_MEDIUM:
+	case RWH_WRITE_LIFE_LONG:
+	case RWH_WRITE_LIFE_EXTREME:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 *argp = (u64 __user *)arg;
+	enum rw_hint hint;
+	u64 h;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		h = file_write_hint(file);
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
+	case F_GET_RW_HINT:
+		h = inode->i_write_hint;
+		if (copy_to_user(argp, &h, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_RW_HINT:
+		if (copy_from_user(&h, argp, sizeof(h)))
+			return -EFAULT;
+		hint = (enum rw_hint) h;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		inode_lock(inode);
+		inode->i_write_hint = hint;
+		inode_unlock(inode);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GET_SEALS:
 		err = shmem_fcntl(filp, cmd, arg);
 		break;
+	case F_GET_RW_HINT:
+	case F_SET_RW_HINT:
+	case F_GET_FILE_RW_HINT:
+	case F_SET_FILE_RW_HINT:
+		err = fcntl_rw_hint(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
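
fcntl_rw_hint() above passes the hint by pointer, so user space hands in the address
of a u64 holding one of the RWH_WRITE_LIFE_* values.  A hedged user-space sketch
(it assumes the F_SET_RW_HINT and RWH_WRITE_LIFE_* constants are visible through
recent kernel/glibc headers; older setups may need <linux/fcntl.h> instead):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

/* Mark data written to this inode as short-lived. */
int set_short_write_hint(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	/* The hint is passed by pointer, matching the copy_from_user() above. */
	if (fcntl(fd, F_SET_RW_HINT, &hint) == -1) {
		perror("F_SET_RW_HINT");
		return -1;
	}
	return 0;
}
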
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 611b540..e747b3d 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 
 void pin_kill(struct fs_pin *p)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (!p) {
 		rcu_read_unlock();
@@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p)
 		rcu_read_unlock();
 		schedule();
 		rcu_read_lock();
-		if (likely(list_empty(&wait.task_list)))
+		if (likely(list_empty(&wait.entry)))
 			break;
 		/* OK, we know p couldn't have been freed yet */
 		spin_lock_irq(&p->wait.lock);
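
Several hunks in this merge are part of the mechanical wait_queue_t to
wait_queue_entry_t rename, which also renames the embedded list head from task_list
to entry (as seen in pin_kill() above).  A minimal sketch of the renamed pattern
(the helper name is illustrative):

static bool wait_entry_is_idle(wait_queue_entry_t *wait)
{
	/* was: list_empty(&wait->task_list) before the rename */
	return list_empty(&wait->entry);
}
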
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d..aa3d445 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
 	atomic_t sd_log_in_flight;
 	struct bio *sd_log_bio;
 	wait_queue_head_t sd_log_flush_wait;
-	int sd_log_error;
 
 	atomic_t sd_reserving_log;
 	wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f865b96..d2955da 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	struct gfs2_log_header *lh;
 	unsigned int tail;
 	u32 hash;
-	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
+	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	lh = page_address(page);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144..885d36e 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  */
 
 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
-				  int error)
+				  blk_status_t error)
 {
 	struct buffer_head *bh, *next;
 	struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
 	struct page *page;
 	int i;
 
-	if (bio->bi_error) {
-		sdp->sd_log_error = bio->bi_error;
-		fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
-	}
+	if (bio->bi_status)
+		fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		page = bvec->bv_page;
 		if (page_has_buffers(page))
-			gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
 		else
 			mempool_free(page, gfs2_page_pool);
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc1..fabe1614f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
 		do {
 			struct buffer_head *next = bh->b_this_page;
 			len -= bh->b_size;
-			bh->b_end_io(bh, !bio->bi_error);
+			bh->b_end_io(bh, !bio->bi_status);
 			bh = next;
 		} while (bh && len);
 	}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ed67548..e76058d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (!bio->bi_error)
+	if (!bio->bi_status)
 		SetPageUptodate(page);
 	else
-		pr_warn("error %d reading superblock\n", bio->bi_error);
+		pr_warn("error %d reading superblock\n", bio->bi_status);
 	unlock_page(page);
 }
 
@@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 
 	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-	memcpy(s->s_uuid, str->sb_uuid, 16);
+	memcpy(&s->s_uuid, str->sb_uuid, 16);
 }
 
 /**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7a51534..e77bc52 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
 }
 
-static int gfs2_uuid_valid(const u8 *uuid)
-{
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		if (uuid[i])
-			return 1;
-	}
-	return 0;
-}
-
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
+
 	buf[0] = '\0';
-	if (!gfs2_uuid_valid(uuid))
+	if (uuid_is_null(&s->s_uuid))
 		return 0;
-	return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+	return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
 }
 
 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 {
 	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
 	struct super_block *s = sdp->sd_vfs;
-	const u8 *uuid = s->s_uuid;
 
 	add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
 	add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
 	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
 		add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
-	if (gfs2_uuid_valid(uuid))
-		add_uevent_var(env, "UUID=%pUB", uuid);
+	if (!uuid_is_null(&s->s_uuid))
+		add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
 	return 0;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dde8613..d44f545 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
diff --git a/fs/inode.c b/fs/inode.c
index db59147..ab3b9a7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	i_gid_write(inode, 0);
 	atomic_set(&inode->i_writecount, 0);
 	inode->i_size = 0;
+	inode->i_write_hint = WRITE_LIFE_NOT_SET;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
@@ -1891,11 +1892,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
 	wait_queue_head_t *wq;
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
-	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 	schedule();
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 	spin_lock(&inode_hash_lock);
 }
 
@@ -2038,11 +2039,11 @@ static void __inode_dio_wait(struct inode *inode)
 	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
 
 	do {
-		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (atomic_read(&inode->i_dio_count))
 			schedule();
 	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wait);
+	finish_wait(wq, &q.wq_entry);
 }
 
 /**
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892..fa6cd5b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
 
-	if (bio->bi_error)
-		iomap_dio_set_error(dio, bio->bi_error);
+	if (bio->bi_status)
+		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
 		if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		bio->bi_bdev = iomap->bdev;
 		bio->bi_iter.bi_sector =
 			iomap->blkno + ((pos - iomap->offset) >> 9);
+		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
 
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		flags |= IOMAP_WRITE;
 	}
 
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (filemap_range_has_page(mapping, start, end)) {
+			ret = -EAGAIN;
+			goto out_free_dio;
+		}
+		flags |= IOMAP_NOWAIT;
+	}
+
 	ret = filemap_write_and_wait_range(mapping, start, end);
 	if (ret)
 		goto out_free_dio;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ebad342..7d5ef3b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2579,10 +2579,10 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
 		wait_queue_head_t *wq;
 		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
 		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		spin_unlock(&journal->j_list_lock);
 		schedule();
-		finish_wait(wq, &wait.wait);
+		finish_wait(wq, &wait.wq_entry);
 		goto restart;
 	}
 
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9ee4832..2d30a6d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -680,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
 
 	rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
 	handle->h_buffer_credits = nblocks;
+	/*
+	 * Restore the original nofs context because the journal restart
+	 * is basically the same thing as journal stop and start.
+	 * start_this_handle will start a new nofs context.
+	 */
+	memalloc_nofs_restore(handle->saved_alloc_context);
 	ret = start_this_handle(journal, handle, gfp_mask);
 	return ret;
 }
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1f..a21f0e9 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
 
 	bp->l_flag |= lbmDONE;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		bp->l_flag |= lbmERROR;
 
 		jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1..ce93db3 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
 {
 	struct page *page = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
 		SetPageError(page);
 	}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
 
 	BUG_ON(!PagePrivate(page));
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
 		SetPageError(page);
 	}
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f8..d6d1486 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+		page_endio(page, op_is_write(bio_op(bio)),
+				blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
@@ -614,6 +615,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 			goto confused;
 
 		wbc_init_bio(wbc, bio);
+		bio->bi_write_hint = inode->i_write_hint;
 	}
 
 	/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8bd3e4d..5a44384 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 		return err;
 	}
 
+	put_mnt_ns(old_mnt_ns);
+
 	/* Update the pwd and root */
 	set_fs_pwd(fs, &root);
 	set_fs_root(fs, &root);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d..d8863a8 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
 {
 	struct parallel_io *par = bio->bi_private;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		struct nfs_pgio_header *header = par->data;
 
 		if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
 	struct parallel_io *par = bio->bi_private;
 	struct nfs_pgio_header *header = par->data;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		if (!header->pnfs_error)
 			header->pnfs_error = -EIO;
 		pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e..390ac9c 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
 	nfs4_free_slot(tbl, slot);
-	nfs4_slot_tbl_drain_complete(tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd77..2ac00bf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(nfs_link);
 
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
-	struct dentry *old_dentry = data->old_dentry;
-	struct dentry *new_dentry = data->new_dentry;
-	struct inode *old_inode = d_inode(old_dentry);
-	struct inode *new_inode = d_inode(new_dentry);
-
-	nfs_mark_for_revalidate(old_inode);
-
-	switch (task->tk_status) {
-	case 0:
-		if (new_inode != NULL)
-			nfs_drop_nlink(new_inode);
-		d_move(old_dentry, new_dentry);
-		nfs_set_verifier(new_dentry,
-					nfs_save_change_attribute(data->new_dir));
-		break;
-	case -ENOENT:
-		nfs_dentry_handle_enoent(old_dentry);
-	}
-}
-
 /*
  * RENAME
  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	struct inode *old_inode = d_inode(old_dentry);
 	struct inode *new_inode = d_inode(new_dentry);
-	struct dentry *dentry = NULL;
+	struct dentry *dentry = NULL, *rehash = NULL;
 	struct rpc_task *task;
 	int error = -EBUSY;
 
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * To prevent any new references to the target during the
 		 * rename, we unhash the dentry in advance.
 		 */
-		if (!d_unhashed(new_dentry))
+		if (!d_unhashed(new_dentry)) {
 			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
 
 		if (d_count(new_dentry) > 2) {
 			int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				goto out;
 
 			new_dentry = dentry;
+			rehash = NULL;
 			new_inode = NULL;
 		}
 	}
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (new_inode != NULL)
 		NFS_PROTO(new_inode)->return_delegation(new_inode);
 
-	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
-					nfs_complete_rename);
+	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 	if (IS_ERR(task)) {
 		error = PTR_ERR(task);
 		goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error == 0)
 		error = task->tk_status;
 	rpc_put_task(task);
+	nfs_mark_for_revalidate(old_inode);
 out:
+	if (rehash)
+		d_rehash(rehash);
 	trace_nfs_rename_exit(old_dir, old_dentry,
 			new_dir, new_dentry, error);
+	if (!error) {
+		if (new_inode != NULL)
+			nfs_drop_nlink(new_inode);
+		/*
+		 * The d_move() should be here instead of in an async RPC completion
+		 * handler because we need the proper locks to move the dentry.  If
+		 * we're interrupted by a signal, the async RPC completion handler
+		 * should mark the directories for revalidation.
+		 */
+		d_move(old_dentry, new_dentry);
+		nfs_set_verifier(new_dentry,
+					nfs_save_change_attribute(new_dir));
+	} else if (error == -ENOENT)
+		nfs_dentry_handle_enoent(old_dentry);
+
 	/* new dentry created? */
 	if (dentry)
 		dput(dentry);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f5714ee..23542dc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 			goto out_err_free;
 
 		/* fh */
+		rc = -EIO;
 		p = xdr_inline_decode(&stream, 4);
 		if (!p)
 			goto out_err_free;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e9b4c33..8701d76 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -7,6 +7,7 @@
 #include <linux/security.h>
 #include <linux/crc32.h>
 #include <linux/nfs_page.h>
+#include <linux/wait_bit.h>
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
@@ -398,7 +399,6 @@ extern struct file_system_type nfs4_referral_fs_type;
 bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
 struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
 			struct nfs_subversion *);
-void nfs_initialise_sb(struct super_block *);
 int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
 int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
 struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
@@ -458,7 +458,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 
 /* super.c */
-void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
 void nfs_umount_begin(struct super_block *);
 int  nfs_statfs(struct dentry *, struct kstatfs *);
 int  nfs_show_options(struct seq_file *, struct dentry *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1a224a3..e5686be 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -246,7 +246,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
 
 	devname = nfs_devname(dentry, page, PAGE_SIZE);
 	if (IS_ERR(devname))
-		mnt = (struct vfsmount *)devname;
+		mnt = ERR_CAST(devname);
 	else
 		mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
 
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 929d09a..319a47d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -177,7 +177,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		goto out;
 
-	if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+	if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
 				    &res->commit_res.verf->verifier)) {
 		status = -EAGAIN;
 		goto out;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 692a7a8..66776f0 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -582,7 +582,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
 			 */
 			nfs4_schedule_path_down_recovery(pos);
 		default:
-			spin_lock(&nn->nfs_client_lock);
 			goto out;
 		}
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a..98b0b66 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 
 	/* Except for MODE, setting an attribute twice seems harmless. */
 	if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-		attrset[1] & FATTR4_WORD1_MODE)
+		(attrset[1] & FATTR4_WORD1_MODE ||
+		 attrset[2] & FATTR4_WORD2_MODE_UMASK))
 		sattr->ia_valid &= ~ATTR_MODE;
 
 	if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -6372,7 +6373,7 @@ struct nfs4_lock_waiter {
 };
 
 static int
-nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key)
+nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
 {
 	int ret;
 	struct cb_notify_lock_args *cbnl = key;
@@ -6415,7 +6416,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 					   .inode = state->inode,
 					   .owner = &owner,
 					   .notified = false };
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	/* Don't bother with waitqueue if we don't expect a callback */
 	if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
 	size_t max_pages = max_response_pages(server);
 
 	dprintk("--> %s\n", __func__);
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	nfs4_free_pages(lgp->args.layout.pages, max_pages);
 	pnfs_put_layout_hdr(NFS_I(inode)->layout);
 	put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
 	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
 	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
-	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de03..cbf82b0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
 	put_rpccred(cred);
 	switch (status) {
 	case 0:
+	case -EINTR:
+	case -ERESTARTSYS:
 		break;
 	case -ETIMEDOUT:
 		if (clnt->cl_softrtry)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index adc6ec2..c383d09 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2094,12 +2094,26 @@ pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
 
+/*
+ * Check for any intersection between the request and the pgio->pg_lseg,
+ * and if none, put this pgio->pg_lseg away.
+ */
+static void
+pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
+		pnfs_put_lseg(pgio->pg_lseg);
+		pgio->pg_lseg = NULL;
+	}
+}
+
 void
 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
 	u64 rd_size = req->wb_bytes;
 
 	pnfs_generic_pg_check_layout(pgio);
+	pnfs_generic_pg_check_range(pgio, req);
 	if (pgio->pg_lseg == NULL) {
 		if (pgio->pg_dreq == NULL)
 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2131,6 +2145,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			   struct nfs_page *req, u64 wb_size)
 {
 	pnfs_generic_pg_check_layout(pgio);
+	pnfs_generic_pg_check_range(pgio, req);
 	if (pgio->pg_lseg == NULL) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
@@ -2191,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
 		seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
 				     pgio->pg_lseg->pls_range.length);
 		req_start = req_offset(req);
-		WARN_ON_ONCE(req_start >= seg_end);
+
 		/* start of request is past the last byte of this segment */
-		if (req_start >= seg_end) {
-			/* reference the new lseg */
-			if (pgio->pg_ops->pg_cleanup)
-				pgio->pg_ops->pg_cleanup(pgio);
-			if (pgio->pg_ops->pg_init)
-				pgio->pg_ops->pg_init(pgio, req);
+		if (req_start >= seg_end)
 			return 0;
-		}
 
 		/* adjust 'size' iff there are fewer bytes left in the
 		 * segment than what nfs_generic_pg_test returned */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d05b75..99731e3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -593,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
 	return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2);
 }
 
+static inline bool
+pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req)
+{
+	u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length);
+	u64 req_last = req_offset(req) + req->wb_bytes;
+
+	return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last,
+				req_offset(req), req_last);
+}
+
 extern unsigned int layoutstats_timer;
 
 #ifdef NFS_DEBUG
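
pnfs_lseg_request_intersecting() above is a range-overlap test: the request's byte
range [offset, offset + wb_bytes) must overlap the segment's byte range, with
pnfs_is_range_intersecting() also treating NFS4_MAX_UINT64 as "to end of file".  A
stand-alone sketch of the basic half-open overlap check, ignoring that end-of-file
sentinel:

#include <stdbool.h>
#include <stdint.h>

/* Half-open ranges [a_start, a_end) and [b_start, b_end) overlap iff
 * each range starts before the other one ends. */
static bool ranges_overlap(uint64_t a_start, uint64_t a_end,
			   uint64_t b_start, uint64_t b_end)
{
	return a_start < b_end && b_start < a_end;
}
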
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f3822a..eceb4ea 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount);
 /*
  * Initialise the common bits of the superblock
  */
-inline void nfs_initialise_sb(struct super_block *sb)
+static void nfs_initialise_sb(struct super_block *sb)
 {
 	struct nfs_server *server = NFS_SB(sb);
 
@@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super);
 /*
  * Finish setting up a cloned NFS2/3/4 superblock
  */
-void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_clone_super(struct super_block *sb,
+			    struct nfs_mount_info *mount_info)
 {
 	const struct super_block *old_sb = mount_info->cloned->sb;
 	struct nfs_server *server = NFS_SB(sb);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213a..c862c24 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 	u8 *buf, *d, type, assoc;
 	int error;
 
+	if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+		return -EINVAL;
+
 	buf = kzalloc(bufflen, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 		goto out_free_buf;
 	}
 	req = scsi_req(rq);
-	scsi_req_init(rq);
 
 	error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
 	if (error)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index e71f11b..3bc08c3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
 #endif
 
 static inline int
-uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
 {
 	int len;
 
@@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 			if (strcmp(buf, "fsloc") == 0)
 				err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
 			else if (strcmp(buf, "uuid") == 0)
-				err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+				err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
 			else if (strcmp(buf, "secinfo") == 0)
 				err = secinfo_parse(&mesg, buf, &exp);
 			else
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6..4523346 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	if (!p)
 		return 0;
 	p = xdr_decode_hyper(p, &args->offset);
+
 	args->count = ntohl(*p++);
-
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	len = min(args->count, max_blocksize);
 
 	/* set up the kvec */
@@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->verf   = p; p += 2;
 	args->dircount = ~0;
 	args->count  = ntohl(*p++);
-
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->dircount = ntohl(*p++);
 	args->count    = ntohl(*p++);
 
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	len = args->count = min(args->count, max_blocksize);
 	while (len > 0) {
 		struct page *p = *(rqstp->rq_next_page++);
@@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
-	return 1;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c453a19..dadb3bf 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			opdesc->op_get_currentstateid(cstate, &op->u);
 		op->status = opdesc->op_func(rqstp, cstate, &op->u);
 
+		/* Only from SEQUENCE */
+		if (cstate->status == nfserr_replay_cache) {
+			dprintk("%s NFS4.1 replay from cache\n", __func__);
+			status = op->status;
+			goto out;
+		}
 		if (!op->status) {
 			if (opdesc->op_set_currentstateid)
 				opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 			if (need_wrongsec_check(rqstp))
 				op->status = check_nfsd_access(current_fh->fh_export, rqstp);
 		}
-
 encode_op:
-		/* Only from SEQUENCE */
-		if (cstate->status == nfserr_replay_cache) {
-			dprintk("%s NFS4.1 replay from cache\n", __func__);
-			status = op->status;
-			goto out;
-		}
 		if (op->status == nfserr_replay_me) {
 			op->replay = &cstate->replay_owner->so_replay;
 			nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a..de07ff6 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	len = args->count     = ntohl(*p++);
 	p++; /* totalcount - unused */
 
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
-
 	len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
 
 	/* set up somewhere to store response.
@@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 		v++;
 	}
 	args->vlen = v;
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 int
@@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	args->cookie = ntohl(*p++);
 	args->count  = ntohl(*p++);
 	args->count  = min_t(u32, args->count, PAGE_SIZE);
-	if (!xdr_argsize_check(rqstp, p))
-		return 0;
 	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return 1;
+	return xdr_argsize_check(rqstp, p);
 }
 
 /*
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2a..e73c86d 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
 {
 	struct nilfs_segment_buffer *segbuf = bio->bi_private;
 
-	if (bio->bi_error)
+	if (bio->bi_status)
 		atomic_inc(&segbuf->sb_err);
 
 	bio_put(bio);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index febed12..70ded52 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
 }
 
 struct nilfs_segctor_wait_request {
-	wait_queue_t	wq;
+	wait_queue_entry_t	wq;
 	__u32		seq;
 	int		err;
 	atomic_t	done;
@@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
 	unsigned long flags;
 
 	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
-	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
-				 wq.task_list) {
+	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
 		if (!atomic_read(&wrq->done) &&
 		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
 			wrq->err = err;
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 3582583..4690cd7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 					PTR_ERR(dent_inode));
 		kfree(name);
 		/* Return the error code. */
-		return (struct dentry *)dent_inode;
+		return ERR_CAST(dent_inode);
 	}
 	/* It is guaranteed that @name is no longer allocated at this point. */
 	if (MREF_ERR(mref) == -ENOENT) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332..ffe0039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
 {
 	struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
 
-	if (bio->bi_error) {
-		mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
-		wc->wc_error = bio->bi_error;
+	if (bio->bi_status) {
+		mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+		wc->wc_error = blk_status_to_errno(bio->bi_status);
 	}
 
 	o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937..4689940 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
 	struct ocfs2_lock_res *lockres;
 
 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
+	/* had_lock means that the current process already took the cluster
+	 * lock previously. If had_lock is 1, we have nothing to do here, and
+	 * the lock will be dropped where it was originally taken.
+	 */
 	if (!had_lock) {
 		ocfs2_remove_holder(lockres, oh);
 		ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc98..9f88188 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -119,7 +119,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 
 	if (IS_ERR(inode)) {
 		mlog_errno(PTR_ERR(inode));
-		result = (void *)inode;
+		result = ERR_CAST(inode);
 		goto bail;
 	}
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ca1646f..83005f4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
-	memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+	memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
 	       sizeof(di->id2.i_super.s_uuid));
 
 	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d..f70c377 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
 			   void *buffer,
 			   size_t buffer_size)
 {
-	int ret;
+	int ret, had_lock;
 	struct buffer_head *di_bh = NULL;
+	struct ocfs2_lock_holder oh;
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 0);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+	if (had_lock < 0) {
+		mlog_errno(had_lock);
+		return had_lock;
 	}
 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
 				     name, buffer, buffer_size);
 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
-	ocfs2_inode_unlock(inode, 0);
+	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 
 	brelse(di_bh);
 
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
 {
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_dinode *di;
-	int ret, credits, ref_meta = 0, ref_credits = 0;
+	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
 	struct ocfs2_refcount_tree *ref_tree = NULL;
+	struct ocfs2_lock_holder oh;
 
 	struct ocfs2_xattr_info xi = {
 		.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
 		return -ENOMEM;
 	}
 
-	ret = ocfs2_inode_lock(inode, &di_bh, 1);
-	if (ret < 0) {
+	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+	if (had_lock < 0) {
+		ret = had_lock;
 		mlog_errno(ret);
 		goto cleanup_nolock;
 	}
@@ -3670,7 +3673,7 @@ int ocfs2_xattr_set(struct inode *inode,
 		if (ret)
 			mlog_errno(ret);
 	}
-	ocfs2_inode_unlock(inode, 1);
+	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 cleanup_nolock:
 	brelse(di_bh);
 	brelse(xbs.xattr_bh);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be..3fe0c4a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
+	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 83b5060..038d675 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -46,8 +46,8 @@ static void run_down(struct slot_map *m)
 	spin_lock(&m->q.lock);
 	if (m->c != -1) {
 		for (;;) {
-			if (likely(list_empty(&wait.task_list)))
-				__add_wait_queue_tail(&m->q, &wait);
+			if (likely(list_empty(&wait.entry)))
+				__add_wait_queue_entry_tail(&m->q, &wait);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 			if (m->c == -1)
@@ -84,8 +84,8 @@ static int wait_for_free(struct slot_map *m)
 
 	do {
 		long n = left, t;
-		if (likely(list_empty(&wait.task_list)))
-			__add_wait_queue_tail_exclusive(&m->q, &wait);
+		if (likely(list_empty(&wait.entry)))
+			__add_wait_queue_entry_tail_exclusive(&m->q, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (m->c > 0)
@@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m)
 			left = -EINTR;
 	} while (left > 0);
 
-	if (!list_empty(&wait.task_list))
-		list_del(&wait.task_list);
+	if (!list_empty(&wait.entry))
+		list_del(&wait.entry);
 	else if (left <= 0 && waitqueue_active(&m->q))
 		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
 	__set_current_state(TASK_RUNNING);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac51..c0c9683 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,6 @@
 config OVERLAY_FS
 	tristate "Overlay filesystem support"
+	select EXPORTFS
 	help
 	  An overlay filesystem combines two filesystems - an 'upper' filesystem
 	  and a 'lower' filesystem.  When a name exists in both filesystems, the
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9008ab9..e5869f9 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
@@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 			  struct dentry *upper)
 {
 	struct super_block *sb = lower->d_sb;
-	uuid_be *uuid = (uuid_be *) &sb->s_uuid;
 	const struct ovl_fh *fh = NULL;
 	int err;
 
@@ -294,13 +293,17 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * up and a pure upper inode.
 	 */
 	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-	    uuid_be_cmp(*uuid, NULL_UUID_BE)) {
-		fh = ovl_encode_fh(lower, uuid);
+	    !uuid_is_null(&sb->s_uuid)) {
+		fh = ovl_encode_fh(lower, &sb->s_uuid);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
 
-	err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 */
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+				 fh ? fh->len : 0, 0);
 	kfree(fh);
 
 	return err;
@@ -326,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 		.link = link
 	};
 
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
-	err = PTR_ERR(upper);
-	if (IS_ERR(upper))
-		goto out;
-
 	err = security_inode_copy_up(dentry, &new_creds);
 	if (err < 0)
-		goto out1;
+		goto out;
 
 	if (new_creds)
 		old_creds = override_creds(new_creds);
@@ -342,13 +339,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	if (tmpfile)
 		temp = ovl_do_tmpfile(upperdir, stat->mode);
 	else
-		temp = ovl_lookup_temp(workdir, dentry);
-	err = PTR_ERR(temp);
-	if (IS_ERR(temp))
-		goto out1;
-
+		temp = ovl_lookup_temp(workdir);
 	err = 0;
-	if (!tmpfile)
+	if (IS_ERR(temp)) {
+		err = PTR_ERR(temp);
+		temp = NULL;
+	}
+
+	if (!err && !tmpfile)
 		err = ovl_create_real(wdir, temp, &cattr, NULL, true);
 
 	if (new_creds) {
@@ -357,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	}
 
 	if (err)
-		goto out2;
+		goto out;
 
 	if (S_ISREG(stat->mode)) {
 		struct path upperpath;
@@ -393,10 +391,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	/*
 	 * Store identifier of lower inode in upper inode xattr to
 	 * allow lookup of the copy up origin inode.
+	 *
+	 * Don't set origin when we are breaking the association with a lower
+	 * hard link.
 	 */
-	err = ovl_set_origin(dentry, lowerpath->dentry, temp);
-	if (err)
+	if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+		err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+		if (err)
+			goto out_cleanup;
+	}
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	if (IS_ERR(upper)) {
+		err = PTR_ERR(upper);
+		upper = NULL;
 		goto out_cleanup;
+	}
 
 	if (tmpfile)
 		err = ovl_do_link(temp, udir, upper, true);
@@ -411,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 
 	/* Restore timestamps on parent (best effort) */
 	ovl_set_timestamps(upperdir, pstat);
-out2:
-	dput(temp);
-out1:
-	dput(upper);
 out:
+	dput(temp);
+	dput(upper);
 	return err;
 
 out_cleanup:
 	if (!tmpfile)
 		ovl_cleanup(wdir, temp);
-	goto out2;
+	goto out;
 }
 
 /*
@@ -454,6 +463,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 	ovl_path_upper(parent, &parentpath);
 	upperdir = parentpath.dentry;
 
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(parent, upperdir);
+	if (err)
+		return err;
+
 	err = vfs_getattr(&parentpath, &pstat,
 			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
 	if (err)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 723b98b..a63a716 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 	}
 }
 
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+struct dentry *ovl_lookup_temp(struct dentry *workdir)
 {
 	struct dentry *temp;
 	char name[20];
@@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
 	struct dentry *whiteout;
 	struct inode *wdir = workdir->d_inode;
 
-	whiteout = ovl_lookup_temp(workdir, dentry);
+	whiteout = ovl_lookup_temp(workdir);
 	if (IS_ERR(whiteout))
 		return whiteout;
 
@@ -127,17 +127,28 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 	return err;
 }
 
-static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+			       int xerr)
 {
 	int err;
 
-	err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
+	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
 	if (!err)
 		ovl_dentry_set_opaque(dentry);
 
 	return err;
 }
 
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+{
+	/*
+	 * Fail with -EIO when trying to create opaque dir and upper doesn't
+	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+	 * return a specific error for noxattr case.
+	 */
+	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
+}
+
 /* Common operations required to be done after creation of file on upper */
 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 			    struct dentry *newdentry, bool hardlink)
@@ -162,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry)
 	return OVL_TYPE_MERGE(ovl_path_type(dentry));
 }
 
+static bool ovl_type_origin(struct dentry *dentry)
+{
+	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
 			    struct cattr *attr, struct dentry *hardlink)
 {
@@ -250,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
 	if (upper->d_parent->d_inode != udir)
 		goto out_unlock;
 
-	opaquedir = ovl_lookup_temp(workdir, dentry);
+	opaquedir = ovl_lookup_temp(workdir);
 	err = PTR_ERR(opaquedir);
 	if (IS_ERR(opaquedir))
 		goto out_unlock;
@@ -382,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 	if (err)
 		goto out;
 
-	newdentry = ovl_lookup_temp(workdir, dentry);
+	newdentry = ovl_lookup_temp(workdir);
 	err = PTR_ERR(newdentry);
 	if (IS_ERR(newdentry))
 		goto out_unlock;
@@ -846,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
 	if (IS_ERR(redirect))
 		return PTR_ERR(redirect);
 
-	err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
-			      redirect, strlen(redirect), 0);
+	err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+				 OVL_XATTR_REDIRECT,
+				 redirect, strlen(redirect), -EXDEV);
 	if (!err) {
 		spin_lock(&dentry->d_lock);
 		ovl_dentry_set_redirect(dentry, redirect);
 		spin_unlock(&dentry->d_lock);
 	} else {
 		kfree(redirect);
-		if (err == -EOPNOTSUPP)
-			ovl_clear_redirect_dir(dentry->d_sb);
-		else
-			pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+		pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
 		/* Fall back to userspace copy-up */
 		err = -EXDEV;
 	}
@@ -943,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	old_upperdir = ovl_dentry_upper(old->d_parent);
 	new_upperdir = ovl_dentry_upper(new->d_parent);
 
+	if (!samedir) {
+		/*
+		 * When moving a merge dir or non-dir with copy up origin into
+		 * a new parent, we are marking the new parent dir "impure".
+		 * When ovl_iterate() iterates an "impure" upper dir, it will
+		 * look up the origin inodes of the entries to fill d_ino.
+		 */
+		if (ovl_type_origin(old)) {
+			err = ovl_set_impure(new->d_parent, new_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+		if (!overwrite && ovl_type_origin(new)) {
+			err = ovl_set_impure(old->d_parent, old_upperdir);
+			if (err)
+				goto out_revert_creds;
+		}
+	}
+
 	trap = lock_rename(new_upperdir, old_upperdir);
 
 	olddentry = lookup_one_len(old->d_name.name, old_upperdir,
@@ -992,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		if (ovl_type_merge_or_lower(old))
 			err = ovl_set_redirect(old, samedir);
 		else if (!old_opaque && ovl_type_merge(new->d_parent))
-			err = ovl_set_opaque(old, olddentry);
+			err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
 		if (err)
 			goto out_dput;
 	}
@@ -1000,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		if (ovl_type_merge_or_lower(new))
 			err = ovl_set_redirect(new, samedir);
 		else if (!new_opaque && ovl_type_merge(old->d_parent))
-			err = ovl_set_opaque(new, newdentry);
+			err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
 		if (err)
 			goto out_dput;
 	}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ad9547f..d613e2c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -240,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name,
 	return res;
 }
 
+static bool ovl_can_list(const char *s)
+{
+	/* List all non-trusted xattrs */
+	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+		return true;
+
+	/* Never list trusted.overlay, list other trusted for superuser only */
+	return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+}
+
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 {
 	struct dentry *realdentry = ovl_dentry_real(dentry);
@@ -263,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 			return -EIO;
 
 		len -= slen;
-		if (ovl_is_private_xattr(s)) {
+		if (!ovl_can_list(s)) {
 			res -= slen;
 			memmove(s, s + slen, len);
 		} else {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index bad0f66..de0d4f7 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	 * Make sure that the stored uuid matches the uuid of the lower
 	 * layer where file handle will be decoded.
 	 */
-	if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
 		goto out;
 
 	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
@@ -169,17 +169,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 
 static bool ovl_is_opaquedir(struct dentry *dentry)
 {
-	int res;
-	char val;
-
-	if (!d_is_dir(dentry))
-		return false;
-
-	res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
-	if (res == 1 && val == 'y')
-		return true;
-
-	return false;
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
 }
 
 static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
@@ -351,6 +341,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
 	bool upperopaque = false;
+	bool upperimpure = false;
 	char *upperredirect = NULL;
 	struct dentry *this;
 	unsigned int i;
@@ -395,6 +386,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 				poe = roe;
 		}
 		upperopaque = d.opaque;
+		if (upperdentry && d.is_dir)
+			upperimpure = ovl_is_impuredir(upperdentry);
 	}
 
 	if (!d.stop && poe->numlower) {
@@ -463,6 +456,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	revert_creds(old_cred);
 	oe->opaque = upperopaque;
+	oe->impure = upperimpure;
 	oe->redirect = upperredirect;
 	oe->__upperdentry = upperdentry;
 	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index caa36cb..10863b4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -24,6 +24,7 @@ enum ovl_path_type {
 #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
 #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
 
 /*
  * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
@@ -55,7 +56,7 @@ struct ovl_fh {
 	u8 len;		/* size of this header + size of fid */
 	u8 flags;	/* OVL_FH_FLAG_* */
 	u8 type;	/* fid_type of fid */
-	uuid_be uuid;	/* uuid of filesystem */
+	uuid_t uuid;	/* uuid of filesystem */
 	u8 fid[0];	/* file identifier */
 } __packed;
 
@@ -203,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_impure(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
-void ovl_clear_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
@@ -219,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry);
 struct file *ovl_path_open(struct path *path, int flags);
 int ovl_copy_up_start(struct dentry *dentry);
 void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
 
 /* namei.c */
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -263,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct dentry *workdir);
 struct cattr {
 	dev_t rdev;
 	umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index b2023ddb..34bc4a9 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -28,6 +28,7 @@ struct ovl_fs {
 	/* creds of process who forced instantiation of super block */
 	const struct cred *creator_cred;
 	bool tmpfile;
+	bool noxattr;
 	wait_queue_head_t copyup_wq;
 	/* sb common to all layers */
 	struct super_block *same_sb;
@@ -42,6 +43,7 @@ struct ovl_entry {
 			u64 version;
 			const char *redirect;
 			bool opaque;
+			bool impure;
 			bool copying;
 		};
 		struct rcu_head rcu;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9828b7d..4882ffb 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -891,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 				dput(temp);
 			else
 				pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+			/*
+			 * Check if upper/work fs supports trusted.overlay.*
+			 * xattr
+			 */
+			err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
+					      "0", 1, 0);
+			if (err) {
+				ufs->noxattr = true;
+				pr_warn("overlayfs: upper fs does not support xattr.\n");
+			} else {
+				vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
+			}
 		}
 	}
 
@@ -961,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	path_put(&workpath);
 	kfree(lowertmp);
 
-	oe->__upperdentry = upperpath.dentry;
+	if (upperpath.dentry) {
+		oe->__upperdentry = upperpath.dentry;
+		oe->impure = ovl_is_impuredir(upperpath.dentry);
+	}
 	for (i = 0; i < numlower; i++) {
 		oe->lowerstack[i].dentry = stack[i].dentry;
 		oe->lowerstack[i].mnt = ufs->lower_mnt[i];
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index cfdea47..8090489 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -175,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
 	return oe->opaque;
 }
 
+bool ovl_dentry_is_impure(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->impure;
+}
+
 bool ovl_dentry_is_whiteout(struct dentry *dentry)
 {
 	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -191,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 
-	return ofs->config.redirect_dir;
-}
-
-void ovl_clear_redirect_dir(struct super_block *sb)
-{
-	struct ovl_fs *ofs = sb->s_fs_info;
-
-	ofs->config.redirect_dir = false;
+	return ofs->config.redirect_dir && !ofs->noxattr;
 }
 
 const char *ovl_dentry_get_redirect(struct dentry *dentry)
@@ -303,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry)
 	wake_up_locked(&ofs->copyup_wq);
 	spin_unlock(&ofs->copyup_wq.lock);
 }
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+	int res;
+	char val;
+
+	if (!d_is_dir(dentry))
+		return false;
+
+	res = vfs_getxattr(dentry, name, &val, 1);
+	if (res == 1 && val == 'y')
+		return true;
+
+	return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+		       const char *name, const void *value, size_t size,
+		       int xerr)
+{
+	int err;
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+	if (ofs->noxattr)
+		return xerr;
+
+	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+	if (err == -EOPNOTSUPP) {
+		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+		ofs->noxattr = true;
+		return xerr;
+	}
+
+	return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+	int err;
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->impure)
+		return 0;
+
+	/*
+	 * Do not fail when upper doesn't support xattrs.
+	 * Upper inodes won't have an origin or redirect xattr anyway.
+	 */
+	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+				 "y", 1, 0);
+	if (!err)
+		oe->impure = true;
+
+	return err;
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 45f6bf6..f1e1927 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 	if (!mmget_not_zero(mm))
 		goto free;
 
-	flags = write ? FOLL_WRITE : 0;
+	flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
 
 	while (count > 0) {
 		int this_len = min_t(int, count, PAGE_SIZE);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33..520802d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (stack_guard_page_start(vma, start))
-		start += PAGE_SIZE;
 	end = vma->vm_end;
-	if (stack_guard_page_end(vma, end))
-		end -= PAGE_SIZE;
 
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ebf80c7..48813ae 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode)
 }
 EXPORT_SYMBOL(dquot_initialize);
 
+bool dquot_initialize_needed(struct inode *inode)
+{
+	struct dquot **dquots;
+	int i;
+
+	if (!dquot_active(inode))
+		return false;
+
+	dquots = i_dquot(inode);
+	for (i = 0; i < MAXQUOTAS; i++)
+		if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(dquot_initialize_needed);
+
 /*
  * Release all quotas referenced by inode.
  *
diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d44..d591eee 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 	struct kiocb kiocb;
 	ssize_t ret;
 
-	if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
-		return -EOPNOTSUPP;
-
 	init_sync_kiocb(&kiocb, filp);
-	if (flags & RWF_HIPRI)
-		kiocb.ki_flags |= IOCB_HIPRI;
-	if (flags & RWF_DSYNC)
-		kiocb.ki_flags |= IOCB_DSYNC;
-	if (flags & RWF_SYNC)
-		kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	ret = kiocb_set_rw_flags(&kiocb, flags);
+	if (ret)
+		return ret;
 	kiocb.ki_pos = *ppos;
 
 	if (type == READ)
@@ -1285,7 +1279,7 @@ static size_t compat_writev(struct file *file,
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
 
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
 
 out:
 	if (ret > 0)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da01f49..a11d773 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s,
 		depth = reiserfs_write_unlock_nested(s);
 		if (reiserfs_barrier_flush(s))
 			__sync_dirty_buffer(jl->j_commit_bh,
-					REQ_PREFLUSH | REQ_FUA);
+					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(jl->j_commit_bh);
 		reiserfs_write_lock_nested(s, depth);
@@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb,
 
 		if (reiserfs_barrier_flush(sb))
 			__sync_dirty_buffer(journal->j_header_bh,
-					REQ_PREFLUSH | REQ_FUA);
+					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 		else
 			sync_dirty_buffer(journal->j_header_bh);
 
@@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s)
 
 static void queue_log_writer(struct super_block *s)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
 
diff --git a/fs/select.c b/fs/select.c
index d6c652a..5b524a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 	return table->entry++;
 }
 
-static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_wqueues *pwq = wait->private;
 	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_table_entry *entry;
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 7e3d711..593b022 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
 	if (likely(!waitqueue_active(wqh)))
 		return;
 
-	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	/* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
 	wake_up_poll(wqh, POLLHUP | POLLFREE);
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index f494b18..c356108 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -672,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
 		inode->i_bytes -= 512;
 	}
 }
+EXPORT_SYMBOL(__inode_add_bytes);
 
 void inode_add_bytes(struct inode *inode, loff_t bytes)
 {
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a0376a2..f80be4c 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
 			ufs_error (sb, "ufs_free_fragments",
 				   "bit already cleared for fragment %u", i);
 	}
-	
+
+	inode_sub_bytes(inode, count << uspi->s_fshift);
 	fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
 	uspi->cs_total.cs_nffree += count;
 	fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -184,6 +185,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
 			ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
 		}
 		ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
+		inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
 		if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 			ufs_clusteracct (sb, ucpi, blkno, 1);
 
@@ -398,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	/*
 	 * There is not enough space for user on the device
 	 */
-	if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
-		mutex_unlock(&UFS_SB(sb)->s_lock);
-		UFSD("EXIT (FAILED)\n");
-		return 0;
+	if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+		if (!capable(CAP_SYS_RESOURCE)) {
+			mutex_unlock(&UFS_SB(sb)->s_lock);
+			UFSD("EXIT (FAILED)\n");
+			return 0;
+		}
 	}
 
 	if (goal >= uspi->s_size) 
@@ -419,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 		if (result) {
 			ufs_clear_frags(inode, result + oldcount,
 					newcount - oldcount, locked_page != NULL);
+			*err = 0;
 			write_seqlock(&UFS_I(inode)->meta_lock);
 			ufs_cpu_to_data_ptr(sb, p, result);
-			write_sequnlock(&UFS_I(inode)->meta_lock);
-			*err = 0;
 			UFS_I(inode)->i_lastfrag =
 				max(UFS_I(inode)->i_lastfrag, fragment + count);
+			write_sequnlock(&UFS_I(inode)->meta_lock);
 		}
 		mutex_unlock(&UFS_SB(sb)->s_lock);
 		UFSD("EXIT, result %llu\n", (unsigned long long)result);
@@ -437,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	result = ufs_add_fragments(inode, tmp, oldcount, newcount);
 	if (result) {
 		*err = 0;
+		read_seqlock_excl(&UFS_I(inode)->meta_lock);
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 						fragment + count);
+		read_sequnlock_excl(&UFS_I(inode)->meta_lock);
 		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
 				locked_page != NULL);
 		mutex_unlock(&UFS_SB(sb)->s_lock);
@@ -449,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	/*
 	 * allocate new block and move data
 	 */
-	switch (fs32_to_cpu(sb, usb1->fs_optim)) {
-	    case UFS_OPTSPACE:
+	if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
 		request = newcount;
-		if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
-		    > uspi->s_dsize * uspi->s_minfree / (2 * 100))
-			break;
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-		break;
-	    default:
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-	
-	    case UFS_OPTTIME:
+		if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+			usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+	} else {
 		request = uspi->s_fpb;
-		if (uspi->cs_total.cs_nffree < uspi->s_dsize *
-		    (uspi->s_minfree - 2) / 100)
-			break;
-		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-		break;
+		if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+			usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
 	}
 	result = ufs_alloc_fragments (inode, cgno, goal, request, err);
 	if (result) {
 		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
 				locked_page != NULL);
+		mutex_unlock(&UFS_SB(sb)->s_lock);
 		ufs_change_blocknr(inode, fragment - oldcount, oldcount,
 				   uspi->s_sbbase + tmp,
 				   uspi->s_sbbase + result, locked_page);
+		*err = 0;
 		write_seqlock(&UFS_I(inode)->meta_lock);
 		ufs_cpu_to_data_ptr(sb, p, result);
-		write_sequnlock(&UFS_I(inode)->meta_lock);
-		*err = 0;
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 						fragment + count);
-		mutex_unlock(&UFS_SB(sb)->s_lock);
+		write_sequnlock(&UFS_I(inode)->meta_lock);
 		if (newcount < request)
 			ufs_free_fragments (inode, result + newcount, request - newcount);
 		ufs_free_fragments (inode, tmp, oldcount);
@@ -494,6 +490,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	return 0;
 }		
 
+static bool try_add_frags(struct inode *inode, unsigned frags)
+{
+	unsigned size = frags * i_blocksize(inode);
+	spin_lock(&inode->i_lock);
+	__inode_add_bytes(inode, size);
+	if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
+		__inode_sub_bytes(inode, size);
+		spin_unlock(&inode->i_lock);
+		return false;
+	}
+	spin_unlock(&inode->i_lock);
+	return true;
+}
+
 static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 			     unsigned oldcount, unsigned newcount)
 {
@@ -530,6 +540,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 	for (i = oldcount; i < newcount; i++)
 		if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
 			return 0;
+
+	if (!try_add_frags(inode, count))
+		return 0;
 	/*
 	 * Block can be extended
 	 */
@@ -647,6 +660,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
 			ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
 		i = uspi->s_fpb - count;
 
+		inode_sub_bytes(inode, i << uspi->s_fshift);
 		fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
 		uspi->cs_total.cs_nffree += i;
 		fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +671,8 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
 	result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
 	if (result == INVBLOCK)
 		return 0;
+	if (!try_add_frags(inode, count))
+		return 0;
 	for (i = 0; i < count; i++)
 		ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
 	
@@ -716,6 +732,8 @@ static u64 ufs_alloccg_block(struct inode *inode,
 		return INVBLOCK;
 	ucpi->c_rotor = result;
 gotit:
+	if (!try_add_frags(inode, uspi->s_fpb))
+		return 0;
 	blkno = ufs_fragstoblks(result);
 	ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 	if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7e41aee..f36d6a5 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
 
 	p = ufs_get_direct_data_ptr(uspi, ufsi, block);
 	tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
-				new_size, err, locked_page);
+				new_size - (lastfrag & uspi->s_fpbmask), err,
+				locked_page);
 	return tmp != 0;
 }
 
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
 			goal += uspi->s_fpb;
 	}
 	tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
-				goal, uspi->s_fpb, err, locked_page);
+				goal, nfrags, err, locked_page);
 
 	if (!tmp) {
 		*err = -ENOSPC;
@@ -400,11 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
 	u64 phys64 = 0;
 	unsigned frag = fragment & uspi->s_fpbmask;
 
-	if (!create) {
-		phys64 = ufs_frag_map(inode, offsets, depth);
-		goto out;
-	}
+	phys64 = ufs_frag_map(inode, offsets, depth);
+	if (!create)
+		goto done;
 
+	if (phys64) {
+		if (fragment >= UFS_NDIR_FRAGMENT)
+			goto done;
+		read_seqlock_excl(&UFS_I(inode)->meta_lock);
+		if (fragment < UFS_I(inode)->i_lastfrag) {
+			read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+			goto done;
+		}
+		read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+	}
         /* This code entered only while writing ....? */
 
 	mutex_lock(&UFS_I(inode)->truncate_mutex);
@@ -448,6 +458,11 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
 	}
 	mutex_unlock(&UFS_I(inode)->truncate_mutex);
 	return err;
+
+done:
+	if (phys64)
+		map_bh(bh_result, sb, phys64 + frag);
+	return 0;
 }
 
 static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -551,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
 	set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
-	if (inode->i_nlink == 0) {
-		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-		return -1;
-	}
+	if (inode->i_nlink == 0)
+		return -ESTALE;
 
 	/*
 	 * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -563,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
 
 	inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
-	inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
-	inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
-	inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+	inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+	inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+	inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
 	inode->i_ctime.tv_nsec = 0;
@@ -599,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
 	set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
-	if (inode->i_nlink == 0) {
-		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-		return -1;
-	}
+	if (inode->i_nlink == 0)
+		return -ESTALE;
 
         /*
          * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -642,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	struct buffer_head * bh;
 	struct inode *inode;
-	int err;
+	int err = -EIO;
 
 	UFSD("ENTER, ino %lu\n", ino);
 
@@ -677,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 		err = ufs1_read_inode(inode,
 				      ufs_inode + ufs_inotofsbo(inode->i_ino));
 	}
-
+	brelse(bh);
 	if (err)
 		goto bad_inode;
+
 	inode->i_version++;
 	ufsi->i_lastfrag =
 		(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -688,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 
 	ufs_set_inode_ops(inode);
 
-	brelse(bh);
-
 	UFSD("EXIT\n");
 	unlock_new_inode(inode);
 	return inode;
 
 bad_inode:
 	iget_failed(inode);
-	return ERR_PTR(-EIO);
+	return ERR_PTR(err);
 }
 
 static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
@@ -841,8 +851,11 @@ void ufs_evict_inode(struct inode * inode)
 	truncate_inode_pages_final(&inode->i_data);
 	if (want_delete) {
 		inode->i_size = 0;
-		if (inode->i_blocks)
+		if (inode->i_blocks &&
+		    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		     S_ISLNK(inode->i_mode)))
 			ufs_truncate_blocks(inode);
+		ufs_update_inode(inode, inode_needs_sync(inode));
 	}
 
 	invalidate_inode_buffers(inode);
@@ -868,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
 	ctx->to = from + count;
 }
 
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
 #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
 
 static void ufs_trunc_direct(struct inode *inode)
@@ -1100,25 +1112,30 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
        return err;
 }
 
-static void __ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	unsigned offsets[4];
-	int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+	int depth;
 	int depth2;
 	unsigned i;
 	struct ufs_buffer_head *ubh[3];
 	void *p;
 	u64 block;
 
-	if (!depth)
-		return;
+	if (inode->i_size) {
+		sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+		depth = ufs_block_to_path(inode, last, offsets);
+		if (!depth)
+			return;
+	} else {
+		depth = 1;
+	}
 
-	/* find the last non-zero in offsets[] */
 	for (depth2 = depth - 1; depth2; depth2--)
-		if (offsets[depth2])
+		if (offsets[depth2] != uspi->s_apb - 1)
 			break;
 
 	mutex_lock(&ufsi->truncate_mutex);
@@ -1127,9 +1144,8 @@ static void __ufs_truncate_blocks(struct inode *inode)
 		offsets[0] = UFS_IND_BLOCK;
 	} else {
 		/* get the blocks that should be partially emptied */
-		p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+		p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
 		for (i = 0; i < depth2; i++) {
-			offsets[i]++;	/* next branch is fully freed */
 			block = ufs_data_ptr_to_cpu(sb, p);
 			if (!block)
 				break;
@@ -1140,7 +1156,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
 				write_sequnlock(&ufsi->meta_lock);
 				break;
 			}
-			p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+			p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
 		}
 		while (i--)
 			free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
@@ -1155,7 +1171,9 @@ static void __ufs_truncate_blocks(struct inode *inode)
 			free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
 		}
 	}
+	read_seqlock_excl(&ufsi->meta_lock);
 	ufsi->i_lastfrag = DIRECT_FRAGMENT;
+	read_sequnlock_excl(&ufsi->meta_lock);
 	mark_inode_dirty(inode);
 	mutex_unlock(&ufsi->truncate_mutex);
 }
@@ -1183,7 +1201,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
 
 	truncate_setsize(inode, size);
 
-	__ufs_truncate_blocks(inode);
+	ufs_truncate_blocks(inode);
 	inode->i_mtime = inode->i_ctime = current_time(inode);
 	mark_inode_dirty(inode);
 out:
@@ -1191,16 +1209,6 @@ static int ufs_truncate(struct inode *inode, loff_t size)
 	return err;
 }
 
-static void ufs_truncate_blocks(struct inode *inode)
-{
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-	__ufs_truncate_blocks(inode);
-}
-
 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = d_inode(dentry);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 131b2b7..0a4f58a 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
 	usb3 = ubh_get_usb_third(uspi);
 
 	if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-	     (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+	     (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
 	    mtype == UFS_MOUNT_UFSTYPE_UFS2) {
 		/*we have statistic in different place, then usual*/
 		uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
@@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb)
 	usb2 = ubh_get_usb_second(uspi);
 	usb3 = ubh_get_usb_third(uspi);
 
-	if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-	     (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
-	    mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+	if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
 		/*we have statistic in different place, then usual*/
 		usb2->fs_un.fs_u2.cs_ndir =
 			cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
@@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb)
 			cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
 		usb3->fs_un1.fs_u2.cs_nffree =
 			cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
-	} else {
-		usb1->fs_cstotal.cs_ndir =
-			cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
-		usb1->fs_cstotal.cs_nbfree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
-		usb1->fs_cstotal.cs_nifree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
-		usb1->fs_cstotal.cs_nffree =
-			cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+		goto out;
 	}
+
+	if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+	     (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+		/* store stats in both old and new places */
+		usb2->fs_un.fs_u2.cs_ndir =
+			cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+		usb2->fs_un.fs_u2.cs_nbfree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+		usb3->fs_un1.fs_u2.cs_nifree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+		usb3->fs_un1.fs_u2.cs_nffree =
+			cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
+	}
+	usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+	usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+	usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+	usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
 	ubh_mark_buffer_dirty(USPI_UBH(uspi));
 	ufs_print_super_stuff(sb, usb1, usb2, usb3);
 	UFSD("EXIT\n");
@@ -746,6 +754,23 @@ static void ufs_put_super(struct super_block *sb)
 	return;
 }
 
+static u64 ufs_max_bytes(struct super_block *sb)
+{
+	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+	int bits = uspi->s_apbshift;
+	u64 res;
+
+	if (bits > 21)
+		res = ~0ULL;
+	else
+		res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
+			(1LL << (3*bits));
+
+	if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
+		return MAX_LFS_FILESIZE;
+	return res << uspi->s_bshift;
+}
+
 static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ufs_sb_info * sbi;
@@ -812,9 +837,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	uspi->s_dirblksize = UFS_SECTOR_SIZE;
 	super_block_offset=UFS_SBLOCK;
 
-	/* Keep 2Gig file limit. Some UFS variants need to override 
-	   this but as I don't know which I'll let those in the know loosen
-	   the rules */
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
 	switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
 	case UFS_MOUNT_UFSTYPE_44BSD:
 		UFSD("ufstype=44bsd\n");
@@ -980,6 +1004,13 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 		flags |=  UFS_ST_SUN;
 	}
 
+	if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+	    uspi->s_postblformat == UFS_42POSTBLFMT) {
+		if (!silent)
+			pr_err("this is not a 44bsd filesystem");
+		goto failed;
+	}
+
 	/*
 	 * Check ufs magic number
 	 */
@@ -1127,8 +1158,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
 
 	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
-		uspi->s_u2_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
-		uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+		uspi->s_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+		uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
 	} else {
 		uspi->s_size  =  fs32_to_cpu(sb, usb1->fs_size);
 		uspi->s_dsize =  fs32_to_cpu(sb, usb1->fs_dsize);
@@ -1177,6 +1208,18 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 	uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
 	uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
 
+	uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+					      uspi->s_minfree, 100);
+	if (uspi->s_minfree <= 5) {
+		uspi->s_time_to_space = ~0ULL;
+		uspi->s_space_to_time = 0;
+		usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+	} else {
+		uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+		uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+					      uspi->s_minfree - 2, 100) - 1;
+	}
+
 	/*
 	 * Compute another frequently used values
 	 */
@@ -1212,6 +1255,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 			    "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
 		uspi->s_maxsymlinklen = maxsymlen;
 	}
+	sb->s_maxbytes = ufs_max_bytes(sb);
 	sb->s_max_links = UFS_LINK_MAX;
 
 	inode = ufs_iget(sb, UFS_ROOTINO);
@@ -1365,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	mutex_lock(&UFS_SB(sb)->s_lock);
 	usb3 = ubh_get_usb_third(uspi);
 	
-	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
 		buf->f_type = UFS2_MAGIC;
-		buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
-	} else {
+	else
 		buf->f_type = UFS_MAGIC;
-		buf->f_blocks = uspi->s_dsize;
-	}
-	buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-		uspi->cs_total.cs_nffree;
+
+	buf->f_blocks = uspi->s_dsize;
+	buf->f_bfree = ufs_freefrags(uspi);
 	buf->f_ffree = uspi->cs_total.cs_nifree;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
-		? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+	buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+		? (buf->f_bfree - uspi->s_root_blocks) : 0;
 	buf->f_files = uspi->s_ncg * uspi->s_ipg;
 	buf->f_namelen = UFS_MAXNAMLEN;
 	buf->f_fsid.val[0] = (u32)id;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 0cbd5d3..150eef6 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -733,10 +733,8 @@ struct ufs_sb_private_info {
 	__u32	s_dblkno;	/* offset of first data after cg */
 	__u32	s_cgoffset;	/* cylinder group offset in cylinder */
 	__u32	s_cgmask;	/* used to calc mod fs_ntrak */
-	__u32	s_size;		/* number of blocks (fragments) in fs */
-	__u32	s_dsize;	/* number of data blocks in fs */
-	__u64	s_u2_size;	/* ufs2: number of blocks (fragments) in fs */
-	__u64	s_u2_dsize;	/*ufs2:  number of data blocks in fs */
+	__u64	s_size;		/* number of blocks (fragments) in fs */
+	__u64	s_dsize;	/* number of data blocks in fs */
 	__u32	s_ncg;		/* number of cylinder groups */
 	__u32	s_bsize;	/* size of basic blocks */
 	__u32	s_fsize;	/* size of fragments */
@@ -793,6 +791,9 @@ struct ufs_sb_private_info {
 	__u32	s_maxsymlinklen;/* upper limit on fast symlinks' size */
 	__s32	fs_magic;       /* filesystem magic */
 	unsigned int s_dirblksize;
+	__u64   s_root_blocks;
+	__u64	s_time_to_space;
+	__u64	s_space_to_time;
 };
 
 /*
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index f41ad0a..02497a4 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
 struct page *ufs_get_locked_page(struct address_space *mapping,
 				 pgoff_t index)
 {
-	struct page *page;
-
-	page = find_lock_page(mapping, index);
+	struct inode *inode = mapping->host;
+	struct page *page = find_lock_page(mapping, index);
 	if (!page) {
 		page = read_mapping_page(mapping, index, NULL);
 
@@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 			printk(KERN_ERR "ufs_change_blocknr: "
 			       "read_mapping_page error: ino %lu, index: %lu\n",
 			       mapping->host->i_ino, index);
-			goto out;
+			return page;
 		}
 
 		lock_page(page);
@@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 			/* Truncate got there first */
 			unlock_page(page);
 			put_page(page);
-			page = NULL;
-			goto out;
+			return NULL;
 		}
 
 		if (!PageUptodate(page) || PageError(page)) {
@@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 
 			printk(KERN_ERR "ufs_change_blocknr: "
 			       "can not read page: ino %lu, index: %lu\n",
-			       mapping->host->i_ino, index);
+			       inode->i_ino, index);
 
-			page = ERR_PTR(-EIO);
+			return ERR_PTR(-EIO);
 		}
 	}
-out:
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 	return page;
 }
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b7fbf53..9fc7119 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
 #define ubh_blkmap(ubh,begin,bit) \
 	((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
 
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
 static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
 {
 	return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-		uspi->cs_total.cs_nffree -
-		(uspi->s_dsize * (percentreserved) / 100);
+		uspi->cs_total.cs_nffree;
 }
 
 /*
@@ -473,15 +468,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
 static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
 	struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
 {
+	u8 mask;
 	switch (uspi->s_fpb) {
 	case 8:
 	    	return (*ubh_get_addr (ubh, begin + block) == 0xff);
 	case 4:
-		return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2)));
+		mask = 0x0f << ((block & 0x01) << 2);
+		return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
 	case 2:
-		return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1)));
+		mask = 0x03 << ((block & 0x03) << 1);
+		return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
 	case 1:
-		return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07)));
+		mask = 0x01 << (block & 0x07);
+		return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
 	}
 	return 0;	
 }
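
The old _ubh_isblockset_() compared the whole bitmap byte against the mask, so it reported a block as not set whenever a neighbouring block's bits were also set in the same byte; the new code masks first. A standalone sketch of the difference for the 4-fragments-per-block case, with made-up sample data:

  #include <stdio.h>

  /* Fragments per block = 4: each block occupies one nibble of the bitmap byte. */
  static int isblockset_old(unsigned char byte, unsigned block)
  {
  	unsigned char mask = 0x0f << ((block & 0x01) << 2);
  	return byte == mask;              /* fails if the other nibble is set too */
  }

  static int isblockset_new(unsigned char byte, unsigned block)
  {
  	unsigned char mask = 0x0f << ((block & 0x01) << 2);
  	return (byte & mask) == mask;     /* only looks at this block's bits */
  }

  int main(void)
  {
  	unsigned char byte = 0xff;        /* both blocks in this byte are fully set */

  	printf("old: %d, new: %d\n", isblockset_old(byte, 0), isblockset_new(byte, 0));
  	/* old: 0, new: 1 -- the masked test correctly reports block 0 as set */
  	return 0;
  }
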
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f7555fc..6148ccd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx {
 
 struct userfaultfd_wait_queue {
 	struct uffd_msg msg;
-	wait_queue_t wq;
+	wait_queue_entry_t wq;
 	struct userfaultfd_ctx *ctx;
 	bool waken;
 };
@@ -91,7 +91,7 @@ struct userfaultfd_wake_range {
 	unsigned long len;
 };
 
-static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
+static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
 	struct userfaultfd_wake_range *range = key;
@@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
 		 * wouldn't be enough, the smp_mb__before_spinlock is
 		 * enough to avoid an explicit smp_mb() here.
 		 */
-		list_del_init(&wq->task_list);
+		list_del_init(&wq->entry);
 out:
 	return ret;
 }
@@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	bool must_wait, return_to_userland;
 	long blocking_state;
 
-	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
 	ret = VM_FAULT_SIGBUS;
+
+	/*
+	 * We don't do userfault handling for the final child pid update.
+	 *
+	 * We also don't do userfault handling during
+	 * coredumping. hugetlbfs has the special
+	 * follow_hugetlb_page() to skip missing pages in the
+	 * FOLL_DUMP case; anon memory also checks for FOLL_DUMP with
+	 * the no_page_table() helper in follow_page_mask(), but the
+	 * shmem_vm_ops->fault method is invoked even during
+	 * coredumping without mmap_sem and it ends up here.
+	 */
+	if (current->flags & (PF_EXITING|PF_DUMPCORE))
+		goto out;
+
+	/*
+	 * Coredumping runs without mmap_sem so we can only check that
+	 * the mmap_sem is held, if PF_DUMPCORE was not set.
+	 */
+	WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
 	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
@@ -361,12 +380,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		goto out;
 
 	/*
-	 * We don't do userfault handling for the final child pid update.
-	 */
-	if (current->flags & PF_EXITING)
-		goto out;
-
-	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
@@ -509,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 * and it's fine not to block on the spinlock. The uwq on this
 	 * kernel stack can be released after the list_del_init.
 	 */
-	if (!list_empty_careful(&uwq.wq.task_list)) {
+	if (!list_empty_careful(&uwq.wq.entry)) {
 		spin_lock(&ctx->fault_pending_wqh.lock);
 		/*
 		 * No need of list_del_init(), the uwq on the stack
 		 * will be freed shortly anyway.
 		 */
-		list_del(&uwq.wq.task_list);
+		list_del(&uwq.wq.entry);
 		spin_unlock(&ctx->fault_pending_wqh.lock);
 	}
 
@@ -847,7 +860,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 static inline struct userfaultfd_wait_queue *find_userfault_in(
 		wait_queue_head_t *wqh)
 {
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 
 	VM_BUG_ON(!spin_is_locked(&wqh->lock));
@@ -856,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
 	if (!waitqueue_active(wqh))
 		goto out;
 	/* walk in reverse to provide FIFO behavior to read userfaults */
-	wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
+	wq = list_last_entry(&wqh->head, typeof(*wq), entry);
 	uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 out:
 	return uwq;
@@ -990,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			 * changes __remove_wait_queue() to use
 			 * list_del_init() in turn breaking the
 			 * !list_empty_careful() check in
-			 * handle_userfault(). The uwq->wq.task_list
+			 * handle_userfault(). The uwq->wq.head list
 			 * must never be empty at any time during the
 			 * refile, or the waitqueue could disappear
 			 * from under us. The "wait_queue_head_t"
 			 * parameter of __remove_wait_queue() is unused
 			 * anyway.
 			 */
-			list_del(&uwq->wq.task_list);
+			list_del(&uwq->wq.entry);
 			__add_wait_queue(&ctx->fault_wqh, &uwq->wq);
 
 			write_seqcount_end(&ctx->refile_seq);
@@ -1019,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 				fork_nctx = (struct userfaultfd_ctx *)
 					(unsigned long)
 					uwq->msg.arg.reserved.reserved1;
-				list_move(&uwq->wq.task_list, &fork_event);
+				list_move(&uwq->wq.entry, &fork_event);
 				spin_unlock(&ctx->event_wqh.lock);
 				ret = 0;
 				break;
@@ -1056,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			if (!list_empty(&fork_event)) {
 				uwq = list_first_entry(&fork_event,
 						       typeof(*uwq),
-						       wq.task_list);
-				list_del(&uwq->wq.task_list);
+						       wq.entry);
+				list_del(&uwq->wq.entry);
 				__add_wait_queue(&ctx->event_wqh, &uwq->wq);
 				userfaultfd_event_complete(ctx, uwq);
 			}
@@ -1734,17 +1747,17 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
 static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct userfaultfd_ctx *ctx = f->private_data;
-	wait_queue_t *wq;
+	wait_queue_entry_t *wq;
 	struct userfaultfd_wait_queue *uwq;
 	unsigned long pending = 0, total = 0;
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
-	list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		pending++;
 		total++;
 	}
-	list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) {
+	list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
 		uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
 		total++;
 	}
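
The userfaultfd changes above are mostly the tree-wide rename of wait_queue_t to wait_queue_entry_t and of its task_list member to entry. A tiny userspace model of the container_of-plus-intrusive-list pattern those fields are used for; the structure and helper names here are illustrative, not the kernel API:

  #include <stddef.h>
  #include <stdio.h>

  /* Minimal doubly linked list node, standing in for struct wait_queue_entry. */
  struct toy_entry {
  	struct toy_entry *next, *prev;
  };

  struct toy_waiter {
  	int msg;                 /* payload, cf. uffd_msg */
  	struct toy_entry wq;     /* embedded entry, cf. uwq.wq.entry */
  };

  #define container_of(ptr, type, member) \
  	((type *)((char *)(ptr) - offsetof(type, member)))

  int main(void)
  {
  	struct toy_entry head = { &head, &head };
  	struct toy_waiter w = { .msg = 42 };

  	/* add the waiter to the list */
  	w.wq.next = head.next;
  	w.wq.prev = &head;
  	head.next->prev = &w.wq;
  	head.next = &w.wq;

  	/* walk the list and recover the containing structure */
  	for (struct toy_entry *p = head.next; p != &head; p = p->next) {
  		struct toy_waiter *uwq = container_of(p, struct toy_waiter, wq);
  		printf("msg = %d\n", uwq->msg);
  	}
  	return 0;
  }
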
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5c90f82..a6e955b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -98,8 +98,7 @@
 				   xfs_sysfs.o \
 				   xfs_trans.o \
 				   xfs_xattr.o \
-				   kmem.o \
-				   uuid.o
+				   kmem.o
 
 # low-level transaction/log code
 xfs-y				+= xfs_log.o \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f02eb76..a7048eaf 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1280,7 +1280,6 @@ xfs_bmap_read_extents(
 		xfs_bmbt_rec_t	*frp;
 		xfs_fsblock_t	nextbno;
 		xfs_extnum_t	num_recs;
-		xfs_extnum_t	start;
 
 		num_recs = xfs_btree_get_numrecs(block);
 		if (unlikely(i + num_recs > room)) {
@@ -1303,7 +1302,6 @@ xfs_bmap_read_extents(
 		 * Copy records into the extent records.
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-		start = i;
 		for (j = 0; j < num_recs; j++, i++, frp++) {
 			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
 			trp->l0 = be64_to_cpu(frp->l0);
@@ -2065,8 +2063,10 @@ xfs_bmap_add_extent_delay_real(
 		}
 		temp = xfs_bmap_worst_indlen(bma->ip, temp);
 		temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
-		diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
-			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
+		diff = (int)(temp + temp2 -
+			     (startblockval(PREV.br_startblock) -
+			      (bma->cur ?
+			       bma->cur->bc_private.b.allocated : 0)));
 		if (diff > 0) {
 			error = xfs_mod_fdblocks(bma->ip->i_mount,
 						 -((int64_t)diff), false);
@@ -2123,7 +2123,6 @@ xfs_bmap_add_extent_delay_real(
 		temp = da_new;
 		if (bma->cur)
 			temp += bma->cur->bc_private.b.allocated;
-		ASSERT(temp <= da_old);
 		if (temp < da_old)
 			xfs_mod_fdblocks(bma->ip->i_mount,
 					(int64_t)(da_old - temp), false);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5392674..3a673ba 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks(
 			xfs_btree_readahead_ptr(cur, ptr, 1);
 
 			/* save for the next iteration of the loop */
-			lptr = *ptr;
+			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
 		}
 
 		/* for each buffer in the level */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef3..82a38d8 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers(
 	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
 		return -EOPNOTSUPP;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	INIT_LIST_HEAD(&debris);
+
+	/*
+	 * In this first part, we use an empty transaction to gather up
+	 * all the leftover CoW extents so that we can subsequently
+	 * delete them.  The empty transaction is used to avoid
+	 * a buffer lock deadlock if there happens to be a loop in the
+	 * refcountbt because we're allowed to re-grab a buffer that is
+	 * already attached to our transaction.  When we're done
+	 * recording the CoW debris we cancel the (empty) transaction
+	 * and everything goes away cleanly.
+	 */
+	error = xfs_trans_alloc_empty(mp, &tp);
 	if (error)
 		return error;
-	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+	if (error)
+		goto out_trans;
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
 
 	/* Find all the leftover CoW staging extents. */
-	INIT_LIST_HEAD(&debris);
 	memset(&low, 0, sizeof(low));
 	memset(&high, 0, sizeof(high));
 	low.rc.rc_startblock = XFS_REFC_COW_START;
@@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers(
 	if (error)
 		goto out_cursor;
 	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
+	xfs_trans_cancel(tp);
 
 	/* Now iterate the list to free the leftovers */
-	list_for_each_entry(rr, &debris, rr_list) {
+	list_for_each_entry_safe(rr, n, &debris, rr_list) {
 		/* Set up transaction. */
 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
 		if (error)
@@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers(
 		error = xfs_trans_commit(tp);
 		if (error)
 			goto out_free;
+
+		list_del(&rr->rr_list);
+		kmem_free(rr);
 	}
 
+	return error;
+out_defer:
+	xfs_defer_cancel(&dfops);
+out_trans:
+	xfs_trans_cancel(tp);
 out_free:
 	/* Free the leftover list */
 	list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1688,11 +1712,6 @@ xfs_refcount_recover_cow_leftovers(
 
 out_cursor:
 	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-	xfs_buf_relse(agbp);
-	goto out_free;
-
-out_defer:
-	xfs_defer_cancel(&dfops);
-	xfs_trans_cancel(tp);
-	goto out_free;
+	xfs_trans_brelse(tp, agbp);
+	goto out_trans;
 }
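
Besides switching to an empty transaction, the hunk above converts the cleanup walk to list_for_each_entry_safe() because each element is now deleted and freed inside the loop. A standalone sketch of why the next pointer has to be cached before the element is freed, using a plain C list rather than the kernel list helpers:

  #include <stdio.h>
  #include <stdlib.h>

  struct node {
  	int val;
  	struct node *next;
  };

  int main(void)
  {
  	/* build a small singly linked list 1 -> 2 -> 3 */
  	struct node *head = NULL;
  	for (int i = 3; i >= 1; i--) {
  		struct node *n = malloc(sizeof(*n));
  		n->val = i;
  		n->next = head;
  		head = n;
  	}

  	/*
  	 * Free every node while walking the list.  Caching "next" before
  	 * freeing is the whole point of the *_safe iteration helpers:
  	 * reading n->next after free(n) would be a use-after-free.
  	 */
  	struct node *n = head, *next;
  	while (n) {
  		next = n->next;       /* save before the element goes away */
  		printf("freeing %d\n", n->val);
  		free(n);
  		n = next;
  	}
  	return 0;
  }
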
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
deleted file mode 100644
index b83f76b..0000000
--- a/fs/xfs/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-	__be32	uu_timelow;
-	__be16	uu_timemid;
-	__be16	uu_timehi;
-	__be16	uu_clockseq;
-	__be16	uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-	xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-		   be16_to_cpu(uup->uu_timemid);
-	fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-	int	i;
-	char	*cp = (char *)uuid;
-
-	if (uuid == NULL)
-		return 0;
-	/* implied check of version number here... */
-	for (i = 0; i < sizeof *uuid; i++)
-		if (*cp++) return 0;	/* not nil */
-	return 1;	/* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
deleted file mode 100644
index 104db0f..0000000
--- a/fs/xfs/uuid.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-	unsigned char	__u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
-	memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7..d20c29b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
-	int			error = ioend->io_bio->bi_error;
+	int			error;
 
 	/*
 	 * Just clean up the in-memory structures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
 	/*
 	 * Clean up any COW blocks on an I/O error.
 	 */
+	error = blk_status_to_errno(ioend->io_bio->bi_status);
 	if (unlikely(error)) {
 		switch (ioend->io_type) {
 		case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
 	else if (ioend->io_append_trans)
 		queue_work(mp->m_data_workqueue, &ioend->io_work);
 	else
-		xfs_destroy_ioend(ioend, bio->bi_error);
+		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
 }
 
 STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
 	 * time.
 	 */
 	if (status) {
-		ioend->io_bio->bi_error = status;
+		ioend->io_bio->bi_status = errno_to_blk_status(status);
 		bio_endio(ioend->io_bio);
 		return status;
 	}
 
+	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
 	submit_bio(ioend->io_bio);
 	return 0;
 }
@@ -564,6 +566,7 @@ xfs_chain_bio(
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
 	submit_bio(ioend->io_bio);
 	ioend->io_bio = new;
 }
@@ -1316,9 +1319,12 @@ xfs_vm_bmap(
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
 	 * bypasses the file system for actual I/O.  We really can't allow
 	 * that on reflinks inodes, so we have to skip out here.  And yes,
-	 * 0 is the magic code for a bmap error..
+	 * 0 is the magic code for a bmap error.
+	 *
+	 * Since we don't pass back blockdev info, we can't return bmap
+	 * information for rt files either.
 	 */
-	if (xfs_is_reflink_inode(ip))
+	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
 		return 0;
 
 	filemap_write_and_wait(mapping);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2b95430..9e3cc21 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -582,9 +582,13 @@ xfs_getbmap(
 		}
 		break;
 	default:
+		/* Local format data forks report no extents. */
+		if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+			bmv->bmv_entries = 0;
+			return 0;
+		}
 		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
 			return -EINVAL;
 
 		if (xfs_get_extsz_hint(ip) ||
@@ -712,7 +716,7 @@ xfs_getbmap(
 			 * extents.
 			 */
 			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+			    map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
 				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
 
                         if (map[i].br_startblock == HOLESTARTBLOCK &&
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 62fa392..438505f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -97,12 +97,16 @@ static inline void
 xfs_buf_ioacct_inc(
 	struct xfs_buf	*bp)
 {
-	if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+	if (bp->b_flags & XBF_NO_IOACCT)
 		return;
 
 	ASSERT(bp->b_flags & XBF_ASYNC);
-	bp->b_flags |= _XBF_IN_FLIGHT;
-	percpu_counter_inc(&bp->b_target->bt_io_count);
+	spin_lock(&bp->b_lock);
+	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+		percpu_counter_inc(&bp->b_target->bt_io_count);
+	}
+	spin_unlock(&bp->b_lock);
 }
 
 /*
@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc(
  * freed and unaccount from the buftarg.
  */
 static inline void
+__xfs_buf_ioacct_dec(
+	struct xfs_buf	*bp)
+{
+	lockdep_assert_held(&bp->b_lock);
+
+	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+		percpu_counter_dec(&bp->b_target->bt_io_count);
+	}
+}
+
+static inline void
 xfs_buf_ioacct_dec(
 	struct xfs_buf	*bp)
 {
-	if (!(bp->b_flags & _XBF_IN_FLIGHT))
-		return;
-
-	bp->b_flags &= ~_XBF_IN_FLIGHT;
-	percpu_counter_dec(&bp->b_target->bt_io_count);
+	spin_lock(&bp->b_lock);
+	__xfs_buf_ioacct_dec(bp);
+	spin_unlock(&bp->b_lock);
 }
 
 /*
@@ -149,9 +163,9 @@ xfs_buf_stale(
 	 * unaccounted (released to LRU) before that occurs. Drop in-flight
 	 * status now to preserve accounting consistency.
 	 */
-	xfs_buf_ioacct_dec(bp);
-
 	spin_lock(&bp->b_lock);
+	__xfs_buf_ioacct_dec(bp);
+
 	atomic_set(&bp->b_lru_ref, 0);
 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
 	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -979,12 +993,12 @@ xfs_buf_rele(
 		 * ensures the decrement occurs only once per-buf.
 		 */
 		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
-			xfs_buf_ioacct_dec(bp);
+			__xfs_buf_ioacct_dec(bp);
 		goto out_unlock;
 	}
 
 	/* the last reference has been dropped ... */
-	xfs_buf_ioacct_dec(bp);
+	__xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
 		/*
 		 * If the buffer is added to the LRU take a new reference to the
@@ -1213,8 +1227,11 @@ xfs_buf_bio_end_io(
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (bio->bi_error)
-		cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+	if (bio->bi_status) {
+		int error = blk_status_to_errno(bio->bi_status);
+
+		cmpxchg(&bp->b_io_error, 0, error);
+	}
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
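
The buffer accounting rework above replaces the racy _XBF_IN_FLIGHT flag with an XFS_BSTATE_IN_FLIGHT bit that is only set and cleared under b_lock, so the per-target counter is adjusted at most once per buffer. A small pthread-based sketch of that flag-and-counter-under-one-lock idiom; all names are illustrative:

  #include <pthread.h>
  #include <stdio.h>

  #define IN_FLIGHT 0x1

  struct toy_buf {
  	pthread_mutex_t lock;
  	unsigned int state;
  };

  static long io_count;            /* stands in for bt_io_count */

  static void ioacct_inc(struct toy_buf *bp)
  {
  	pthread_mutex_lock(&bp->lock);
  	if (!(bp->state & IN_FLIGHT)) {   /* count each buffer at most once */
  		bp->state |= IN_FLIGHT;
  		io_count++;
  	}
  	pthread_mutex_unlock(&bp->lock);
  }

  static void ioacct_dec(struct toy_buf *bp)
  {
  	pthread_mutex_lock(&bp->lock);
  	if (bp->state & IN_FLIGHT) {
  		bp->state &= ~IN_FLIGHT;
  		io_count--;
  	}
  	pthread_mutex_unlock(&bp->lock);
  }

  int main(void)
  {
  	struct toy_buf bp = { .lock = PTHREAD_MUTEX_INITIALIZER, .state = 0 };

  	ioacct_inc(&bp);
  	ioacct_inc(&bp);          /* resubmit of the same buffer: no double count */
  	printf("in flight: %ld\n", io_count);   /* 1 */
  	ioacct_dec(&bp);
  	printf("in flight: %ld\n", io_count);   /* 0 */
  	return 0;
  }
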
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8d1d44f..1508121 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -63,7 +63,6 @@ typedef enum {
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
 #define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
-#define _XBF_IN_FLIGHT	 (1 << 25) /* I/O in flight, for accounting purposes */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_COMPOUND,	"COMPOUND" }, \
-	{ _XBF_IN_FLIGHT,	"IN_FLIGHT" }
+	{ _XBF_COMPOUND,	"COMPOUND" }
 
 
 /*
  * Internal state flags.
  */
 #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
+#define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a8..17f27a2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
 	if (!count)
 		return 0; /* skip atime */
 
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	}
 	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	xfs_ilock(ip, iolock);
+	if (!xfs_ilock_nowait(ip, iolock)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, iolock);
+	}
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
 	 * otherwise demote the lock if we had to take the exclusive lock
 	 * for other reasons in xfs_file_aio_write_checks.
 	 */
-	if (unaligned_io)
-		inode_dio_wait(inode);
-	else if (iolock == XFS_IOLOCK_EXCL) {
+	if (unaligned_io) {
+		/* If we are going to wait for other DIO to finish, bail */
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			if (atomic_read(&inode->i_dio_count))
+				return -EAGAIN;
+		} else {
+			inode_dio_wait(inode);
+		}
+	} else if (iolock == XFS_IOLOCK_EXCL) {
 		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 		iolock = XFS_IOLOCK_SHARED;
 	}
@@ -585,7 +599,12 @@ xfs_file_dax_write(
 	size_t			count;
 	loff_t			pos;
 
-	xfs_ilock(ip, iolock);
+	if (!xfs_ilock_nowait(ip, iolock)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		xfs_ilock(ip, iolock);
+	}
+
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
 		return -EFBIG;
 	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
 		return -EIO;
+	file->f_mode |= FMODE_AIO_NOWAIT;
 	return 0;
 }
 
@@ -1043,49 +1063,17 @@ xfs_find_get_desired_pgoff(
 
 	index = startoff >> PAGE_SHIFT;
 	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = endoff >> PAGE_SHIFT;
+	end = (endoff - 1) >> PAGE_SHIFT;
 	do {
 		int		want;
 		unsigned	nr_pages;
 		unsigned int	i;
 
-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  want);
-		/*
-		 * No page mapped into given range.  If we are searching holes
-		 * and if this is the first time we got into the loop, it means
-		 * that the given offset is landed in a hole, return it.
-		 *
-		 * If we have already stepped through some block buffers to find
-		 * holes but they all contains data.  In this case, the last
-		 * offset is already updated and pointed to the end of the last
-		 * mapped page, if it does not reach the endpoint to search,
-		 * that means there should be a hole between them.
-		 */
-		if (nr_pages == 0) {
-			/* Data search found nothing */
-			if (type == DATA_OFF)
-				break;
-
-			ASSERT(type == HOLE_OFF);
-			if (lastoff == startoff || lastoff < endoff) {
-				found = true;
-				*offset = lastoff;
-			}
+		if (nr_pages == 0)
 			break;
-		}
-
-		/*
-		 * At lease we found one page.  If this is the first time we
-		 * step into the loop, and if the first page index offset is
-		 * greater than the given search offset, a hole was found.
-		 */
-		if (type == HOLE_OFF && lastoff == startoff &&
-		    lastoff < page_offset(pvec.pages[0])) {
-			found = true;
-			break;
-		}
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page	*page = pvec.pages[i];
@@ -1098,18 +1086,18 @@ xfs_find_get_desired_pgoff(
 			 * file mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 *
-			 * Searching done if the page index is out of range.
-			 * If the current offset is not reaches the end of
-			 * the specified search range, there should be a hole
-			 * between them.
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
 			 */
-			if (page->index > end) {
-				if (type == HOLE_OFF && lastoff < endoff) {
-					*offset = lastoff;
-					found = true;
-				}
+			if (type == HOLE_OFF && lastoff < endoff &&
+			    lastoff < page_offset(pvec.pages[i])) {
+				found = true;
+				*offset = lastoff;
 				goto out;
 			}
+			/* Searching done if the page index is out of range. */
+			if (page->index > end)
+				goto out;
 
 			lock_page(page);
 			/*
@@ -1151,21 +1139,20 @@ xfs_find_get_desired_pgoff(
 
 		/*
 		 * The number of returned pages less than our desired, search
-		 * done.  In this case, nothing was found for searching data,
-		 * but we found a hole behind the last offset.
+		 * done.
 		 */
-		if (nr_pages < want) {
-			if (type == HOLE_OFF) {
-				*offset = lastoff;
-				found = true;
-			}
+		if (nr_pages < want)
 			break;
-		}
 
 		index = pvec.pages[i - 1]->index + 1;
 		pagevec_release(&pvec);
 	} while (index <= end);
 
+	/* No page at lastoff and we are not done - we found a hole. */
+	if (type == HOLE_OFF && lastoff < endoff) {
+		*offset = lastoff;
+		found = true;
+	}
 out:
 	pagevec_release(&pvec);
 	return found;
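
The xfs_file.c hunks above add IOCB_NOWAIT support: try to take the inode I/O lock without sleeping, and return -EAGAIN if that fails and the caller asked not to block. A minimal userspace sketch of the trylock-or-EAGAIN pattern, with a pthread mutex standing in for the iolock:

  #include <errno.h>
  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;

  /* Returns 0 on success, -EAGAIN if nowait was requested and the lock is busy. */
  static int do_write(bool nowait)
  {
  	if (pthread_mutex_trylock(&iolock) != 0) {
  		if (nowait)
  			return -EAGAIN;        /* cf. the IOCB_NOWAIT handling above */
  		pthread_mutex_lock(&iolock);   /* otherwise block as before */
  	}

  	/* ... the actual I/O would happen here ... */

  	pthread_mutex_unlock(&iolock);
  	return 0;
  }

  int main(void)
  {
  	printf("uncontended: %d\n", do_write(true));          /* 0 */

  	pthread_mutex_lock(&iolock);                          /* simulate contention */
  	printf("contended, nowait: %d\n", do_write(true));    /* -EAGAIN */
  	pthread_mutex_unlock(&iolock);
  	return 0;
  }
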
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3683819..814ed729 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -828,6 +828,7 @@ xfs_getfsmap(
 	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
 	struct xfs_getfsmap_dev		handlers[XFS_GETFSMAP_DEVS];
 	struct xfs_getfsmap_info	info = { NULL };
+	bool				use_rmap;
 	int				i;
 	int				error = 0;
 
@@ -837,12 +838,14 @@ xfs_getfsmap(
 	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
 		return -EINVAL;
 
+	use_rmap = capable(CAP_SYS_ADMIN) &&
+		   xfs_sb_version_hasrmapbt(&mp->m_sb);
 	head->fmh_entries = 0;
 
 	/* Set up our device handlers. */
 	memset(handlers, 0, sizeof(handlers));
 	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (use_rmap)
 		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
 	else
 		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f61c84f8..b9c12e1 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,6 @@ xfs_inode_alloc(
 
 	XFS_STATS_INC(mp, vn_active);
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(!xfs_isiflocked(ip));
 	ASSERT(ip->i_ino == 0);
 
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
 {
 	struct xfs_mount	*mp = pag->pag_mount;
 
-	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	lockdep_assert_held(&pag->pag_ici_lock);
 	if (pag->pag_ici_reclaimable++)
 		return;
 
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
 {
 	struct xfs_mount	*mp = pag->pag_mount;
 
-	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	lockdep_assert_held(&pag->pag_ici_lock);
 	if (--pag->pag_ici_reclaimable)
 		return;
 
@@ -270,12 +269,12 @@ xfs_inew_wait(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (!xfs_iflags_test(ip, XFS_INEW))
 			break;
 		schedule();
 	} while (true);
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c..c0a1e84 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -622,12 +622,12 @@ __xfs_iflock(
 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
 
 	do {
-		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_isiflocked(ip))
 			io_schedule();
 	} while (!xfs_iflock_nowait(ip));
 
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 STATIC uint
@@ -2486,11 +2486,11 @@ __xfs_iunpin_wait(
 	xfs_iunpin(ip);
 
 	do {
-		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 		if (xfs_ipincount(ip))
 			io_schedule();
 	} while (xfs_ipincount(ip));
-	finish_wait(wq, &wait.wait);
+	finish_wait(wq, &wait.wq_entry);
 }
 
 void
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 08cb7d1..013cc78 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -834,9 +834,7 @@ xfs_inode_item_format_convert(
 		in_f->ilf_dsize = in_f32->ilf_dsize;
 		in_f->ilf_ino = in_f32->ilf_ino;
 		/* copy biggest field of ilf_u */
-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-		       in_f32->ilf_u.ilfu_uuid.__u_bits,
-		       sizeof(uuid_t));
+		uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
 		in_f->ilf_blkno = in_f32->ilf_blkno;
 		in_f->ilf_len = in_f32->ilf_len;
 		in_f->ilf_boffset = in_f32->ilf_boffset;
@@ -851,9 +849,7 @@ xfs_inode_item_format_convert(
 		in_f->ilf_dsize = in_f64->ilf_dsize;
 		in_f->ilf_ino = in_f64->ilf_ino;
 		/* copy biggest field of ilf_u */
-		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-		       in_f64->ilf_u.ilfu_uuid.__u_bits,
-		       sizeof(uuid_t));
+		uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
 		in_f->ilf_blkno = in_f64->ilf_blkno;
 		in_f->ilf_len = in_f64->ilf_len;
 		in_f->ilf_boffset = in_f64->ilf_boffset;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf..05dc87e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
 		lockmode = xfs_ilock_data_map_shared(ip);
 	}
 
+	if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+		error = -EAGAIN;
+		goto out_unlock;
+	}
+
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
 		length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
 
 	if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
 		if (flags & IOMAP_DIRECT) {
+			/*
+			 * A reflinked inode will result in CoW alloc.
+			 * FIXME: It could still overwrite on unshared extents
+			 * and not need allocation.
+			 */
+			if (flags & IOMAP_NOWAIT) {
+				error = -EAGAIN;
+				goto out_unlock;
+			}
 			/* may drop and re-acquire the ilock */
 			error = xfs_reflink_allocate_cow(ip, &imap, &shared,
 					&lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
 
 	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
+		 * If nowait is set bail since we are going to make
+		 * allocations.
+		 */
+		if (flags & IOMAP_NOWAIT) {
+			error = -EAGAIN;
+			goto out_unlock;
+		}
+		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
 		 * with the work writeback does. This is a completely arbitrary
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 044fb0e..2d167fe 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -19,6 +19,7 @@
 #define __XFS_LINUX__
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /*
  * Kernel specific type declarations for XFS
@@ -42,7 +43,6 @@ typedef __u32			xfs_nlink_t;
 
 #include "kmem.h"
 #include "mrlock.h"
-#include "uuid.h"
 
 #include <linux/semaphore.h>
 #include <linux/mm.h>
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cd0b077..8cec1e5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -352,13 +352,13 @@ xlog_header_check_mount(
 {
 	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
 
-	if (uuid_is_nil(&head->h_fs_uuid)) {
+	if (uuid_is_null(&head->h_fs_uuid)) {
 		/*
 		 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
-		 * h_fs_uuid is nil, we assume this log was last mounted
+		 * h_fs_uuid is null, we assume this log was last mounted
 		 * by IRIX and continue.
 		 */
-		xfs_warn(mp, "nil uuid in log - IRIX style log");
+		xfs_warn(mp, "null uuid in log - IRIX style log");
 	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
 		xfs_warn(mp, "log has mismatched uuid - can't recover");
 		xlog_header_check_dump(mp, head);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2eaf818..d249546 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -74,20 +74,19 @@ xfs_uuid_mount(
 	int			hole, i;
 
 	/* Publish UUID in struct super_block */
-	BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
-	memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+	uuid_copy(&mp->m_super->s_uuid, uuid);
 
 	if (mp->m_flags & XFS_MOUNT_NOUUID)
 		return 0;
 
-	if (uuid_is_nil(uuid)) {
-		xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+	if (uuid_is_null(uuid)) {
+		xfs_warn(mp, "Filesystem has null UUID - can't mount");
 		return -EINVAL;
 	}
 
 	mutex_lock(&xfs_uuid_table_mutex);
 	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
-		if (uuid_is_nil(&xfs_uuid_table[i])) {
+		if (uuid_is_null(&xfs_uuid_table[i])) {
 			hole = i;
 			continue;
 		}
@@ -124,7 +123,7 @@ xfs_uuid_unmount(
 
 	mutex_lock(&xfs_uuid_table_mutex);
 	for (i = 0; i < xfs_uuid_table_size; i++) {
-		if (uuid_is_nil(&xfs_uuid_table[i]))
+		if (uuid_is_null(&xfs_uuid_table[i]))
 			continue;
 		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
 			continue;
@@ -793,7 +792,10 @@ xfs_mountfs(
 	 *  Copies the low order bits of the timestamp and the randomly
 	 *  set "sequence" number out of a UUID.
 	 */
-	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+	mp->m_fixedfsid[0] =
+		(get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
+	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
 
 	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
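
With the private uuid helpers removed, xfs_mountfs() open-codes extracting m_fixedfsid from the raw UUID bytes with unaligned big-endian loads. A standalone sketch of the same extraction; the sample UUID bytes are arbitrary:

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t be16_at(const uint8_t *p)
  {
  	return ((uint32_t)p[0] << 8) | p[1];
  }

  static uint32_t be32_at(const uint8_t *p)
  {
  	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
  	       ((uint32_t)p[2] << 8) | p[3];
  }

  int main(void)
  {
  	/* 16 raw UUID bytes; values are arbitrary for the example. */
  	uint8_t b[16] = {
  		0x12, 0x34, 0x56, 0x78,               /* time_low  (bytes 0..3) */
  		0xaa, 0xbb,                           /* time_mid  (bytes 4..5) */
  		0xcc, 0xdd,                           /* time_hi   (bytes 6..7) */
  		0xee, 0xff,                           /* clock_seq (bytes 8..9) */
  		0x01, 0x02, 0x03, 0x04, 0x05, 0x06,   /* node */
  	};
  	uint32_t fsid[2];

  	/* same byte offsets as the hunk above */
  	fsid[0] = (be16_at(&b[8]) << 16) | be16_at(&b[4]);
  	fsid[1] = be32_at(&b[0]);

  	printf("fsid[0] = 0x%08x\n", fsid[0]);   /* 0xeeffaabb */
  	printf("fsid[1] = 0x%08x\n", fsid[1]);   /* 0x12345678 */
  	return 0;
  }
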
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575..97df4db 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
 xfs_init_zones(void)
 {
 	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
-			offsetof(struct xfs_ioend, io_inline_bio));
+			offsetof(struct xfs_ioend, io_inline_bio),
+			BIOSET_NEED_BVECS);
 	if (!xfs_ioend_bioset)
 		goto out;
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 197f3ff..c1b163c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -61,17 +61,18 @@ bool acpi_ata_match(acpi_handle handle);
 bool acpi_bay_match(acpi_handle handle);
 bool acpi_dock_match(acpi_handle handle);
 
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
-union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 			u64 rev, u64 func, union acpi_object *argv4);
 
 static inline union acpi_object *
-acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
-			union acpi_object *argv4, acpi_object_type type)
+acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+			u64 func, union acpi_object *argv4,
+			acpi_object_type type)
 {
 	union acpi_object *obj;
 
-	obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+	obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	if (obj && obj->type != type) {
 		ACPI_FREE(obj);
 		obj = NULL;
@@ -210,7 +211,8 @@ struct acpi_device_flags {
 	u32 of_compatible_ok:1;
 	u32 coherent_dma:1;
 	u32 cca_seen:1;
-	u32 reserved:20;
+	u32 spi_i2c_slave:1;
+	u32 reserved:19;
 };
 
 /* File System */
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index d92543f..bdc55c0 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -374,6 +374,20 @@ struct acpi_table_desc {
 	u16 validation_count;
 };
 
+/*
+ * Maximum value of the validation_count field in struct acpi_table_desc.
+ * When reached, validation_count cannot be changed any more and the table will
+ * be permanently regarded as validated.
+ *
+ * This is to prevent situations in which unbalanced table get/put operations
+ * may cause premature table unmapping in the OS.
+ *
+ * The maximum validation count can be set to any value, but should be
+ * greater than the maximum number of OS early stage mapping slots to avoid
+ * leaking early stage table mappings to the late stage.
+ */
+#define ACPI_MAX_TABLE_VALIDATIONS          ACPI_UINT16_MAX
+
 /* Masks for Flags field above */
 
 #define ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL  (0)	/* Virtual address, external maintained */
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index c0bd0d7..bb83731 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -913,4 +913,55 @@ void drm_dp_aux_unregister(struct drm_dp_aux *aux);
 int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc);
 int drm_dp_stop_crc(struct drm_dp_aux *aux);
 
+struct drm_dp_dpcd_ident {
+	u8 oui[3];
+	u8 device_id[6];
+	u8 hw_rev;
+	u8 sw_major_rev;
+	u8 sw_minor_rev;
+} __packed;
+
+/**
+ * struct drm_dp_desc - DP branch/sink device descriptor
+ * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch).
+ * @quirks: Quirks; use drm_dp_has_quirk() to query for the quirks.
+ */
+struct drm_dp_desc {
+	struct drm_dp_dpcd_ident ident;
+	u32 quirks;
+};
+
+int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc,
+		     bool is_branch);
+
+/**
+ * enum drm_dp_quirk - Display Port sink/branch device specific quirks
+ *
+ * Display Port sink and branch devices in the wild have a variety of bugs; try
+ * to collect them here. The quirks are shared, but it's up to the drivers to
+ * implement workarounds for them.
+ */
+enum drm_dp_quirk {
+	/**
+	 * @DP_DPCD_QUIRK_LIMITED_M_N:
+	 *
+	 * The device requires main link attributes Mvid and Nvid to be limited
+	 * to 16 bits.
+	 */
+	DP_DPCD_QUIRK_LIMITED_M_N,
+};
+
+/**
+ * drm_dp_has_quirk() - does the DP device have a specific quirk
+ * @desc: Device descriptor filled by drm_dp_read_desc()
+ * @quirk: Quirk to query for
+ *
+ * Return true if DP device identified by @desc has @quirk.
+ */
+static inline bool
+drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk)
+{
+	return desc->quirks & BIT(quirk);
+}
+
 #endif /* _DRM_DP_HELPER_H_ */
diff --git a/include/dt-bindings/clock/sun50i-a64-ccu.h b/include/dt-bindings/clock/sun50i-a64-ccu.h
index 370c0a0..d66432c 100644
--- a/include/dt-bindings/clock/sun50i-a64-ccu.h
+++ b/include/dt-bindings/clock/sun50i-a64-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_
 #define _DT_BINDINGS_CLK_SUN50I_A64_H_
 
+#define CLK_PLL_PERIPH0		11
+
 #define CLK_BUS_MIPI_DSI	28
 #define CLK_BUS_CE		29
 #define CLK_BUS_DMA		30
diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index c2afc41..e139fe5 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
 #define _DT_BINDINGS_CLK_SUN8I_H3_H_
 
+#define CLK_PLL_PERIPH0		9
+
 #define CLK_CPUX		14
 
 #define CLK_BUS_CE		20
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 137e4a3..cafdfb8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -26,6 +26,7 @@
 #include <linux/resource_ext.h>
 #include <linux/device.h>
 #include <linux/property.h>
+#include <linux/uuid.h>
 
 #ifndef _LINUX
 #define _LINUX
@@ -457,7 +458,6 @@ struct acpi_osc_context {
 	struct acpi_buffer ret;		/* free by caller if success */
 };
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
@@ -741,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 }
 
 static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
-						   const u8 *uuid,
+						   const guid_t *guid,
 						   int rev, int func,
 						   union acpi_object *argv4)
 {
diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index 2793652..a414a2b 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -8,6 +8,7 @@
 #ifndef __BCM47XX_NVRAM_H
 #define __BCM47XX_NVRAM_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d1b04b0..664a27d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
 /*
  * will die
  */
-#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
 #define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
 
 /*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 	return bio_split(bio, sectors, gfp, bs);
 }
 
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+	BIOSET_NEED_BVECS = BIT(0),
+	BIOSET_NEED_RESCUER = BIT(1),
+};
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 }
 
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
 {
-	bio->bi_error = -EIO;
+	bio->bi_status = BLK_STS_IOERR;
+	bio_endio(bio);
+}
+
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+	bio->bi_status = BLK_STS_AGAIN;
 	bio_endio(bio);
 }
 
@@ -426,6 +429,7 @@ extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
 		     unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c47aa24..1454230 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -33,14 +33,12 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
-	wait_queue_t		dispatch_wait;
+	wait_queue_entry_t		dispatch_wait;
 	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
 	struct blk_mq_tags	*sched_tags;
 
-	struct srcu_struct	queue_rq_srcu;
-
 	unsigned long		queued;
 	unsigned long		run;
 #define BLK_MQ_MAX_DISPATCH_ORDER	7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
 #endif
+
+	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
+	struct srcu_struct	queue_rq_srcu[0];
 };
 
 struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
 	bool last;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+		const struct blk_mq_queue_data *);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
 	init_request_fn		*init_request;
 	exit_request_fn		*exit_request;
 	reinit_request_fn	*reinit_request;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	map_queues_fn		*map_queues;
 
@@ -155,10 +159,6 @@ struct blk_mq_ops {
 };
 
 enum {
-	BLK_MQ_RQ_QUEUE_OK	= 0,	/* queued fine */
-	BLK_MQ_RQ_QUEUE_BUSY	= 1,	/* requeue IO for later */
-	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
-
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
@@ -204,10 +204,10 @@ enum {
 	BLK_MQ_REQ_INTERNAL	= (1 << 2), /* allocate internal/sched tag */
 };
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
-		unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+		unsigned int op, unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 enum {
@@ -230,15 +230,14 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 
 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
-void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 bool blk_mq_queue_stopped(struct request_queue *q);
@@ -248,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -265,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc..d2eb87c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,27 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
+/*
+ * Block error status values.  See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define	BLK_STS_OK 0
+#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
+#define BLK_STS_NOSPC		((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
+#define BLK_STS_TARGET		((__force blk_status_t)5)
+#define BLK_STS_NEXUS		((__force blk_status_t)6)
+#define BLK_STS_MEDIUM		((__force blk_status_t)7)
+#define BLK_STS_PROTECTION	((__force blk_status_t)8)
+#define BLK_STS_RESOURCE	((__force blk_status_t)9)
+#define BLK_STS_IOERR		((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN		((__force blk_status_t)12)
+
 struct blk_issue_stat {
 	u64 stat;
 };
@@ -28,13 +49,14 @@ struct blk_issue_stat {
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct block_device	*bi_bdev;
-	int			bi_error;
+	blk_status_t		bi_status;
 	unsigned int		bi_opf;		/* bottom bits req flags,
 						 * top bits REQ_OP. Use
 						 * accessors.
 						 */
 	unsigned short		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
+	unsigned short		bi_write_hint;
 
 	struct bvec_iter	bi_iter;
 
@@ -205,6 +227,7 @@ enum req_flag_bits {
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
 
+	__REQ_NOWAIT,           /* Don't wait if request will block */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -223,6 +246,7 @@ enum req_flag_bits {
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
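
blk_types.h now defines the opaque blk_status_t codes, and code such as the xfs_aops.c and xfs_buf.c hunks earlier in this diff translates them with blk_status_to_errno()/errno_to_blk_status(). A simplified userspace model of such a translation table; the real mapping lives in block/blk-core.c and only a subset of statuses is shown here:

  #include <errno.h>
  #include <stdio.h>

  typedef unsigned char blk_status_t;

  #define BLK_STS_OK      0
  #define BLK_STS_NOTSUPP 1
  #define BLK_STS_TIMEOUT 2
  #define BLK_STS_NOSPC   3
  #define BLK_STS_IOERR   10
  #define BLK_STS_AGAIN   12

  /* Illustrative mapping only; see block/blk-core.c for the real table. */
  static const int toy_errors[] = {
  	[BLK_STS_OK]      = 0,
  	[BLK_STS_NOTSUPP] = -EOPNOTSUPP,
  	[BLK_STS_TIMEOUT] = -ETIMEDOUT,
  	[BLK_STS_NOSPC]   = -ENOSPC,
  	[BLK_STS_IOERR]   = -EIO,
  	[BLK_STS_AGAIN]   = -EAGAIN,
  };

  static int toy_status_to_errno(blk_status_t status)
  {
  	if (status >= sizeof(toy_errors) / sizeof(toy_errors[0]) ||
  	    (status != BLK_STS_OK && toy_errors[status] == 0))
  		return -EIO;   /* unknown or unlisted status: fall back to -EIO */
  	return toy_errors[status];
  }

  int main(void)
  {
  	printf("BLK_STS_AGAIN -> %d\n", toy_status_to_errno(BLK_STS_AGAIN));
  	printf("BLK_STS_IOERR -> %d\n", toy_status_to_errno(BLK_STS_IOERR));
  	return 0;
  }
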
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ab92c4e..25f6a0c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
  */
 #define BLKCG_MAX_POLS		3
 
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
 #define BLK_RL_SYNCFULL		(1U << 0)
 #define BLK_RL_ASYNCFULL	(1U << 1)
@@ -225,6 +225,8 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
+	unsigned short write_hint;
+
 	unsigned long deadline;
 	struct list_head timeout_list;
 
@@ -391,6 +393,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
@@ -410,8 +414,12 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	/* Called just after a request is allocated */
 	init_rq_fn		*init_rq_fn;
+	/* Called just before a request is freed */
 	exit_rq_fn		*exit_rq_fn;
+	/* Called from inside blk_get_request() */
+	void (*initialize_rq_fn)(struct request *rq);
 
 	const struct blk_mq_ops	*mq_ops;
 
@@ -586,6 +594,11 @@ struct request_queue {
 
 	size_t			cmd_size;
 	void			*rq_alloc_data;
+
+	struct work_struct	release_work;
+
+#define BLK_MAX_WRITE_HINTS	5
+	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
@@ -618,6 +631,8 @@ struct request_queue {
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
 #define QUEUE_FLAG_POLL_STATS  28	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  29	/* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30	/* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED    31	/* queue has been quiesced */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -629,6 +644,13 @@ struct request_queue {
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_POLL))
 
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
 	if (q->queue_lock)
@@ -708,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_secure_erase(q) \
 	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q)	\
+	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 
 static inline bool blk_account_rq(struct request *rq)
 {
@@ -810,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
 
 static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 {
-	if (bio_data(a) == bio_data(b))
+	if (bio_page(a) == bio_page(b) &&
+	    bio_offset(a) == bio_offset(b))
 		return true;
 
 	return false;
@@ -858,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
 #define BLK_MIN_SG_TIMEOUT	(7 * HZ)
 
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
-	return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
 struct rq_map_data {
 	struct page **pages;
 	int page_order;
@@ -929,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+				       gfp_t gfp_mask);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -937,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     int (*bio_ctr)(struct bio *, struct bio *, void *),
 			     void *data);
 extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
-			    struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -963,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
@@ -977,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
 bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1109,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
 			       unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
 			    unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
 			      unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
@@ -1370,11 +1385,6 @@ enum blk_default_limits {
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
-	return q->limits.bounce_pfn;
-}
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
@@ -1776,7 +1786,7 @@ struct blk_integrity_iter {
 	const char		*disk_name;
 };
 
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
 
 struct blk_integrity_profile {
 	integrity_processing_fn		*generate_fn;
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index aa2e191..51c5bd6 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -3,6 +3,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/string.h>
+
 #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
 
 /*
@@ -12,12 +14,10 @@
  */
 
 # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
-extern const char *ceph_file_part(const char *s, int len);
 #  define dout(fmt, ...)						\
 	pr_debug("%.*s %12.12s:%-4d : " fmt,				\
 		 8 - (int)sizeof(KBUILD_MODNAME), "    ",		\
-		 ceph_file_part(__FILE__, sizeof(__FILE__)),		\
-		 __LINE__, ##__VA_ARGS__)
+		 kbasename(__FILE__), __LINE__, ##__VA_ARGS__)
 # else
 /* faux printk call just to see any compiler warnings. */
 #  define dout(fmt, ...)	do {				\
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 2174594..ec47101 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -48,6 +48,7 @@ enum {
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 	CSS_VISIBLE	= (1 << 3), /* css is visible to userland */
+	CSS_DYING	= (1 << 4), /* css is dying */
 };
 
 /* bits in struct cgroup flags field */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed2573e..710a005 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -344,6 +344,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 }
 
 /**
+ * css_is_dying - test whether the specified css is dying
+ * @css: target css
+ *
+ * Test whether @css is in the process of offlining or already offline.  In
+ * most cases, ->css_online() and ->css_offline() callbacks should be
+ * enough; however, the actual offline operations are RCU delayed and this
+ * test returns %true also when @css is scheduled to be offlined.
+ *
+ * This is useful, for example, when the use case requires synchronous
+ * behavior with respect to cgroup removal.  cgroup removal schedules css
+ * offlining but the css can seem alive while the operation is being
+ * delayed.  If the delay affects user visible semantics, this test can be
+ * used to resolve the situation.
+ */
+static inline bool css_is_dying(struct cgroup_subsys_state *css)
+{
+	return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
+}
+
+/**
  * css_put - put a css reference
  * @css: target css
  *
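As a rough illustration of the new helper (not part of this patch), a controller that needs synchronous behaviour around cgroup removal might bail out early; the callback name and the -ENODEV policy below are made up for the example:

#include <linux/cgroup.h>

static int example_can_attach(struct cgroup_subsys_state *css)
{
	/* Refuse new work once the css has been scheduled for offlining. */
	if (css_is_dying(css))
		return -ENODEV;
	return 0;
}
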
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index fccf7f4..bbb3712 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -27,7 +27,7 @@ struct cleancache_filekey {
 
 struct cleancache_ops {
 	int (*init_fs)(size_t);
-	int (*init_shared_fs)(char *uuid, size_t);
+	int (*init_shared_fs)(uuid_t *uuid, size_t);
 	int (*get_page)(int, struct cleancache_filekey,
 			pgoff_t, struct page *);
 	void (*put_page)(int, struct cleancache_filekey,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f2b10d9..8149045 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -96,6 +96,7 @@ struct clocksource {
 	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
 	void (*mark_unstable)(struct clocksource *cs);
+	void (*tick_stable)(struct clocksource *cs);
 
 	/* private: */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index de17999..d614c5e 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -15,3 +15,11 @@
  * with any version that can compile the kernel
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+/*
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well.
+ */
+#undef inline
+#define inline inline __attribute__((unused)) notrace
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f811005..707242fd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,11 +17,7 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-#ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else /* CONFIG_SPARSE_RCU_POINTER */
-# define __rcu
-#endif /* CONFIG_SPARSE_RCU_POINTER */
 # define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 2319b8c..c967090 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item,
 				       const char *name,
 				       struct config_item_type *type);
 
-extern struct config_item * config_item_get(struct config_item *);
+extern struct config_item *config_item_get(struct config_item *);
+extern struct config_item *config_item_get_unless_zero(struct config_item *);
 extern void config_item_put(struct config_item *);
 
 struct config_item_type {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 2404ad2..4bf4479 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node);
 		(cpu) = cpumask_next_zero((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids;)
 
+extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+
+/**
+ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ * @start: the start location
+ *
+ * The implementation does not assume any bit in @mask is set (including @start).
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_wrap(cpu, mask, start)					\
+	for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false);	\
+	     (cpu) < nr_cpumask_bits;						\
+	     (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
@@ -276,6 +293,12 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+{
+	__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
+
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
@@ -286,6 +309,11 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
 	clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
 /**
  * cpumask_test_cpu - test for a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
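For illustration only (not from the patch), the new wrapping iterator can spread work across CPUs starting from an arbitrary position; the starting point and the pr_debug() body are placeholders:

#include <linux/cpumask.h>
#include <linux/printk.h>

static void example_visit_cpus(int start)
{
	int cpu;

	/* Visit every online CPU exactly once, beginning at @start. */
	for_each_cpu_wrap(cpu, cpu_online_mask, start)
		pr_debug("visiting cpu %d\n", cpu);
}
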
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c..456da50 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error);
+			    struct bio *bio, blk_status_t *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
-				    struct request *clone, int error,
+				    struct request *clone, blk_status_t error,
 				    union map_info *map_context);
 
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 4eac267..92f2083 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -78,6 +78,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
 struct iommu_domain;
 struct msi_msg;
+struct device;
 
 static inline int iommu_dma_init(void)
 {
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 5e9c74c..9bbf21a 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; }
 static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
-	void *private_data) { return -1; }
+	void *private_data) { return -ENXIO; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
 	{ return false; }
 static inline void dmi_memdev_name(u16 handle, const char **bank,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec36f42..8269bcb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -137,6 +137,18 @@ struct efi_boot_memmap {
 #define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
 #define EFI_CAPSULE_INITIATE_RESET		0x00040000
 
+struct capsule_info {
+	efi_capsule_header_t	header;
+	int			reset_type;
+	long			index;
+	size_t			count;
+	size_t			total_size;
+	phys_addr_t		*pages;
+	size_t			page_bytes_remain;
+};
+
+int __efi_capsule_setup_info(struct capsule_info *cap_info);
+
 /*
  * Allocation types for calls to boottime->allocate_pages.
  */
@@ -1403,7 +1415,7 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
 				 size_t size, int *reset);
 
 extern int efi_capsule_update(efi_capsule_header_t *capsule,
-			      struct page **pages);
+			      phys_addr_t *pages);
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 9ec5e22..5bc8f86 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-	struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
-	void (*put_request)(struct request *);
+	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+	void (*prepare_request)(struct request *, struct bio *bio);
+	void (*finish_request)(struct request *);
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
-	int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
-	void (*put_rq_priv)(struct request_queue *, struct request *);
 	void (*init_icq)(struct io_cq *);
 	void (*exit_icq)(struct io_cq *);
 };
@@ -153,7 +152,7 @@ struct elevator_type
 #endif
 
 	/* managed by elevator core */
-	char icq_cache_name[ELV_NAME_MAX + 5];	/* elvname + "_io_cq" */
+	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
 	struct list_head list;
 };
 
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index ff0b981..9e4befd 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -37,7 +37,7 @@ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
 				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
@@ -73,7 +73,7 @@ static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
 }
 
 static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
-						wait_queue_t *wait, __u64 *cnt)
+						wait_queue_entry_t *wait, __u64 *cnt)
 {
 	return -ENOSYS;
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 56197f8..62d948f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -272,6 +272,16 @@ struct bpf_prog_aux;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 803e5a9..771fe11 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2,7 +2,7 @@
 #define _LINUX_FS_H
 
 #include <linux/linkage.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/kdev_t.h>
 #include <linux/dcache.h>
 #include <linux/path.h>
@@ -20,6 +20,7 @@
 #include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
+#include <linux/fcntl.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
@@ -30,6 +31,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/delayed_call.h>
+#include <linux/uuid.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -142,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT	((__force fmode_t)0x8000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -261,6 +266,18 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+	WRITE_LIFE_NOT_SET	= 0,
+	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
+	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
+	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
+	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
+	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
+};
+
 #define IOCB_EVENTFD		(1 << 0)
 #define IOCB_APPEND		(1 << 1)
 #define IOCB_DIRECT		(1 << 2)
@@ -268,6 +285,7 @@ struct writeback_control;
 #define IOCB_DSYNC		(1 << 4)
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
+#define IOCB_NOWAIT		(1 << 7)
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -275,6 +293,7 @@ struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
+	enum rw_hint		ki_hint;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -282,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
 	return kiocb->ki_complete == NULL;
 }
 
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-		.ki_filp = filp,
-		.ki_flags = iocb_flags(filp),
-	};
-}
-
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -592,6 +601,7 @@ struct inode {
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	unsigned int		i_blkbits;
+	enum rw_hint		i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
@@ -846,6 +856,7 @@ struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
+	enum rw_hint		f_write_hint;
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1021,8 +1032,6 @@ struct file_lock_context {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
-#include <linux/fcntl.h>
-
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 /*
@@ -1328,8 +1337,8 @@ struct super_block {
 
 	struct sb_writers	s_writers;
 
-	char s_id[32];				/* Informational name */
-	u8 s_uuid[16];				/* UUID */
+	char			s_id[32];	/* Informational name */
+	uuid_t			s_uuid;		/* UUID */
 
 	void 			*s_fs_info;	/* Filesystem private info */
 	unsigned int		s_max_links;
@@ -1873,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
 }
 
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+		return file->f_write_hint;
+
+	return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+		.ki_flags = iocb_flags(filp),
+		.ki_hint = file_write_hint(filp),
+	};
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -2517,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
 				   loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+				  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
@@ -2843,7 +2873,7 @@ enum {
 	DIO_SKIP_DIO_COUNT = 0x08,
 };
 
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			     struct block_device *bdev, struct iov_iter *iter,
@@ -3056,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
 	return res;
 }
 
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+	if (unlikely(flags & ~RWF_SUPPORTED))
+		return -EOPNOTSUPP;
+
+	if (flags & RWF_NOWAIT) {
+		if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+			return -EOPNOTSUPP;
+		ki->ki_flags |= IOCB_NOWAIT;
+	}
+	if (flags & RWF_HIPRI)
+		ki->ki_flags |= IOCB_HIPRI;
+	if (flags & RWF_DSYNC)
+		ki->ki_flags |= IOCB_DSYNC;
+	if (flags & RWF_SYNC)
+		ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+	return 0;
+}
+
 static inline ino_t parent_ino(struct dentry *dentry)
 {
 	ino_t res;
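A minimal sketch of how an I/O submission path might consume the new per-call flags and write hints; the function is hypothetical and only shows the intended call order, not any real filesystem:

static int example_setup_kiocb(struct kiocb *kiocb, struct file *filp, int rwf)
{
	int ret;

	init_sync_kiocb(kiocb, filp);		/* also copies the file's write hint */
	ret = kiocb_set_rw_flags(kiocb, rwf);	/* RWF_NOWAIT -> IOCB_NOWAIT, etc. */
	if (ret)
		return ret;
	if (kiocb->ki_flags & IOCB_NOWAIT)
		pr_debug("caller asked for non-blocking I/O\n");
	return 0;
}
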
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index acff943..e619fae 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
 	return NULL;
 }
 
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	uuid_be_to_bin(uuid_str, (uuid_be *)to);
-	return 0;
-}
-
 static inline int disk_max_parts(struct gendisk *disk)
 {
 	if (disk->flags & GENHD_FL_EXT_DEVT)
@@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno)
 	dev_t devt = MKDEV(0, 0);
 	return devt;
 }
-
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-	return -EINVAL;
-}
 #endif /* CONFIG_BLOCK */
 
 #endif /* _LINUX_GENHD_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b1a44f5..a89d37e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -41,7 +41,7 @@ struct vm_area_struct;
 #define ___GFP_WRITE		0x800000u
 #define ___GFP_KSWAPD_RECLAIM	0x1000000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x4000000u
+#define ___GFP_NOLOCKDEP	0x2000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index c0d712d..f738d50 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -56,7 +56,14 @@ struct gpiod_lookup_table {
 	.flags = _flags,                                                  \
 }
 
+#ifdef CONFIG_GPIOLIB
 void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
 void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
+#else
+static inline
+void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
+static inline
+void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
+#endif
 
 #endif /* __LINUX_GPIO_MACHINE_H */
diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 661e5c2..082dc1b 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -167,7 +167,6 @@ static inline void hash_del_rcu(struct hlist_node *node)
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
  * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca3..dc152e4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
 	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
 	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
-	int	(*reset_poll)(ide_drive_t *);
+	blk_status_t (*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
 	void	(*resetproc)(ide_drive_t *);
 	void	(*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
 extern int ide_vlb_clk;
 extern int ide_pci_clk;
 
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
 void ide_kill_rq(ide_drive_t *, struct request *);
 
 void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
 			      const struct ide_devset *setting, int arg);
 
 void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
 
 void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
 void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8d5fcd6..283dc2f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 						    netdev_features_t features)
 {
-	if (skb_vlan_tagged_multi(skb))
-		features = netdev_intersect_features(features,
-						     NETIF_F_SG |
-						     NETIF_F_HIGHDMA |
-						     NETIF_F_FRAGLIST |
-						     NETIF_F_HW_CSUM |
-						     NETIF_F_HW_VLAN_CTAG_TX |
-						     NETIF_F_HW_VLAN_STAG_TX);
+	if (skb_vlan_tagged_multi(skb)) {
+		/* In the case of multi-tagged packets, use a direct mask
+		 * instead of using netdev_intersect_features(), to make
+		 * sure that only devices supporting NETIF_F_HW_CSUM will
+		 * have checksum offloading support.
+		 */
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+			    NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_HW_VLAN_STAG_TX;
+	}
 
 	return features;
 }
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e78..69f4e94 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT		(1 << 5) /* Don't wait for writeback */
 
 struct iomap_ops {
 	/*
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index fffb912..1fa293a 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -417,6 +417,10 @@
 #define ICH_HCR_EN			(1 << 0)
 #define ICH_HCR_UIE			(1 << 1)
 
+#define ICH_VMCR_ACK_CTL_SHIFT		2
+#define ICH_VMCR_ACK_CTL_MASK		(1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT		3
+#define ICH_VMCR_FIQ_EN_MASK		(1 << ICH_VMCR_FIQ_EN_SHIFT)
 #define ICH_VMCR_CBPR_SHIFT		4
 #define ICH_VMCR_CBPR_MASK		(1 << ICH_VMCR_CBPR_SHIFT)
 #define ICH_VMCR_EOIM_SHIFT		9
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30f3d..d3453ee 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -25,7 +25,18 @@
 #define GICC_ENABLE			0x1
 #define GICC_INT_PRI_THRESHOLD		0xf0
 
-#define GIC_CPU_CTRL_EOImodeNS		(1 << 9)
+#define GIC_CPU_CTRL_EnableGrp0_SHIFT	0
+#define GIC_CPU_CTRL_EnableGrp0		(1 << GIC_CPU_CTRL_EnableGrp0_SHIFT)
+#define GIC_CPU_CTRL_EnableGrp1_SHIFT	1
+#define GIC_CPU_CTRL_EnableGrp1		(1 << GIC_CPU_CTRL_EnableGrp1_SHIFT)
+#define GIC_CPU_CTRL_AckCtl_SHIFT	2
+#define GIC_CPU_CTRL_AckCtl		(1 << GIC_CPU_CTRL_AckCtl_SHIFT)
+#define GIC_CPU_CTRL_FIQEn_SHIFT	3
+#define GIC_CPU_CTRL_FIQEn		(1 << GIC_CPU_CTRL_FIQEn_SHIFT)
+#define GIC_CPU_CTRL_CBPR_SHIFT		4
+#define GIC_CPU_CTRL_CBPR		(1 << GIC_CPU_CTRL_CBPR_SHIFT)
+#define GIC_CPU_CTRL_EOImodeNS_SHIFT	9
+#define GIC_CPU_CTRL_EOImodeNS		(1 << GIC_CPU_CTRL_EOImodeNS_SHIFT)
 
 #define GICC_IAR_INT_ID_MASK		0x3ff
 #define GICC_INT_SPURIOUS		1023
@@ -84,8 +95,19 @@
 #define GICH_LR_EOI			(1 << 19)
 #define GICH_LR_HW			(1 << 31)
 
-#define GICH_VMCR_CTRL_SHIFT		0
-#define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_ENABLE_GRP0_SHIFT	0
+#define GICH_VMCR_ENABLE_GRP0_MASK	(1 << GICH_VMCR_ENABLE_GRP0_SHIFT)
+#define GICH_VMCR_ENABLE_GRP1_SHIFT	1
+#define GICH_VMCR_ENABLE_GRP1_MASK	(1 << GICH_VMCR_ENABLE_GRP1_SHIFT)
+#define GICH_VMCR_ACK_CTL_SHIFT		2
+#define GICH_VMCR_ACK_CTL_MASK		(1 << GICH_VMCR_ACK_CTL_SHIFT)
+#define GICH_VMCR_FIQ_EN_SHIFT		3
+#define GICH_VMCR_FIQ_EN_MASK		(1 << GICH_VMCR_FIQ_EN_SHIFT)
+#define GICH_VMCR_CBPR_SHIFT		4
+#define GICH_VMCR_CBPR_MASK		(1 << GICH_VMCR_CBPR_SHIFT)
+#define GICH_VMCR_EOI_MODE_SHIFT	9
+#define GICH_VMCR_EOI_MODE_MASK		(1 << GICH_VMCR_EOI_MODE_SHIFT)
+
 #define GICH_VMCR_PRIMASK_SHIFT		27
 #define GICH_VMCR_PRIMASK_MASK		(0x1f << GICH_VMCR_PRIMASK_SHIFT)
 #define GICH_VMCR_BINPOINT_SHIFT	21
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 36872fb..734377a 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
+#ifndef __jiffy_arch_data
+#define __jiffy_arch_data
+#endif
+
 /*
  * The 64-bit value is not atomic - you MUST NOT read it
  * without sampling the sequence number in jiffies_lock.
  * get_jiffies_64() will do this for you as appropriate.
  */
 extern u64 __cacheline_aligned_in_smp jiffies_64;
-extern unsigned long volatile __cacheline_aligned_in_smp jiffies;
+extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 13bc08a..1c91f26 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -490,9 +490,13 @@ extern int root_mountflags;
 
 extern bool early_boot_irqs_disabled;
 
-/* Values used for system_state */
+/*
+ * Values used for system_state. Ordering of the states must not be changed
+ * as code checks for <, <=, >, >= STATE.
+ */
 extern enum system_states {
 	SYSTEM_BOOTING,
+	SYSTEM_SCHEDULING,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
diff --git a/include/linux/key.h b/include/linux/key.h
index 0c9b93b..78e25aa 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -173,7 +173,6 @@ struct key {
 #ifdef KEY_DEBUGGING
 	unsigned		magic;
 #define KEY_DEBUG_MAGIC		0x18273645u
-#define KEY_DEBUG_MAGIC_X	0xf8e9dacbu
 #endif
 
 	unsigned long		flags;		/* status flags (change with bitops) */
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 0c1de05..76c2fbc 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -46,7 +46,7 @@ struct kvm_kernel_irqfd_resampler {
 struct kvm_kernel_irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	/* Update side is protected by irqfds.lock */
 	struct kvm_kernel_irq_routing_entry irq_entry;
 	seqcount_t irq_entry_sc;
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 171baa9..d117381 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -110,6 +110,25 @@ static inline void init_llist_head(struct llist_head *list)
 	for ((pos) = (node); pos; (pos) = (pos)->next)
 
 /**
+ * llist_for_each_safe - iterate over some deleted entries of a lock-less list
+ *			 safe against removal of list entry
+ * @pos:	the &struct llist_node to use as a loop cursor
+ * @n:		another &struct llist_node to use as temporary storage
+ * @node:	the first entry of deleted list entries
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being deleted from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_safe(pos, n, node)			\
+	for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n))
+
+/**
  * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
  * @pos:	the type * to use as a loop cursor.
  * @node:	the first entry of deleted list entries.
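By way of example (not in the patch), the new safe iterator lets a consumer free nodes while walking a batch detached with llist_del_all(); struct my_item is a made-up type:

#include <linux/llist.h>
#include <linux/slab.h>

struct my_item {
	struct llist_node node;
	int payload;
};

static void example_drain(struct llist_head *head)
{
	struct llist_node *pos, *n;

	llist_for_each_safe(pos, n, llist_del_all(head)) {
		struct my_item *item = llist_entry(pos, struct my_item, node);

		kfree(item);	/* safe: @n was saved before @pos was freed */
	}
}
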
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 4ce24a3..8098695 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 }
 #endif
 
+extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr);
 #else
 static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
 {
 	return 0;
 }
 
+static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
+		phys_addr_t end_addr)
+{
+	return 0;
+}
+
 #endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index b4ee8f6..8e2828d 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -470,6 +470,7 @@ struct mlx4_update_qp_params {
 	u16	rate_val;
 };
 
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn);
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		   enum mlx4_update_qp_attr attr,
 		   struct mlx4_update_qp_params *params);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dd9a263..a940ec6 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -787,8 +787,14 @@ enum {
 };
 
 enum {
-	CQE_RSS_HTYPE_IP	= 0x3 << 6,
-	CQE_RSS_HTYPE_L4	= 0x3 << 2,
+	CQE_RSS_HTYPE_IP	= 0x3 << 2,
+	/* cqe->rss_hash_type[3:2] - IP destination selected for hash
+	 * (00 = none,  01 = IPv4, 10 = IPv6, 11 = Reserved)
+	 */
+	CQE_RSS_HTYPE_L4	= 0x3 << 6,
+	/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
+	 * (00 = none, 01 = TCP, 10 = UDP, 11 = IPSEC.SPI)
+	 */
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bcdf739..93273d9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -787,7 +787,12 @@ enum {
 
 typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 
+enum {
+	MLX5_CMD_ENT_STATE_PENDING_COMP,
+};
+
 struct mlx5_cmd_work_ent {
+	unsigned long		state;
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
 	void		       *uout;
@@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 32de072..edafedb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -766,6 +766,12 @@ enum {
 	MLX5_CAP_PORT_TYPE_ETH = 0x1,
 };
 
+enum {
+	MLX5_CAP_UMR_FENCE_STRONG	= 0x0,
+	MLX5_CAP_UMR_FENCE_SMALL	= 0x1,
+	MLX5_CAP_UMR_FENCE_NONE		= 0x2,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x80];
 
@@ -875,7 +881,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_202[0x1];
 	u8         ipoib_enhanced_offloads[0x1];
 	u8         ipoib_basic_offloads[0x1];
-	u8         reserved_at_205[0xa];
+	u8         reserved_at_205[0x5];
+	u8         umr_fence[0x2];
+	u8         reserved_at_20c[0x3];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cb17c6..6f543a47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1393,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
 	return !vma->vm_ops;
@@ -1414,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma);
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-					     unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-					   unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSUP) &&
-		(vma->vm_end == addr) &&
-		!vma_growsup(vma->vm_next, addr);
-}
-
 int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
@@ -2222,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping,
 				pgoff_t offset,
 				unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
@@ -2250,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
 	return vma;
 }
 
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_start = vma->vm_start;
+
+	if (vma->vm_flags & VM_GROWSDOWN) {
+		vm_start -= stack_guard_gap;
+		if (vm_start > vma->vm_start)
+			vm_start = 0;
+	}
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
+}
+
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -2327,6 +2324,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_COW	0x4000	/* internal GUP flag */
 
+static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+{
+	if (vm_fault & VM_FAULT_OOM)
+		return -ENOMEM;
+	if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+		return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT;
+	if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
+		return -EFAULT;
+	return 0;
+}
+
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
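For context, a sketch of the intended use of vm_fault_to_errno() in a get_user_pages()-style fault path; the wrapper function and its parameters are illustrative only:

static int example_fault_one_page(struct vm_area_struct *vma, unsigned long address,
				  unsigned int fault_flags, unsigned int foll_flags)
{
	int fault = handle_mm_fault(vma, address, fault_flags);

	/* Map VM_FAULT_* error bits to -ENOMEM, -EHWPOISON or -EFAULT. */
	if (fault & VM_FAULT_ERROR)
		return vm_fault_to_errno(fault, foll_flags);
	return 0;
}
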
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ebaccd4..ef6a13b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -678,6 +678,7 @@ typedef struct pglist_data {
 	 * is the first PFN that needs to be initialised.
 	 */
 	unsigned long first_deferred_pfn;
+	unsigned long static_init_size;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 566fda5..3f74ef2 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -467,6 +467,7 @@ enum dmi_field {
 	DMI_PRODUCT_VERSION,
 	DMI_PRODUCT_SERIAL,
 	DMI_PRODUCT_UUID,
+	DMI_PRODUCT_FAMILY,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6be1949..1ee7b30 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -457,7 +457,7 @@ enum hwparam_type {
 	hwparam_ioport,		/* Module parameter configures an I/O port */
 	hwparam_iomem,		/* Module parameter configures an I/O mem address */
 	hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
-	hwparam_irq,		/* Module parameter configures an I/O port */
+	hwparam_irq,		/* Module parameter configures an IRQ */
 	hwparam_dma,		/* Module parameter configures a DMA channel */
 	hwparam_dma_addr,	/* Module parameter configures a DMA buffer address */
 	hwparam_other,		/* Module parameter configures some other value */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3f39d27..4ed952c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -914,8 +914,7 @@ struct xfrmdev_ops {
  *
  * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
  *	Called when a user wants to change the Maximum Transfer Unit
- *	of a device. If not defined, any request to change MTU will
- *	will return an error.
+ *	of a device.
  *
  * void (*ndo_tx_timeout)(struct net_device *dev);
  *	Callback used when the transmitter has not made any progress
@@ -1596,8 +1595,8 @@ enum netdev_priv_flags {
  *	@rtnl_link_state:	This enum represents the phases of creating
  *				a new link
  *
- *	@destructor:		Called from unregister,
- *				can be used to call free_netdev
+ *	@needs_free_netdev:	Should unregister perform free_netdev?
+ *	@priv_destructor:	Called from unregister
  *	@npinfo:		XXX: need comments on this one
  * 	@nd_net:		Network namespace this network device is inside
  *
@@ -1858,7 +1857,8 @@ struct net_device {
 		RTNL_LINK_INITIALIZING,
 	} rtnl_link_state:16;
 
-	void (*destructor)(struct net_device *dev);
+	bool needs_free_netdev;
+	void (*priv_destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
 	struct netpoll_info __rcu	*npinfo;
@@ -4261,6 +4261,11 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
+static inline bool netdev_unregistering(const struct net_device *dev)
+{
+	return dev->reg_state == NETREG_UNREGISTERING;
+}
+
 static inline const char *netdev_reg_state(const struct net_device *dev)
 {
 	switch (dev->reg_state) {
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index be378cf..b3044c2c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m,
 int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u);
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size);
+		    int usersize, int size, int aligned_size);
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a30efb4..e0cbf17 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
 /* True if the target is not a standard target */
 #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
 
+static inline bool ebt_invalid_target(int target)
+{
+	return (target < -NUM_STANDARD_TARGETS || target >= 0);
+}
+
 #endif
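As an illustration (not part of the patch), a bridge target's checkentry hook can use the new helper instead of the open-coded INVALID_TARGET test; struct ebt_example_info is hypothetical:

static int ebt_example_tg_check(const struct xt_tgchk_param *par)
{
	const struct ebt_example_info *info = par->targinfo;

	/* Only standard verdicts (negative, in range) are acceptable. */
	if (ebt_invalid_target(info->target))
		return -EINVAL;
	return 0;
}
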
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index e997c4a..bc711a1 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt {
 };
 
 
-#define FCNVME_ASSOC_HOSTID_LEN		16
 #define FCNVME_ASSOC_HOSTNQN_LEN	256
 #define FCNVME_ASSOC_SUBNQN_LEN		256
 
@@ -191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	__be16	cntlid;
 	__be16	sqsize;
 	__be32	rsvd52;
-	u8	hostid[FCNVME_ASSOC_HOSTID_LEN];
+	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
 	u8	rsvd632[384];
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bac..6b8ee9e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -16,6 +16,7 @@
 #define _LINUX_NVME_H
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /* NQN names in commands fields specified one size */
 #define NVMF_NQN_FIELD_LEN	256
@@ -86,7 +87,7 @@ enum {
 	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
 };
 
-#define NVMF_AQ_DEPTH		32
+#define NVME_AQ_DEPTH		32
 
 enum {
 	NVME_REG_CAP	= 0x0000,	/* Controller Capabilities */
@@ -101,6 +102,7 @@ enum {
 	NVME_REG_ACQ	= 0x0030,	/* Admin CQ Base Address */
 	NVME_REG_CMBLOC = 0x0038,	/* Controller Memory Buffer Location */
 	NVME_REG_CMBSZ	= 0x003c,	/* Controller Memory Buffer Size */
+	NVME_REG_DBS	= 0x1000,	/* SQ 0 Tail Doorbell */
 };
 
 #define NVME_CAP_MQES(cap)	((cap) & 0xffff)
@@ -207,9 +209,15 @@ struct nvme_id_ctrl {
 	__u8			tnvmcap[16];
 	__u8			unvmcap[16];
 	__le32			rpmbs;
-	__u8			rsvd316[4];
+	__le16			edstt;
+	__u8			dsto;
+	__u8			fwug;
 	__le16			kas;
-	__u8			rsvd322[190];
+	__le16			hctma;
+	__le16			mntmt;
+	__le16			mxtmt;
+	__le32			sanicap;
+	__u8			rsvd332[180];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
@@ -245,6 +253,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
 	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 7,
 };
 
@@ -274,7 +283,7 @@ struct nvme_id_ns {
 	__le16			nabsn;
 	__le16			nabo;
 	__le16			nabspf;
-	__u16			rsvd46;
+	__le16			noiob;
 	__u8			nvmcap[16];
 	__u8			rsvd64[40];
 	__u8			nguid[16];
@@ -288,6 +297,7 @@ enum {
 	NVME_ID_CNS_NS			= 0x00,
 	NVME_ID_CNS_CTRL		= 0x01,
 	NVME_ID_CNS_NS_ACTIVE_LIST	= 0x02,
+	NVME_ID_CNS_NS_DESC_LIST	= 0x03,
 	NVME_ID_CNS_NS_PRESENT_LIST	= 0x10,
 	NVME_ID_CNS_NS_PRESENT		= 0x11,
 	NVME_ID_CNS_CTRL_NS_LIST	= 0x12,
@@ -295,6 +305,19 @@ enum {
 };
 
 enum {
+	NVME_DIR_IDENTIFY		= 0x00,
+	NVME_DIR_STREAMS		= 0x01,
+	NVME_DIR_SND_ID_OP_ENABLE	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_ID	= 0x01,
+	NVME_DIR_SND_ST_OP_REL_RSC	= 0x02,
+	NVME_DIR_RCV_ID_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_PARAM	= 0x01,
+	NVME_DIR_RCV_ST_OP_STATUS	= 0x02,
+	NVME_DIR_RCV_ST_OP_RESOURCE	= 0x03,
+	NVME_DIR_ENDIR			= 0x01,
+};
+
+enum {
 	NVME_NS_FEAT_THIN	= 1 << 0,
 	NVME_NS_FLBAS_LBA_MASK	= 0xf,
 	NVME_NS_FLBAS_META_EXT	= 0x10,
@@ -314,6 +337,22 @@ enum {
 	NVME_NS_DPS_PI_TYPE3	= 3,
 };
 
+struct nvme_ns_id_desc {
+	__u8 nidt;
+	__u8 nidl;
+	__le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN	8
+#define NVME_NIDT_NGUID_LEN	16
+#define NVME_NIDT_UUID_LEN	16
+
+enum {
+	NVME_NIDT_EUI64		= 0x01,
+	NVME_NIDT_NGUID		= 0x02,
+	NVME_NIDT_UUID		= 0x03,
+};
+
 struct nvme_smart_log {
 	__u8			critical_warning;
 	__u8			temperature[2];
@@ -535,6 +574,7 @@ enum {
 	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
 	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
 	NVME_RW_PRINFO_PRACT		= 1 << 13,
+	NVME_RW_DTYPE_STREAMS		= 1 << 4,
 };
 
 struct nvme_dsm_cmd {
@@ -586,6 +626,11 @@ struct nvme_feat_auto_pst {
 	__le64 entries[32];
 };
 
+enum {
+	NVME_HOST_MEM_ENABLE	= (1 << 0),
+	NVME_HOST_MEM_RETURN	= (1 << 1),
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -604,6 +649,8 @@ enum nvme_admin_opcode {
 	nvme_admin_download_fw		= 0x11,
 	nvme_admin_ns_attach		= 0x15,
 	nvme_admin_keep_alive		= 0x18,
+	nvme_admin_directive_send	= 0x19,
+	nvme_admin_directive_recv	= 0x1a,
 	nvme_admin_dbbuf		= 0x7C,
 	nvme_admin_format_nvm		= 0x80,
 	nvme_admin_security_send	= 0x81,
@@ -658,6 +705,8 @@ struct nvme_identify {
 	__u32			rsvd11[5];
 };
 
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
 struct nvme_features {
 	__u8			opcode;
 	__u8			flags;
@@ -667,7 +716,16 @@ struct nvme_features {
 	union nvme_data_ptr	dptr;
 	__le32			fid;
 	__le32			dword11;
-	__u32			rsvd12[4];
+	__le32                  dword12;
+	__le32                  dword13;
+	__le32                  dword14;
+	__le32                  dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+	__le64			addr;
+	__le32			size;
+	__u32			rsvd;
 };
 
 struct nvme_create_cq {
@@ -756,6 +814,24 @@ struct nvme_get_log_page_command {
 	__u32			rsvd14[2];
 };
 
+struct nvme_directive_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__le32			numd;
+	__u8			doper;
+	__u8			dtype;
+	__le16			dspec;
+	__u8			endir;
+	__u8			tdtype;
+	__u16			rsvd15;
+
+	__u32			rsvd16[3];
+};
+
 /*
  * Fabrics subcommands.
  */
@@ -843,7 +919,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-	__u8		hostid[16];
+	uuid_t		hostid;
 	__le16		cntlid;
 	char		resv4[238];
 	char		subsysnqn[NVMF_NQN_FIELD_LEN];
@@ -886,6 +962,18 @@ struct nvme_dbbuf {
 	__u32			rsvd12[6];
 };
 
+struct streams_directive_params {
+	__u16	msl;
+	__u16	nssa;
+	__u16	nsso;
+	__u8	rsvd[10];
+	__u32	sws;
+	__u16	sgs;
+	__u16	nsa;
+	__u16	nso;
+	__u8	rsvd2[6];
+};
+
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
@@ -906,6 +994,7 @@ struct nvme_command {
 		struct nvmf_property_set_command prop_set;
 		struct nvmf_property_get_command prop_get;
 		struct nvme_dbbuf dbbuf;
+		struct nvme_directive_cmd directive;
 	};
 };
 
@@ -1050,4 +1139,8 @@ struct nvme_completion {
 #define NVME_VS(major, minor, tertiary) \
 	(((major) << 16) | ((minor) << 8) | (tertiary))
 
+#define NVME_MAJOR(ver)		((ver) >> 16)
+#define NVME_MINOR(ver)		(((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver)	((ver) & 0xff)
+
 #endif /* _LINUX_NVME_H */
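For illustration, the new NVME_MAJOR/NVME_MINOR/NVME_TERTIARY helpers decompose the version word reported in the Identify Controller data; the logging format here is arbitrary:

static void example_log_nvme_version(struct nvme_id_ctrl *id)
{
	u32 ver = le32_to_cpu(id->ver);

	pr_info("controller reports NVMe %u.%u.%u\n",
		NVME_MAJOR(ver), NVME_MINOR(ver), NVME_TERTIARY(ver));
}
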
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index dc8224a..e0d1946 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np,
 						   const char *bus_id,
 						   struct device *parent);
 
+extern int of_platform_device_destroy(struct device *dev, void *data);
 extern int of_platform_bus_probe(struct device_node *root,
 				 const struct of_device_id *matches,
 				 struct device *parent);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 316a19f..e7bbd9d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -524,7 +524,7 @@ void page_endio(struct page *page, bool is_write, int err);
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
-extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
+extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
 
 /*
  * Fault everything in given userspace address range in.
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 7a4e83a..dd86c97 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { }
 static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
 #endif
 
-extern const u8 pci_acpi_dsm_uuid[];
+extern const guid_t pci_acpi_dsm_guid;
 #define DEVICE_LABEL_DSM	0x07
 #define RESET_DELAY_DSM		0x08
 #define FUNCTION_DELAY_DSM	0x09
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 33c2b0b..8039f9f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -183,6 +183,11 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
 	/* Do not use FLR even if device advertises PCI_AF_CAP */
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
+	/*
+	 * Resume before calling the driver's system suspend hooks, disabling
+	 * the direct_complete optimization.
+	 */
+	PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11),
 };
 
 enum pci_irq_reroute_variant {
@@ -1342,9 +1347,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
 			       const struct irq_affinity *aff_desc)
 {
-	if (min_vecs > 1)
-		return -EINVAL;
-	return 1;
+	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+		return 1;
+	return -ENOSPC;
 }
 
 static inline void pci_free_irq_vectors(struct pci_dev *dev)
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 279e3c5..7620eb1 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -42,8 +42,6 @@
  * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high
  *	impedance to VDD). If the argument is != 0 pull-up is enabled,
  *	if it is 0, pull-up is total, i.e. the pin is connected to VDD.
- * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous
- *	input and output operations.
  * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open
  *	collector) which means it is usually wired with other output ports
  *	which are then pulled up with an external resistor. Setting this
@@ -98,7 +96,6 @@ enum pin_config_param {
 	PIN_CONFIG_BIAS_PULL_DOWN,
 	PIN_CONFIG_BIAS_PULL_PIN_DEFAULT,
 	PIN_CONFIG_BIAS_PULL_UP,
-	PIN_CONFIG_BIDIRECTIONAL,
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
 	PIN_CONFIG_DRIVE_PUSH_PULL,
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 75ffc57..2889f09 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -75,7 +75,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 struct poll_table_entry {
 	struct file *filp;
 	unsigned long key;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_address;
 };
 
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 422bc2e..ef3eb8b 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
-			  struct task_struct *new_parent);
+			  struct task_struct *new_parent,
+			  const struct cred *ptracer_cred);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
@@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 	if (unlikely(ptrace) && current->ptrace) {
 		child->ptrace = current->ptrace;
-		__ptrace_link(child, current->parent);
+		__ptrace_link(child, current->parent, current->ptracer_cred);
 
 		if (child->ptrace & PT_SEIZED)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
@@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 
 		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
+	else
+		child->ptracer_cred = NULL;
 }
 
 /**
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 9c6f768..dda22f4 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -44,6 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 void inode_reclaim_rsv_space(struct inode *inode, qsize_t number);
 
 int dquot_initialize(struct inode *inode);
+bool dquot_initialize_needed(struct inode *inode);
 void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, struct kqid qid);
 static inline struct dquot *dqgrab(struct dquot *dquot)
@@ -207,6 +208,11 @@ static inline int dquot_initialize(struct inode *inode)
 	return 0;
 }
 
+static inline bool dquot_initialize_needed(struct inode *inode)
+{
+	return false;
+}
+
 static inline void dquot_drop(struct inode *inode)
 {
 }
diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
index 4b766b6..426cee6 100644
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -7,6 +7,10 @@
  * unlimited scalability while maintaining a constant level of contention
  * on the root node.
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ba4d262..c3ad00e 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -1,6 +1,10 @@
 /*
  * RCU segmented callback lists
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e1e5d00..f816fc7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,104 +34,15 @@
 #define __LINUX_RCUPDATE_H
 
 #include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/seqlock.h>
-#include <linux/lockdep.h>
-#include <linux/debugobjects.h>
-#include <linux/bug.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
+#include <linux/atomic.h>
 #include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
+#include <asm/processor.h>
+#include <linux/cpumask.h>
 
-#include <asm/barrier.h>
-
-#ifndef CONFIG_TINY_RCU
-extern int rcu_expedited; /* for sysctl */
-extern int rcu_normal;    /* also for sysctl */
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-#ifdef CONFIG_TINY_RCU
-/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
-static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
-{
-	return true;
-}
-static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
-{
-	return false;
-}
-
-static inline void rcu_expedite_gp(void)
-{
-}
-
-static inline void rcu_unexpedite_gp(void)
-{
-}
-#else /* #ifdef CONFIG_TINY_RCU */
-bool rcu_gp_is_normal(void);     /* Internal RCU use. */
-bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
-enum rcutorture_type {
-	RCU_FLAVOR,
-	RCU_BH_FLAVOR,
-	RCU_SCHED_FLAVOR,
-	RCU_TASKS_FLAVOR,
-	SRCU_FLAVOR,
-	INVALID_RCU_FLAVOR
-};
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
-void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
-			    unsigned long *gpnum, unsigned long *completed);
-void rcutorture_record_test_transition(void);
-void rcutorture_record_progress(unsigned long vernum);
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-bool rcu_irq_enter_disabled(void);
-#else
-static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
-					  int *flags,
-					  unsigned long *gpnum,
-					  unsigned long *completed)
-{
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
-}
-static inline void rcutorture_record_test_transition(void)
-{
-}
-static inline void rcutorture_record_progress(unsigned long vernum)
-{
-}
-static inline bool rcu_irq_enter_disabled(void)
-{
-	return false;
-}
-#ifdef CONFIG_RCU_TRACE
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
-#endif
-#endif
-
-#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
-#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
 #define ulong2long(a)		(*(long *)(&(a)))
@@ -139,115 +50,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 /* Exported common interfaces */
 
 #ifdef CONFIG_PREEMPT_RCU
-
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head,
-	      rcu_callback_t func);
-
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
 #else /* #ifdef CONFIG_PREEMPT_RCU */
-
-/* In classic RCU, call_rcu() is just call_rcu_sched(). */
 #define	call_rcu	call_rcu_sched
-
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context.
- *  OR
- *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_bh(struct rcu_head *head,
-		 rcu_callback_t func);
-
-/**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_sched() assumes
- * that the read-side critical sections end on enabling of preemption
- * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock_sched() and  rcu_read_unlock_sched(),
- *  OR
- *  anything that disables preemption.
- *  These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_sched(struct rcu_head *head,
-		    rcu_callback_t func);
-
+void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
-
-/**
- * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_tasks() assumes
- * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), entry into idle, or transition to usermode
- * execution.  As such, there are no read-side primitives analogous to
- * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
- * to determine that all tasks have passed through a safe state, not so
- * much for data-strcuture synchronization.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void rcu_barrier_tasks(void);
@@ -301,22 +111,12 @@ void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcu_cpu_starting(unsigned int cpu);
 
-#ifndef CONFIG_TINY_RCU
-void rcu_end_inkernel_boot(void);
-#else /* #ifndef CONFIG_TINY_RCU */
-static inline void rcu_end_inkernel_boot(void) { }
-#endif /* #ifndef CONFIG_TINY_RCU */
-
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
 #else /* #ifdef CONFIG_RCU_STALL_COMMON */
-static inline void rcu_sysrq_start(void)
-{
-}
-static inline void rcu_sysrq_end(void)
-{
-}
+static inline void rcu_sysrq_start(void) { }
+static inline void rcu_sysrq_end(void) { }
 #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -330,9 +130,7 @@ static inline void rcu_user_exit(void) { }
 #ifdef CONFIG_RCU_NOCB_CPU
 void rcu_init_nohz(void);
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
-static inline void rcu_init_nohz(void)
-{
-}
+static inline void rcu_init_nohz(void) { }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 /**
@@ -397,10 +195,6 @@ do { \
 		rcu_note_voluntary_context_switch(current); \
 } while (0)
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
-bool __rcu_is_watching(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
-
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -414,10 +208,6 @@ bool __rcu_is_watching(void);
 #error "Unknown RCU implementation specified to kernel configuration"
 #endif
 
-#define RCU_SCHEDULER_INACTIVE	0
-#define RCU_SCHEDULER_INIT	1
-#define RCU_SCHEDULER_RUNNING	2
-
 /*
  * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
  * initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -430,30 +220,16 @@ void destroy_rcu_head(struct rcu_head *head);
 void init_rcu_head_on_stack(struct rcu_head *head);
 void destroy_rcu_head_on_stack(struct rcu_head *head);
 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-static inline void init_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head(struct rcu_head *head)
-{
-}
-
-static inline void init_rcu_head_on_stack(struct rcu_head *head)
-{
-}
-
-static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
-{
-}
+static inline void init_rcu_head(struct rcu_head *head) { }
+static inline void destroy_rcu_head(struct rcu_head *head) { }
+static inline void init_rcu_head_on_stack(struct rcu_head *head) { }
+static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
-static inline bool rcu_lockdep_current_cpu_online(void)
-{
-	return true;
-}
+static inline bool rcu_lockdep_current_cpu_online(void) { return true; }
 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -473,18 +249,8 @@ extern struct lockdep_map rcu_bh_lock_map;
 extern struct lockdep_map rcu_sched_lock_map;
 extern struct lockdep_map rcu_callback_map;
 int debug_lockdep_rcu_enabled(void);
-
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
-
-/**
- * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
- * RCU-sched read-side critical section.  In absence of
- * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
- * critical section unless it can prove otherwise.
- */
 int rcu_read_lock_sched_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -531,9 +297,7 @@ static inline void rcu_preempt_sleep_check(void)
 			 "Illegal context switch in RCU read-side critical section");
 }
 #else /* #ifdef CONFIG_PROVE_RCU */
-static inline void rcu_preempt_sleep_check(void)
-{
-}
+static inline void rcu_preempt_sleep_check(void) { }
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 #define rcu_sleep_check()						\
@@ -1084,52 +848,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#ifdef CONFIG_TINY_RCU
-static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
-{
-	*nextevt = KTIME_MAX;
-	return 0;
-}
-#endif /* #ifdef CONFIG_TINY_RCU */
-
-#if defined(CONFIG_RCU_NOCB_CPU_ALL)
-static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
-#elif defined(CONFIG_RCU_NOCB_CPU)
-bool rcu_is_nocb_cpu(int cpu);
-#else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
-#endif
-
-
-/* Only for use by adaptive-ticks code. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-bool rcu_sys_is_idle(void);
-void rcu_sysidle_force_exit(void);
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static inline bool rcu_sys_is_idle(void)
-{
-	return false;
-}
-
-static inline void rcu_sysidle_force_exit(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-
-/*
- * Dump the ftrace buffer, but only one time per callsite per boot.
- */
-#define rcu_ftrace_dump(oops_dump_mode) \
-do { \
-	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
-	\
-	if (!atomic_read(&___rfd_beenhere) && \
-	    !atomic_xchg(&___rfd_beenhere, 1)) \
-		ftrace_dump(oops_dump_mode); \
-} while (0)
 
 /*
  * Place this after a lock-acquisition primitive to guarantee that
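
The kernel-doc removed above now lives with the definitions; as a reminder of the pattern these declarations still cover, here is a minimal call_rcu() sketch. struct foo, foo_release() and foo_remove() are illustrative names; the code assumes <linux/rcupdate.h> and <linux/slab.h>.

    struct foo {
    	int data;
    	struct rcu_head rcu;	/* storage handed to call_rcu() */
    };

    static void foo_release(struct rcu_head *head)
    {
    	kfree(container_of(head, struct foo, rcu));
    }

    static void foo_remove(struct foo *fp)
    {
    	/* fp must already be unreachable by new RCU readers. */
    	call_rcu(&fp->rcu, foo_release);
    }
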
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 74d9c3a..5becbbc 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -25,7 +25,7 @@
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
-#include <linux/cache.h>
+#include <linux/ktime.h>
 
 struct rcu_dynticks;
 static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
@@ -33,10 +33,8 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
-static inline bool rcu_eqs_special_set(int cpu)
-{
-	return false;  /* Never flag non-existent other CPUs! */
-}
+/* Never flag non-existent other CPUs! */
+static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
 static inline unsigned long get_state_synchronize_rcu(void)
 {
@@ -98,159 +96,38 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 		rcu_note_voluntary_context_switch_lite(current); \
 	} while (0)
 
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
+{
+	*nextevt = KTIME_MAX;
+	return 0;
+}
+
 /*
  * Take advantage of the fact that there is only one CPU, which
  * allows us to ignore virtualization-based context switches.
  */
-static inline void rcu_virt_note_context_switch(int cpu)
-{
-}
-
-/*
- * Return the number of grace periods started.
- */
-static inline unsigned long rcu_batches_started(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods started.
- */
-static inline unsigned long rcu_batches_started_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods started.
- */
-static inline unsigned long rcu_batches_started_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of grace periods completed.
- */
-static inline unsigned long rcu_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of bottom-half grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_bh(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of sched grace periods completed.
- */
-static inline unsigned long rcu_batches_completed_sched(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed(void)
-{
-	return 0;
-}
-
-/*
- * Return the number of expedited sched grace periods completed.
- */
-static inline unsigned long rcu_exp_batches_completed_sched(void)
-{
-	return 0;
-}
-
-static inline void rcu_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_bh_force_quiescent_state(void)
-{
-}
-
-static inline void rcu_sched_force_quiescent_state(void)
-{
-}
-
-static inline void show_rcu_gp_kthreads(void)
-{
-}
-
-static inline void rcu_cpu_stall_reset(void)
-{
-}
-
-static inline void rcu_idle_enter(void)
-{
-}
-
-static inline void rcu_idle_exit(void)
-{
-}
-
-static inline void rcu_irq_enter(void)
-{
-}
-
-static inline void rcu_irq_exit_irqson(void)
-{
-}
-
-static inline void rcu_irq_enter_irqson(void)
-{
-}
-
-static inline void rcu_irq_exit(void)
-{
-}
-
-static inline void exit_rcu(void)
-{
-}
+static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_cpu_stall_reset(void) { }
+static inline void rcu_idle_enter(void) { }
+static inline void rcu_idle_exit(void) { }
+static inline void rcu_irq_enter(void) { }
+static inline bool rcu_irq_enter_disabled(void) { return false; }
+static inline void rcu_irq_exit_irqson(void) { }
+static inline void rcu_irq_enter_irqson(void) { }
+static inline void rcu_irq_exit(void) { }
+static inline void exit_rcu(void) { }
 
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 extern int rcu_scheduler_active __read_mostly;
 void rcu_scheduler_starting(void);
 #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
-static inline void rcu_scheduler_starting(void)
-{
-}
+static inline void rcu_scheduler_starting(void) { }
 #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
+static inline void rcu_end_inkernel_boot(void) { }
+static inline bool rcu_is_watching(void) { return true; }
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-static inline bool rcu_is_watching(void)
-{
-	return __rcu_is_watching();
-}
-
-#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
-static inline bool rcu_is_watching(void)
-{
-	return true;
-}
-
-#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
-static inline void rcu_request_urgent_qs_task(struct task_struct *t)
-{
-}
-
-static inline void rcu_all_qs(void)
-{
-	barrier(); /* Avoid RCU read-side critical sections leaking across. */
-}
+/* Avoid RCU read-side critical sections leaking across. */
+static inline void rcu_all_qs(void) { barrier(); }
 
 /* RCUtree hotplug events */
 #define rcutree_prepare_cpu      NULL
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0bacb6b..37d6fd3 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -79,37 +79,20 @@ void cond_synchronize_rcu(unsigned long oldstate);
 unsigned long get_state_synchronize_sched(void);
 void cond_synchronize_sched(unsigned long oldstate);
 
-extern unsigned long rcutorture_testseq;
-extern unsigned long rcutorture_vernum;
-unsigned long rcu_batches_started(void);
-unsigned long rcu_batches_started_bh(void);
-unsigned long rcu_batches_started_sched(void);
-unsigned long rcu_batches_completed(void);
-unsigned long rcu_batches_completed_bh(void);
-unsigned long rcu_batches_completed_sched(void);
-unsigned long rcu_exp_batches_completed(void);
-unsigned long rcu_exp_batches_completed_sched(void);
-void show_rcu_gp_kthreads(void);
-
-void rcu_force_quiescent_state(void);
-void rcu_bh_force_quiescent_state(void);
-void rcu_sched_force_quiescent_state(void);
-
 void rcu_idle_enter(void);
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
 void rcu_irq_enter_irqson(void);
 void rcu_irq_exit_irqson(void);
+bool rcu_irq_enter_disabled(void);
 
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
-
+void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
-void rcu_request_urgent_qs_task(struct task_struct *t);
-
 void rcu_all_qs(void);
 
 /* RCUtree hotplug events */
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index b34aa64..591792c 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
+#ifdef CONFIG_REFCOUNT_FULL
 extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
 extern void refcount_add(unsigned int i, refcount_t *r);
 
@@ -48,10 +49,45 @@ extern __must_check bool refcount_inc_not_zero(refcount_t *r);
 extern void refcount_inc(refcount_t *r);
 
 extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
-extern void refcount_sub(unsigned int i, refcount_t *r);
 
 extern __must_check bool refcount_dec_and_test(refcount_t *r);
 extern void refcount_dec(refcount_t *r);
+#else
+static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, i, 0);
+}
+
+static inline void refcount_add(unsigned int i, refcount_t *r)
+{
+	atomic_add(i, &r->refs);
+}
+
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return atomic_add_unless(&r->refs, 1, 0);
+}
+
+static inline void refcount_inc(refcount_t *r)
+{
+	atomic_inc(&r->refs);
+}
+
+static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+	return atomic_sub_and_test(i, &r->refs);
+}
+
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	return atomic_dec_and_test(&r->refs);
+}
+
+static inline void refcount_dec(refcount_t *r)
+{
+	atomic_dec(&r->refs);
+}
+#endif /* CONFIG_REFCOUNT_FULL */
 
 extern __must_check bool refcount_dec_if_one(refcount_t *r);
 extern __must_check bool refcount_dec_not_one(refcount_t *r);
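
A short usage sketch for the API above; struct obj and the helpers are illustrative. With CONFIG_REFCOUNT_FULL unset the calls reduce to the plain atomic operations in the #else branch; with the full implementation they additionally check for overflow and use-after-free style misuse.

    struct obj {
    	refcount_t ref;
    	/* ... payload ... */
    };

    static struct obj *obj_alloc(void)
    {
    	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

    	if (o)
    		refcount_set(&o->ref, 1);	/* caller owns the initial reference */
    	return o;
    }

    static struct obj *obj_get(struct obj *o)
    {
    	refcount_inc(&o->ref);			/* caller must already hold a reference */
    	return o;
    }

    static void obj_put(struct obj *o)
    {
    	if (refcount_dec_and_test(&o->ref))
    		kfree(o);			/* last reference dropped */
    }
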
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1abba5c..44fd002 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -37,6 +37,9 @@ struct rt_mutex {
 	int			line;
 	void			*magic;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
 struct rt_mutex_waiter;
@@ -58,19 +61,33 @@ struct hrtimer_sleeper;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
 	, .name = #mutexname, .file = __FILE__, .line = __LINE__
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __func__)
+
+# define rt_mutex_init(mutex) \
+do { \
+	static struct lock_class_key __key; \
+	__rt_mutex_init(mutex, __func__, &__key); \
+} while (0)
+
  extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL)
+# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, NULL, NULL)
 # define rt_mutex_debug_task_free(t)			do { } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
+	, .dep_map = { .name = #mutexname }
+#else
+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
+#endif
+
 #define __RT_MUTEX_INITIALIZER(mutexname) \
 	{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
 	, .waiters = RB_ROOT \
 	, .owner = NULL \
-	__DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
+	__DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+	__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
 
 #define DEFINE_RT_MUTEX(mutexname) \
 	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
@@ -86,7 +103,7 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock)
 	return lock->owner != NULL;
 }
 
-extern void __rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void rt_mutex_destroy(struct rt_mutex *lock);
 
 extern void rt_mutex_lock(struct rt_mutex *lock);
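
For context, both initialization styles under the new three-argument __rt_mutex_init(); with CONFIG_DEBUG_LOCK_ALLOC=y each lock now carries its own lockdep class. The names below are illustrative.

    static DEFINE_RT_MUTEX(example_static_lock);	/* build-time initializer */
    static struct rt_mutex example_dynamic_lock;

    static void example_init(void)
    {
    	rt_mutex_init(&example_dynamic_lock);	/* registers a lock class when debugging */
    }

    static void example_work(void)
    {
    	rt_mutex_lock(&example_static_lock);
    	/* ... priority-inheritance protected work ... */
    	rt_mutex_unlock(&example_static_lock);
    }
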
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe..4b3286a 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
 			    const void *buf, size_t buflen, off_t skip);
 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 			  void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip);
 
 /*
  * Maximum number of entries that will be allocated in one piece, if
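
A hedged sketch of the new sg_zero_buffer() declaration, assuming it mirrors the sg_pcopy_*() helpers above: zero @buflen bytes starting @skip bytes into the list and return how many bytes were actually zeroed. The wrapper name is illustrative.

    static void example_wipe_region(struct scatterlist *sgl, unsigned int nents,
    				    size_t len, off_t off)
    {
    	size_t zeroed = sg_zero_buffer(sgl, nents, len, off);

    	WARN_ON_ONCE(zeroed != len);	/* scatterlist shorter than expected */
    }
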
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b69fc6..1f0f427 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -421,7 +421,8 @@ struct sched_dl_entity {
 	u64				dl_runtime;	/* Maximum runtime for each instance	*/
 	u64				dl_deadline;	/* Relative deadline of each instance	*/
 	u64				dl_period;	/* Separation of two instances (period) */
-	u64				dl_bw;		/* dl_runtime / dl_deadline		*/
+	u64				dl_bw;		/* dl_runtime / dl_period		*/
+	u64				dl_density;	/* dl_runtime / dl_deadline		*/
 
 	/*
 	 * Actual scheduling parameters. Initialized with the values above,
@@ -445,16 +446,33 @@ struct sched_dl_entity {
 	 *
 	 * @dl_yielded tells if task gave up the CPU before consuming
 	 * all its available runtime during the last job.
+	 *
+	 * @dl_non_contending tells if the task is inactive while still
+	 * contributing to the active utilization. In other words, it
+	 * indicates if the inactive timer has been armed and its handler
+	 * has not been executed yet. This flag is useful to avoid race
+	 * conditions between the inactive timer handler and the wakeup
+	 * code.
 	 */
 	int				dl_throttled;
 	int				dl_boosted;
 	int				dl_yielded;
+	int				dl_non_contending;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer			dl_timer;
+
+	/*
+	 * Inactive timer, responsible for decreasing the active utilization
+	 * at the "0-lag time". When a -deadline task blocks, it contributes
+	 * to GRUB's active utilization until the "0-lag time", hence a
+	 * timer is needed to decrease the active utilization at the correct
+	 * time.
+	 */
+	struct hrtimer inactive_timer;
 };
 
 union rcu_special {
@@ -1096,8 +1114,6 @@ static inline struct pid *task_session(struct task_struct *task)
  *                     current.
  * task_xid_nr_ns()  : id seen from the ns specified;
  *
- * set_task_vxid()   : assigns a virtual id to a task;
- *
  * see also pid_nr() etc in include/linux/pid.h
  */
 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
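
A fixed-point sketch of the two ratios now kept side by side in sched_dl_entity; the 20-bit scaling and helper name are assumptions made purely for illustration.

    #define EXAMPLE_BW_SHIFT	20	/* assumed fixed-point precision */

    static u64 example_to_ratio(u64 denom, u64 runtime)
    {
    	return denom ? div64_u64(runtime << EXAMPLE_BW_SHIFT, denom) : 0;
    }

    /*
     * dl_bw      = runtime / period   -> example_to_ratio(dl_period, dl_runtime)
     * dl_density = runtime / deadline -> example_to_ratio(dl_deadline, dl_runtime)
     */
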
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 34fe92c..a55600f 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -23,10 +23,6 @@ extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_init(void);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-static inline void sched_clock_init_late(void)
-{
-}
-
 static inline void sched_clock_tick(void)
 {
 }
@@ -39,7 +35,7 @@ static inline void sched_clock_idle_sleep_event(void)
 {
 }
 
-static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+static inline void sched_clock_idle_wakeup_event(void)
 {
 }
 
@@ -53,7 +49,6 @@ static inline u64 local_clock(void)
 	return sched_clock();
 }
 #else
-extern void sched_clock_init_late(void);
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
@@ -63,10 +58,10 @@ extern void clear_sched_clock_stable(void);
  */
 extern u64 __sched_clock_offset;
 
-
 extern void sched_clock_tick(void);
+extern void sched_clock_tick_stable(void);
 extern void sched_clock_idle_sleep_event(void);
-extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+extern void sched_clock_idle_wakeup_event(void);
 
 /*
  * As outlined in clock.c, provides a fast, high resolution, nanosecond
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 4995b71..7d3f75d 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -23,11 +23,11 @@ static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_COMMON
-void calc_load_enter_idle(void);
-void calc_load_exit_idle(void);
+void calc_load_nohz_start(void);
+void calc_load_nohz_stop(void);
 #else
-static inline void calc_load_enter_idle(void) { }
-static inline void calc_load_exit_idle(void) { }
+static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_stop(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a978d71..f0f065c 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -95,8 +95,6 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
-struct task_struct *try_get_task_struct(struct task_struct **ptask);
-
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index cda76c6..e69402d 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -195,6 +195,7 @@ int serdev_device_open(struct serdev_device *);
 void serdev_device_close(struct serdev_device *);
 unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int);
 void serdev_device_set_flow_control(struct serdev_device *, bool);
+int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t);
 void serdev_device_wait_until_sent(struct serdev_device *, long);
 int serdev_device_get_tiocm(struct serdev_device *);
 int serdev_device_set_tiocm(struct serdev_device *, int, int);
@@ -236,6 +237,12 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev
 	return 0;
 }
 static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {}
+static inline int serdev_device_write_buf(struct serdev_device *serdev,
+					  const unsigned char *buf,
+					  size_t count)
+{
+	return -ENODEV;
+}
 static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {}
 static inline int serdev_device_get_tiocm(struct serdev_device *serdev)
 {
@@ -301,7 +308,7 @@ struct tty_driver;
 struct device *serdev_tty_port_register(struct tty_port *port,
 					struct device *parent,
 					struct tty_driver *drv, int idx);
-void serdev_tty_port_unregister(struct tty_port *port);
+int serdev_tty_port_unregister(struct tty_port *port);
 #else
 static inline struct device *serdev_tty_port_register(struct tty_port *port,
 					   struct device *parent,
@@ -309,14 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port,
 {
 	return ERR_PTR(-ENODEV);
 }
-static inline void serdev_tty_port_unregister(struct tty_port *port) {}
-#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
-
-static inline int serdev_device_write_buf(struct serdev_device *serdev,
-					  const unsigned char *data,
-					  size_t count)
+static inline int serdev_tty_port_unregister(struct tty_port *port)
 {
-	return serdev_device_write(serdev, data, count, 0);
+	return -ENODEV;
 }
+#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */
 
 #endif /*_LINUX_SERDEV_H */
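
For context, a sketch of a serdev client using the relocated serdev_device_write_buf() declaration; the command bytes and helper name are made up, and the return value is only checked for a negative errno (the !CONFIG_SERIAL_DEV_BUS stub above returns -ENODEV).

    static int example_send_cmd(struct serdev_device *serdev)
    {
    	static const unsigned char cmd[] = { 0x01, 0x02, 0x03 };
    	int ret;

    	ret = serdev_device_write_buf(serdev, cmd, sizeof(cmd));
    	if (ret < 0)
    		return ret;

    	serdev_device_wait_until_sent(serdev, msecs_to_jiffies(100));
    	return 0;
    }
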
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 07ef550..93315d6 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
 	int red_left_pad;	/* Left redzone padding size */
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
+	struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 59248dc..d9510e8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
+/**
+ * spin_unlock_wait - Interpose between successive critical sections
+ * @lock: the spinlock whose critical sections are to be interposed.
+ *
+ * Semantically this is equivalent to a spin_lock() immediately
+ * followed by a spin_unlock().  However, most architectures have
+ * more efficient implementations in which the spin_unlock_wait()
+ * cannot block concurrent lock acquisition, and in some cases
+ * where spin_unlock_wait() does not write to the lock variable.
+ * Nevertheless, spin_unlock_wait() can have high overhead, so if
+ * you feel the need to use it, please check to see if there is
+ * a better way to get your job done.
+ *
+ * The ordering guarantees provided by spin_unlock_wait() are:
+ *
+ * 1.  All accesses preceding the spin_unlock_wait() happen before
+ *     any accesses in later critical sections for this same lock.
+ * 2.  All accesses following the spin_unlock_wait() happen after
+ *     any accesses in earlier critical sections for this same lock.
+ */
 static __always_inline void spin_unlock_wait(spinlock_t *lock)
 {
 	raw_spin_unlock_wait(&lock->rlock);
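
A minimal sketch of the ordering guarantees documented above, assuming a hypothetical teardown path that must not race with in-flight critical sections; the function and flag names are illustrative.

    static void example_quiesce(spinlock_t *lock, bool *stopping)
    {
    	WRITE_ONCE(*stopping, true);	/* guarantee 1: visible to later critical sections */
    	spin_unlock_wait(lock);		/* drain any critical section already running */
    	/* guarantee 2: stores from earlier critical sections are now visible here */
    }
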
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 167ad88..39af9bc 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,32 +60,15 @@ int init_srcu_struct(struct srcu_struct *sp);
 #include <linux/srcutiny.h>
 #elif defined(CONFIG_TREE_SRCU)
 #include <linux/srcutree.h>
-#elif defined(CONFIG_CLASSIC_SRCU)
-#include <linux/srcuclassic.h>
-#else
+#elif defined(CONFIG_SRCU)
 #error "Unknown SRCU implementation specified to kernel configuration"
+#else
+/* Dummy definition for things like notifiers.  Actual use gets link error. */
+struct srcu_struct { };
 #endif
 
-/**
- * call_srcu() - Queue a callback for invocation after an SRCU grace period
- * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
- * @func: function to be invoked after the SRCU grace period
- *
- * The callback function will be invoked some time after a full SRCU
- * grace period elapses, in other words after all pre-existing SRCU
- * read-side critical sections have completed.  However, the callback
- * function might well execute concurrently with other SRCU read-side
- * critical sections that started after call_srcu() was invoked.  SRCU
- * read-side critical sections are delimited by srcu_read_lock() and
- * srcu_read_unlock(), and may be nested.
- *
- * The callback will be invoked from process context, but must nevertheless
- * be fast and must not block.
- */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 		void (*func)(struct rcu_head *head));
-
 void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
@@ -172,9 +155,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval;
 
-	preempt_disable();
 	retval = __srcu_read_lock(sp);
-	preempt_enable();
 	rcu_lock_acquire(&(sp)->dep_map);
 	return retval;
 }
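
The read-side pattern is unchanged by dropping the preempt_disable()/preempt_enable() pair; a minimal sketch follows, with the srcu_struct and protected pointer names being illustrative.

    DEFINE_STATIC_SRCU(example_srcu);
    static int __rcu *example_ptr;

    static int example_read(void)
    {
    	int idx, val = -1;
    	int *p;

    	idx = srcu_read_lock(&example_srcu);
    	p = srcu_dereference(example_ptr, &example_srcu);
    	if (p)
    		val = *p;
    	srcu_read_unlock(&example_srcu, idx);
    	return val;
    }
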
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
deleted file mode 100644
index 5753f73..0000000
--- a/include/linux/srcuclassic.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion,
- *	classic v4.11 variant.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (C) IBM Corporation, 2017
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- */
-
-#ifndef _LINUX_SRCU_CLASSIC_H
-#define _LINUX_SRCU_CLASSIC_H
-
-struct srcu_array {
-	unsigned long lock_count[2];
-	unsigned long unlock_count[2];
-};
-
-struct rcu_batch {
-	struct rcu_head *head, **tail;
-};
-
-#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
-
-struct srcu_struct {
-	unsigned long completed;
-	struct srcu_array __percpu *per_cpu_ref;
-	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
-	bool running;
-	/* callbacks just queued */
-	struct rcu_batch batch_queue;
-	/* callbacks try to do the first check_zero */
-	struct rcu_batch batch_check0;
-	/* callbacks done with the first check_zero and the flip */
-	struct rcu_batch batch_check1;
-	struct rcu_batch batch_done;
-	struct delayed_work work;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-};
-
-void process_srcu(struct work_struct *work);
-
-#define __SRCU_STRUCT_INIT(name)					\
-	{								\
-		.completed = -300,					\
-		.per_cpu_ref = &name##_srcu_array,			\
-		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\
-		.running = false,					\
-		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\
-		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\
-		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\
-		.batch_done = RCU_BATCH_INIT(name.batch_done),		\
-		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
-
-/*
- * Define and initialize a srcu struct at build time.
- * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
- *
- * Note that although DEFINE_STATIC_SRCU() hides the name from other
- * files, the per-CPU variable rules nevertheless require that the
- * chosen name be globally unique.  These rules also prohibit use of
- * DEFINE_STATIC_SRCU() within a function.  If these rules are too
- * restrictive, declare the srcu_struct manually.  For example, in
- * each file:
- *
- *	static struct srcu_struct my_srcu;
- *
- * Then, before the first use of each my_srcu, manually initialize it:
- *
- *	init_srcu_struct(&my_srcu);
- *
- * See include/linux/percpu-defs.h for the rules on per-CPU variables.
- */
-#define __DEFINE_SRCU(name, is_static)					\
-	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
-#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
-#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
-
-void synchronize_srcu_expedited(struct srcu_struct *sp);
-void srcu_barrier(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
-
-static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
-{
-	if (test_type != SRCU_FLAVOR)
-		return;
-	*flags = 0;
-	*completed = sp->completed;
-	*gpnum = *completed;
-	if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head)
-		(*gpnum)++;
-}
-
-#endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 42311ee..cfbfc54 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -27,15 +27,14 @@
 #include <linux/swait.h>
 
 struct srcu_struct {
-	int srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	short srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */
+	short srcu_idx;			/* Current reader array element. */
+	u8 srcu_gp_running;		/* GP workqueue running? */
+	u8 srcu_gp_waiting;		/* GP waiting for readers? */
 	struct swait_queue_head srcu_wq;
 					/* Last srcu_read_unlock() wakes GP. */
-	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */
-	struct rcu_segcblist srcu_cblist;
-					/* Pending SRCU callbacks. */
-	int srcu_idx;			/* Current reader array element. */
-	bool srcu_gp_running;		/* GP workqueue running? */
-	bool srcu_gp_waiting;		/* GP waiting for readers? */
+	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
+	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
@@ -47,7 +46,7 @@ void srcu_drive_gp(struct work_struct *wp);
 #define __SRCU_STRUCT_INIT(name)					\
 {									\
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
-	.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),	\
+	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
@@ -63,6 +62,21 @@ void srcu_drive_gp(struct work_struct *wp);
 
 void synchronize_srcu(struct srcu_struct *sp);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct.  Can be invoked from irq/bh handlers, but the matching
+ * __srcu_read_unlock() must be in the same handler instance.  Returns an
+ * index that must be passed to the matching srcu_read_unlock().
+ */
+static inline int __srcu_read_lock(struct srcu_struct *sp)
+{
+	int idx;
+
+	idx = READ_ONCE(sp->srcu_idx);
+	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
+	return idx;
+}
+
 static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
 {
 	synchronize_srcu(sp);
@@ -73,21 +87,4 @@ static inline void srcu_barrier(struct srcu_struct *sp)
 	synchronize_srcu(sp);
 }
 
-static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return 0;
-}
-
-static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
-					   struct srcu_struct *sp, int *flags,
-					   unsigned long *gpnum,
-					   unsigned long *completed)
-{
-	if (test_type != SRCU_FLAVOR)
-		return;
-	*flags = 0;
-	*completed = sp->srcu_gp_seq;
-	*gpnum = *completed;
-}
-
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 32e86d8..42973f7 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -40,7 +40,7 @@ struct srcu_data {
 	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */
 
 	/* Update-side state. */
-	spinlock_t lock ____cacheline_internodealigned_in_smp;
+	raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
 	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
@@ -58,7 +58,7 @@ struct srcu_data {
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
 struct srcu_node {
-	spinlock_t lock;
+	raw_spinlock_t __private lock;
 	unsigned long srcu_have_cbs[4];		/* GP seq for children */
 						/*  having CBs, but only */
 						/*  is > ->srcu_gq_seq. */
@@ -78,7 +78,7 @@ struct srcu_struct {
 	struct srcu_node *level[RCU_NUM_LVLS + 1];
 						/* First node at each level. */
 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
-	spinlock_t gp_lock;			/* protect ->srcu_cblist */
+	raw_spinlock_t __private lock;		/* Protect counters */
 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
 	unsigned int srcu_idx;			/* Current rdr array element. */
 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
@@ -109,7 +109,7 @@ void process_srcu(struct work_struct *work);
 #define __SRCU_STRUCT_INIT(name)					\
 	{								\
 		.sda = &name##_srcu_data,				\
-		.gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock),		\
+		.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
 		.srcu_gp_seq_needed = 0 - 1,				\
 		__SRCU_DEP_MAP_INIT(name)				\
 	}
@@ -141,10 +141,5 @@ void process_srcu(struct work_struct *work);
 
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 void srcu_barrier(struct srcu_struct *sp);
-unsigned long srcu_batches_completed(struct srcu_struct *sp);
-
-void srcutorture_get_gp_data(enum rcutorture_type test_type,
-			     struct srcu_struct *sp, int *flags,
-			     unsigned long *gpnum, unsigned long *completed);
 
 #endif
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c..9d7529f 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -13,7 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/sunrpc/types.h>
 #include <linux/spinlock.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/xdr.h>
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9463102..11cef5a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -336,7 +336,8 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
 {
 	char *cp = (char *)p;
 	struct kvec *vec = &rqstp->rq_arg.head[0];
-	return cp == (char *)vec->iov_base + vec->iov_len;
+	return cp >= (char*)vec->iov_base
+		&& cp <= (char*)vec->iov_base + vec->iov_len;
 }
 
 static inline int
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0b1cf32..d971837 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -189,8 +189,6 @@ struct platform_suspend_ops {
 struct platform_freeze_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
-	void (*wake)(void);
-	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
@@ -430,8 +428,7 @@ extern unsigned int pm_wakeup_irq;
 
 extern bool pm_wakeup_pending(void);
 extern void pm_system_wakeup(void);
-extern void pm_system_cancel_wakeup(void);
-extern void pm_wakeup_clear(bool reset);
+extern void pm_wakeup_clear(void);
 extern void pm_system_irq_wakeup(unsigned int irq_number);
 extern bool pm_get_wakeup_count(unsigned int *count, bool block);
 extern bool pm_save_wakeup_count(unsigned int count);
@@ -481,7 +478,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 
 static inline bool pm_wakeup_pending(void) { return false; }
 static inline void pm_system_wakeup(void) {}
-static inline void pm_wakeup_clear(bool reset) {}
+static inline void pm_wakeup_clear(void) {}
 static inline void pm_system_irq_wakeup(unsigned int irq_number) {}
 
 static inline void lock_system_sleep(void) {}
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 110f453..f7043cc 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
  *			interval.
  * @xtime_remainder:	Shifted nano seconds left over when rounding
  *			@cycle_interval
- * @raw_interval:	Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:	Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:		Difference between accumulated time and NTP time in ntp
  *			shifted nano seconds.
  * @ntp_error_shift:	Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
 	u64			cycle_interval;
 	u64			xtime_interval;
 	s64			xtime_remainder;
-	u32			raw_interval;
+	u64			raw_interval;
 	/* The ntp_tick_length() value currently being used.
 	 * This cached copy ensures we consistently apply the tick
 	 * length for an entire tick, as ntp_tick_length may change
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d07cd21..eccb4ec 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -558,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port,
 		struct tty_driver *driver, unsigned index,
 		struct device *device, void *drvdata,
 		const struct attribute_group **attr_grp);
+extern struct device *tty_port_register_device_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device);
+extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port,
+		struct tty_driver *driver, unsigned index,
+		struct device *device, void *drvdata,
+		const struct attribute_group **attr_grp);
+extern void tty_port_unregister_device(struct tty_port *port,
+		struct tty_driver *driver, unsigned index);
 extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_destroy(struct tty_port *port);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7dffa56..9711637 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -206,6 +206,7 @@ struct cdc_state {
 };
 
 extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *);
+extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf);
 extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
 extern void usbnet_cdc_status(struct usbnet *, struct urb *);
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 4dff73a..d1defe4 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,29 +18,16 @@
 
 #include <uapi/linux/uuid.h>
 
-/*
- * V1 (time-based) UUID definition [RFC 4122].
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- *   increments since midnight 15th October 1582
- *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- *     time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
- */
-struct uuid_v1 {
-	__be32		time_low;			/* low part of timestamp */
-	__be16		time_mid;			/* mid part of timestamp */
-	__be16		time_hi_and_version;		/* high part of timestamp and version  */
-#define UUID_TO_UNIX_TIME	0x01b21dd213814000ULL
-#define UUID_TIMEHI_MASK	0x0fff
-#define UUID_VERSION_TIME	0x1000	/* time-based UUID */
-#define UUID_VERSION_NAME	0x3000	/* name-based UUID */
-#define UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
-	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
-#define UUID_CLOCKHI_MASK	0x3f
-#define UUID_VARIANT_STD	0x80
-	u8		clock_seq_low;			/* clock seq low */
-	u8		node[6];			/* spatially unique node ID (MAC addr) */
-};
+typedef struct {
+	__u8 b[16];
+} uuid_t;
+
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((uuid_t)								\
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,					\
+   ((c) >> 8) & 0xff, (c) & 0xff,					\
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
@@ -48,27 +35,73 @@ struct uuid_v1 {
  */
 #define	UUID_STRING_LEN		36
 
-static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+extern const guid_t guid_null;
+extern const uuid_t uuid_null;
+
+static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
 {
-	return memcmp(&u1, &u2, sizeof(uuid_le));
+	return memcmp(u1, u2, sizeof(guid_t)) == 0;
 }
 
-static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
+static inline void guid_copy(guid_t *dst, const guid_t *src)
 {
-	return memcmp(&u1, &u2, sizeof(uuid_be));
+	memcpy(dst, src, sizeof(guid_t));
+}
+
+static inline bool guid_is_null(const guid_t *guid)
+{
+	return guid_equal(guid, &guid_null);
+}
+
+static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
+{
+	return memcmp(u1, u2, sizeof(uuid_t)) == 0;
+}
+
+static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
+{
+	memcpy(dst, src, sizeof(uuid_t));
+}
+
+static inline bool uuid_is_null(const uuid_t *uuid)
+{
+	return uuid_equal(uuid, &uuid_null);
 }
 
 void generate_random_uuid(unsigned char uuid[16]);
 
-extern void uuid_le_gen(uuid_le *u);
-extern void uuid_be_gen(uuid_be *u);
+extern void guid_gen(guid_t *u);
+extern void uuid_gen(uuid_t *u);
 
 bool __must_check uuid_is_valid(const char *uuid);
 
-extern const u8 uuid_le_index[16];
-extern const u8 uuid_be_index[16];
+extern const u8 guid_index[16];
+extern const u8 uuid_index[16];
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u);
-int uuid_be_to_bin(const char *uuid, uuid_be *u);
+int guid_parse(const char *uuid, guid_t *u);
+int uuid_parse(const char *uuid, uuid_t *u);
+
+/* backwards compatibility, don't use in new code */
+typedef uuid_t uuid_be;
+#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+	UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define NULL_UUID_BE 							\
+	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
+	     0x00, 0x00, 0x00, 0x00)
+
+#define uuid_le_gen(u)		guid_gen(u)
+#define uuid_be_gen(u)		uuid_gen(u)
+#define uuid_le_to_bin(guid, u)	guid_parse(guid, u)
+#define uuid_be_to_bin(uuid, u)	uuid_parse(uuid, u)
+
+static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(guid_t));
+}
+
+static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
+{
+	return memcmp(&u1, &u2, sizeof(uuid_t));
+}
 
 #endif
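
A short sketch of the renamed API above; the constant is an arbitrary example value, example_matches() is illustrative, and uuid_parse() is assumed to return 0 on success.

    static const uuid_t example_uuid =
    	UUID_INIT(0x12345678, 0x9abc, 0xdef0,
    		  0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0);

    static bool example_matches(const char *str)
    {
    	uuid_t u;

    	if (uuid_parse(str, &u))
    		return false;		/* not a valid "aaaaaaaa-bbbb-..." string */
    	return uuid_equal(&u, &example_uuid);
    }
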
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index edf9b2c..f57076b 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -183,7 +183,7 @@ struct virqfd {
 	void			(*thread)(void *, void *);
 	void			*data;
 	struct work_struct	inject;
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	poll_table		pt;
 	struct work_struct	shutdown;
 	struct virqfd		**pvirqfd;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index db076ca..b289c96 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -10,38 +10,30 @@
 #include <asm/current.h>
 #include <uapi/linux/wait.h>
 
-typedef struct __wait_queue wait_queue_t;
-typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+typedef struct wait_queue_entry wait_queue_entry_t;
 
-/* __wait_queue::flags */
+typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
+int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key);
+
+/* wait_queue_entry::flags */
 #define WQ_FLAG_EXCLUSIVE	0x01
 #define WQ_FLAG_WOKEN		0x02
 
-struct __wait_queue {
+/*
+ * A single wait-queue entry structure:
+ */
+struct wait_queue_entry {
 	unsigned int		flags;
 	void			*private;
 	wait_queue_func_t	func;
-	struct list_head	task_list;
+	struct list_head	entry;
 };
 
-struct wait_bit_key {
-	void			*flags;
-	int			bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR	-1
-	unsigned long		timeout;
-};
-
-struct wait_bit_queue {
-	struct wait_bit_key	key;
-	wait_queue_t		wait;
-};
-
-struct __wait_queue_head {
+struct wait_queue_head {
 	spinlock_t		lock;
-	struct list_head	task_list;
+	struct list_head	head;
 };
-typedef struct __wait_queue_head wait_queue_head_t;
+typedef struct wait_queue_head wait_queue_head_t;
 
 struct task_struct;
 
@@ -49,82 +41,76 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
-#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
-	.private	= tsk,						\
-	.func		= default_wake_function,			\
-	.task_list	= { NULL, NULL } }
+#define __WAITQUEUE_INITIALIZER(name, tsk) {					\
+	.private	= tsk,							\
+	.func		= default_wake_function,				\
+	.entry		= { NULL, NULL } }
 
-#define DECLARE_WAITQUEUE(name, tsk)					\
-	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+#define DECLARE_WAITQUEUE(name, tsk)						\
+	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
 
-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
-	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
-	.task_list	= { &(name).task_list, &(name).task_list } }
+#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {					\
+	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),			\
+	.head		= { &(name).head, &(name).head } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
-#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
-	{ .flags = word, .bit_nr = bit, }
+extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *);
 
-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
-	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
-
-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
-
-#define init_waitqueue_head(q)				\
-	do {						\
-		static struct lock_class_key __key;	\
-							\
-		__init_waitqueue_head((q), #q, &__key);	\
+#define init_waitqueue_head(wq_head)						\
+	do {									\
+		static struct lock_class_key __key;				\
+										\
+		__init_waitqueue_head((wq_head), #wq_head, &__key);		\
 	} while (0)
 
 #ifdef CONFIG_LOCKDEP
 # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
 	({ init_waitqueue_head(&name); name; })
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
-	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
 #else
 # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
 #endif
 
-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p)
 {
-	q->flags	= 0;
-	q->private	= p;
-	q->func		= default_wake_function;
+	wq_entry->flags		= 0;
+	wq_entry->private	= p;
+	wq_entry->func		= default_wake_function;
 }
 
 static inline void
-init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func)
 {
-	q->flags	= 0;
-	q->private	= NULL;
-	q->func		= func;
+	wq_entry->flags		= 0;
+	wq_entry->private	= NULL;
+	wq_entry->func		= func;
 }
 
 /**
  * waitqueue_active -- locklessly test for waiters on the queue
- * @q: the waitqueue to test for waiters
+ * @wq_head: the waitqueue to test for waiters
  *
  * returns true if the wait list is not empty
  *
  * NOTE: this function is lockless and requires care, incorrect usage _will_
  * lead to sporadic and non-obvious failure.
  *
- * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * Use either while holding wait_queue_head::lock or when used for wakeups
  * with an extra smp_mb() like:
  *
  *      CPU0 - waker                    CPU1 - waiter
  *
  *                                      for (;;) {
- *      @cond = true;                     prepare_to_wait(&wq, &wait, state);
+ *      @cond = true;                     prepare_to_wait(&wq_head, &wait, state);
  *      smp_mb();                         // smp_mb() from set_current_state()
- *      if (waitqueue_active(wq))         if (@cond)
- *        wake_up(wq);                      break;
+ *      if (waitqueue_active(wq_head))         if (@cond)
+ *        wake_up(wq_head);                      break;
  *                                        schedule();
  *                                      }
- *                                      finish_wait(&wq, &wait);
+ *                                      finish_wait(&wq_head, &wait);
  *
  * Because without the explicit smp_mb() it's possible for the
  * waitqueue_active() load to get hoisted over the @cond store such that we'll
@@ -133,20 +119,20 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
  * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
  * which (when the lock is uncontended) are of roughly equal cost.
  */
-static inline int waitqueue_active(wait_queue_head_t *q)
+static inline int waitqueue_active(struct wait_queue_head *wq_head)
 {
-	return !list_empty(&q->task_list);
+	return !list_empty(&wq_head->head);
 }
 
 /**
  * wq_has_sleeper - check if there are any waiting processes
- * @wq: wait queue head
+ * @wq_head: wait queue head
  *
- * Returns true if wq has waiting processes
+ * Returns true if wq_head has waiting processes
  *
  * Please refer to the comment for waitqueue_active.
  */
-static inline bool wq_has_sleeper(wait_queue_head_t *wq)
+static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 {
 	/*
 	 * We need to be sure we are in sync with the
@@ -156,63 +142,51 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq)
 	 * waiting side.
 	 */
 	smp_mb();
-	return waitqueue_active(wq);
+	return waitqueue_active(wq_head);
 }
 
-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add(&new->task_list, &head->task_list);
+	list_add(&wq_entry->entry, &wq_head->head);
 }
 
 /*
  * Used for wake-one threads:
  */
 static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(wq_head, wq_entry);
 }
 
-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
-					 wait_queue_t *new)
+static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_add_tail(&new->task_list, &head->task_list);
+	list_add_tail(&wq_entry->entry, &wq_head->head);
 }
 
 static inline void
-__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+__add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
 }
 
 static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-	list_del(&old->task_list);
+	list_del(&wq_entry->entry);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-void wake_up_bit(void *, int);
-void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long);
-int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-wait_queue_head_t *bit_waitqueue(void *, int);
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -228,28 +202,28 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 /*
  * Wakeup macros to be used to report events to the targets.
  */
-#define wake_up_poll(x, m)						\
+#define wake_up_poll(x, m)							\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m)					\
+#define wake_up_locked_poll(x, m)						\
 	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m)				\
+#define wake_up_interruptible_poll(x, m)					\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define wake_up_interruptible_sync_poll(x, m)				\
+#define wake_up_interruptible_sync_poll(x, m)					\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
-#define ___wait_cond_timeout(condition)					\
-({									\
-	bool __cond = (condition);					\
-	if (__cond && !__ret)						\
-		__ret = 1;						\
-	__cond || !__ret;						\
+#define ___wait_cond_timeout(condition)						\
+({										\
+	bool __cond = (condition);						\
+	if (__cond && !__ret)							\
+		__ret = 1;							\
+	__cond || !__ret;							\
 })
 
-#define ___wait_is_interruptible(state)					\
-	(!__builtin_constant_p(state) ||				\
-		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
+#define ___wait_is_interruptible(state)						\
+	(!__builtin_constant_p(state) ||					\
+		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)		\
 
-extern void init_wait_entry(wait_queue_t *__wait, int flags);
+extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 
 /*
  * The below macro ___wait_event() has an explicit shadow of the __ret
@@ -263,108 +237,108 @@ extern void init_wait_entry(wait_queue_t *__wait, int flags);
  * otherwise.
  */
 
-#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
-({									\
-	__label__ __out;						\
-	wait_queue_t __wait;						\
-	long __ret = ret;	/* explicit shadow */			\
-									\
-	init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
-	for (;;) {							\
-		long __int = prepare_to_wait_event(&wq, &__wait, state);\
-									\
-		if (condition)						\
-			break;						\
-									\
-		if (___wait_is_interruptible(state) && __int) {		\
-			__ret = __int;					\
-			goto __out;					\
-		}							\
-									\
-		cmd;							\
-	}								\
-	finish_wait(&wq, &__wait);					\
-__out:	__ret;								\
+#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)		\
+({										\
+	__label__ __out;							\
+	struct wait_queue_entry __wq_entry;					\
+	long __ret = ret;	/* explicit shadow */				\
+										\
+	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
+	for (;;) {								\
+		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
+										\
+		if (condition)							\
+			break;							\
+										\
+		if (___wait_is_interruptible(state) && __int) {			\
+			__ret = __int;						\
+			goto __out;						\
+		}								\
+										\
+		cmd;								\
+	}									\
+	finish_wait(&wq_head, &__wq_entry);					\
+__out:	__ret;									\
 })
 
-#define __wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    schedule())
 
 /**
  * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__wait_event(wq, condition);					\
+#define wait_event(wq_head, condition)						\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__wait_event(wq_head, condition);					\
 } while (0)
 
-#define __io_wait_event(wq, condition)					\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __io_wait_event(wq_head, condition)					\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    io_schedule())
 
 /*
  * io_wait_event() -- like wait_event() but with io_schedule()
  */
-#define io_wait_event(wq, condition)					\
-do {									\
-	might_sleep();							\
-	if (condition)							\
-		break;							\
-	__io_wait_event(wq, condition);					\
+#define io_wait_event(wq_head, condition)					\
+do {										\
+	might_sleep();								\
+	if (condition)								\
+		break;								\
+	__io_wait_event(wq_head, condition);					\
 } while (0)
 
-#define __wait_event_freezable(wq, condition)				\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_freezable(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			    schedule(); try_to_freeze())
 
 /**
  * wait_event_freezable - sleep (or freeze) until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
  * to system load) until the @condition evaluates to true. The
- * @condition is checked each time the waitqueue @wq is woken up.
+ * @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_freezable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable(wq, condition);		\
-	__ret;								\
+#define wait_event_freezable(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_timeout(wq, condition, timeout)			\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_timeout(wq_head, condition, timeout)			\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_UNINTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -375,83 +349,83 @@ do {									\
  * or the remaining jiffies (at least 1) if the @condition evaluated
  * to %true before the @timeout elapsed.
  */
-#define wait_event_timeout(wq, condition, timeout)			\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_timeout(wq_head, condition, timeout)				\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_timeout(wq_head, condition, timeout);	\
+	__ret;									\
 })
 
-#define __wait_event_freezable_timeout(wq, condition, timeout)		\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_freezable_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret); try_to_freeze())
 
 /*
  * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
  * increasing load and is freezable.
  */
-#define wait_event_freezable_timeout(wq, condition, timeout)		\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_freezable_timeout(wq, condition, timeout);	\
-	__ret;								\
+#define wait_event_freezable_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_freezable_timeout(wq_head, condition, timeout); \
+	__ret;									\
 })
 
-#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
+#define __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 1, 0,	\
 			    cmd1; schedule(); cmd2)
 /*
  * Just like wait_event_cmd(), except it sets exclusive flag
  */
-#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_exclusive_cmd(wq, condition, cmd1, cmd2);		\
+#define wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2)		\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2);		\
 } while (0)
 
-#define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+#define __wait_event_cmd(wq_head, condition, cmd1, cmd2)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    cmd1; schedule(); cmd2)
 
 /**
  * wait_event_cmd - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @cmd1: the command will be executed before sleep
  * @cmd2: the command will be executed after sleep
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event_cmd(wq, condition, cmd1, cmd2)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
+#define wait_event_cmd(wq_head, condition, cmd1, cmd2)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_cmd(wq_head, condition, cmd1, cmd2);			\
 } while (0)
 
-#define __wait_event_interruptible(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+#define __wait_event_interruptible(wq_head, condition)				\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
 
 /**
  * wait_event_interruptible - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -459,29 +433,29 @@ do {									\
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible(wq, condition);	\
-	__ret;								\
+#define wait_event_interruptible(wq_head, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible(wq_head, condition);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_timeout(wq, condition, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
+#define __wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
 		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, in jiffies
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -493,50 +467,49 @@ do {									\
  * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
  * interrupted by a signal.
  */
-#define wait_event_interruptible_timeout(wq, condition, timeout)	\
-({									\
-	long __ret = timeout;						\
-	might_sleep();							\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_timeout(wq,		\
-						condition, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_timeout(wq_head, condition, timeout)		\
+({										\
+	long __ret = timeout;							\
+	might_sleep();								\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_timeout(wq_head,		\
+						condition, timeout);		\
+	__ret;									\
 })
 
-#define __wait_event_hrtimeout(wq, condition, timeout, state)		\
-({									\
-	int __ret = 0;							\
-	struct hrtimer_sleeper __t;					\
-									\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
-			      HRTIMER_MODE_REL);			\
-	hrtimer_init_sleeper(&__t, current);				\
-	if ((timeout) != KTIME_MAX)				\
-		hrtimer_start_range_ns(&__t.timer, timeout,		\
-				       current->timer_slack_ns,		\
-				       HRTIMER_MODE_REL);		\
-									\
-	__ret = ___wait_event(wq, condition, state, 0, 0,		\
-		if (!__t.task) {					\
-			__ret = -ETIME;					\
-			break;						\
-		}							\
-		schedule());						\
-									\
-	hrtimer_cancel(&__t.timer);					\
-	destroy_hrtimer_on_stack(&__t.timer);				\
-	__ret;								\
+#define __wait_event_hrtimeout(wq_head, condition, timeout, state)		\
+({										\
+	int __ret = 0;								\
+	struct hrtimer_sleeper __t;						\
+										\
+	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
+	hrtimer_init_sleeper(&__t, current);					\
+	if ((timeout) != KTIME_MAX)						\
+		hrtimer_start_range_ns(&__t.timer, timeout,			\
+				       current->timer_slack_ns,			\
+				       HRTIMER_MODE_REL);			\
+										\
+	__ret = ___wait_event(wq_head, condition, state, 0, 0,			\
+		if (!__t.task) {						\
+			__ret = -ETIME;						\
+			break;							\
+		}								\
+		schedule());							\
+										\
+	hrtimer_cancel(&__t.timer);						\
+	destroy_hrtimer_on_stack(&__t.timer);					\
+	__ret;									\
 })
 
 /**
  * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -544,25 +517,25 @@ do {									\
  * The function returns 0 if @condition became true, or -ETIME if the timeout
  * elapsed.
  */
-#define wait_event_hrtimeout(wq, condition, timeout)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_UNINTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_hrtimeout(wq_head, condition, timeout)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq_head, condition, timeout,	\
+					       TASK_UNINTERRUPTIBLE);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -570,73 +543,73 @@ do {									\
  * The function returns 0 if @condition became true, -ERESTARTSYS if it was
  * interrupted by a signal, or -ETIME if the timeout elapsed.
  */
-#define wait_event_interruptible_hrtimeout(wq, condition, timeout)	\
-({									\
-	long __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
-					       TASK_INTERRUPTIBLE);	\
-	__ret;								\
+#define wait_event_interruptible_hrtimeout(wq, condition, timeout)		\
+({										\
+	long __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_hrtimeout(wq, condition, timeout,		\
+					       TASK_INTERRUPTIBLE);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_exclusive(wq, condition)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_interruptible_exclusive(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_interruptible_exclusive(wq, condition)		\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_interruptible_exclusive(wq, condition)			\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_exclusive(wq, condition);	\
+	__ret;									\
 })
 
-#define __wait_event_killable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,		\
+#define __wait_event_killable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_KILLABLE, 1, 0,			\
 		      schedule())
 
-#define wait_event_killable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable_exclusive(wq, condition);	\
-	__ret;								\
+#define wait_event_killable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable_exclusive(wq, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_freezable_exclusive(wq, condition)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
+#define __wait_event_freezable_exclusive(wq, condition)				\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
 			schedule(); try_to_freeze())
 
-#define wait_event_freezable_exclusive(wq, condition)			\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_freezable_exclusive(wq, condition);\
-	__ret;								\
+#define wait_event_freezable_exclusive(wq, condition)				\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_freezable_exclusive(wq, condition);	\
+	__ret;									\
 })
 
-extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *);
-extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
+extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *);
+extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 
-#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \
-({									\
-	int __ret;							\
-	DEFINE_WAIT(__wait);						\
-	if (exclusive)							\
-		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
-	do {								\
-		__ret = fn(&(wq), &__wait);				\
-		if (__ret)						\
-			break;						\
-	} while (!(condition));						\
-	__remove_wait_queue(&(wq), &__wait);				\
-	__set_current_state(TASK_RUNNING);				\
-	__ret;								\
+#define __wait_event_interruptible_locked(wq, condition, exclusive, fn)		\
+({										\
+	int __ret;								\
+	DEFINE_WAIT(__wait);							\
+	if (exclusive)								\
+		__wait.flags |= WQ_FLAG_EXCLUSIVE;				\
+	do {									\
+		__ret = fn(&(wq), &__wait);					\
+		if (__ret)							\
+			break;							\
+	} while (!(condition));							\
+	__remove_wait_queue(&(wq), &__wait);					\
+	__set_current_state(TASK_RUNNING);					\
+	__ret;									\
 })
 
 
@@ -663,8 +636,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked(wq, condition)			\
-	((condition)							\
+#define wait_event_interruptible_locked(wq, condition)				\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr))
 
 /**
@@ -690,8 +663,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_locked_irq(wq, condition)		\
-	((condition)							\
+#define wait_event_interruptible_locked_irq(wq, condition)			\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq))
 
 /**
@@ -721,8 +694,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr))
 
 /**
@@ -752,12 +725,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
-	((condition)							\
+#define wait_event_interruptible_exclusive_locked_irq(wq, condition)		\
+	((condition)								\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq))
 
 
-#define __wait_event_killable(wq, condition)				\
+#define __wait_event_killable(wq, condition)					\
 	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
 /**
@@ -775,21 +748,21 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * The function will return -ERESTARTSYS if it was interrupted by a
  * signal and 0 if @condition evaluated to true.
  */
-#define wait_event_killable(wq, condition)				\
-({									\
-	int __ret = 0;							\
-	might_sleep();							\
-	if (!(condition))						\
-		__ret = __wait_event_killable(wq, condition);		\
-	__ret;								\
+#define wait_event_killable(wq_head, condition)					\
+({										\
+	int __ret = 0;								\
+	might_sleep();								\
+	if (!(condition))							\
+		__ret = __wait_event_killable(wq_head, condition);		\
+	__ret;									\
 })
 
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
-			    spin_unlock_irq(&lock);			\
-			    cmd;					\
-			    schedule();					\
+#define __wait_event_lock_irq(wq_head, condition, lock, cmd)			\
+	(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    spin_unlock_irq(&lock);				\
+			    cmd;						\
+			    schedule();						\
 			    spin_lock_irq(&lock))
 
 /**
@@ -797,7 +770,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  *			     condition is checked under the lock. This
  *			     is expected to be called with the lock
  *			     taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd
  *	  and schedule() and reacquired afterwards.
@@ -806,7 +779,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -815,11 +788,11 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *);
  * dropped before invoking the cmd and going to sleep and is reacquired
  * afterwards.
  */
-#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+#define wait_event_lock_irq_cmd(wq_head, condition, lock, cmd)			\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, cmd);			\
 } while (0)
 
 /**
@@ -827,14 +800,14 @@ do {									\
  *			 condition is checked under the lock. This
  *			 is expected to be called with the lock
  *			 taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  * @condition evaluates to true. The @condition is checked each time
- * the waitqueue @wq is woken up.
+ * the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -842,26 +815,26 @@ do {									\
  * This is supposed to be called while holding the lock. The lock is
  * dropped before going to sleep and is reacquired afterwards.
  */
-#define wait_event_lock_irq(wq, condition, lock)			\
-do {									\
-	if (condition)							\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, );			\
+#define wait_event_lock_irq(wq_head, condition, lock)				\
+do {										\
+	if (condition)								\
+		break;								\
+	__wait_event_lock_irq(wq_head, condition, lock, );			\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
-		      spin_unlock_irq(&lock);				\
-		      cmd;						\
-		      schedule();					\
+#define __wait_event_interruptible_lock_irq(wq_head, condition, lock, cmd)	\
+	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
+		      spin_unlock_irq(&lock);					\
+		      cmd;							\
+		      schedule();						\
 		      spin_lock_irq(&lock))
 
 /**
  * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected to
  *		be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before cmd and
  *	  schedule() and reacquired afterwards.
@@ -870,7 +843,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -882,27 +855,27 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock, cmd);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_cmd(wq_head, condition, lock, cmd)	\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock, cmd);		\
+	__ret;									\
 })
 
 /**
  * wait_event_interruptible_lock_irq - sleep until a condition gets true.
  *		The condition is checked under the lock. This is expected
  *		to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -913,28 +886,28 @@ do {									\
  * The macro will return -ERESTARTSYS if it was interrupted by a signal
  * and 0 if @condition evaluated to true.
  */
-#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
-({									\
-	int __ret = 0;							\
-	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock,);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq(wq_head, condition, lock)		\
+({										\
+	int __ret = 0;								\
+	if (!(condition))							\
+		__ret = __wait_event_interruptible_lock_irq(wq_head,		\
+						condition, lock,);		\
+	__ret;									\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
-						    lock, timeout)	\
-	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, timeout,			\
-		      spin_unlock_irq(&lock);				\
-		      __ret = schedule_timeout(__ret);			\
+#define __wait_event_interruptible_lock_irq_timeout(wq_head, condition,		\
+						    lock, timeout)		\
+	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
+		      TASK_INTERRUPTIBLE, 0, timeout,				\
+		      spin_unlock_irq(&lock);					\
+		      __ret = schedule_timeout(__ret);				\
 		      spin_lock_irq(&lock));
 
 /**
  * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
  *		true or a timeout elapses. The condition is checked under
  *		the lock. This is expected to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
  *	  and reacquired afterwards.
@@ -942,7 +915,7 @@ do {									\
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or signal is received. The @condition is
- * checked each time the waitqueue @wq is woken up.
+ * checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -954,263 +927,42 @@ do {									\
  * was interrupted by a signal, and the remaining jiffies otherwise
  * if the condition evaluated to true before the timeout elapsed.
  */
-#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
-						  timeout)		\
-({									\
-	long __ret = timeout;						\
-	if (!___wait_cond_timeout(condition))				\
-		__ret = __wait_event_interruptible_lock_irq_timeout(	\
-					wq, condition, lock, timeout);	\
-	__ret;								\
+#define wait_event_interruptible_lock_irq_timeout(wq_head, condition, lock,	\
+						  timeout)			\
+({										\
+	long __ret = timeout;							\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_interruptible_lock_irq_timeout(		\
+					wq_head, condition, lock, timeout);	\
+	__ret;									\
 })
 
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
 
-#define DEFINE_WAIT_FUNC(name, function)				\
-	wait_queue_t name = {						\
-		.private	= current,				\
-		.func		= function,				\
-		.task_list	= LIST_HEAD_INIT((name).task_list),	\
+#define DEFINE_WAIT_FUNC(name, function)					\
+	struct wait_queue_entry name = {					\
+		.private	= current,					\
+		.func		= function,					\
+		.entry		= LIST_HEAD_INIT((name).entry),			\
 	}
 
 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 
-#define DEFINE_WAIT_BIT(name, word, bit)				\
-	struct wait_bit_queue name = {					\
-		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
-		.wait	= {						\
-			.private	= current,			\
-			.func		= wake_bit_function,		\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
-		},							\
-	}
-
-#define init_wait(wait)							\
-	do {								\
-		(wait)->private = current;				\
-		(wait)->func = autoremove_wake_function;		\
-		INIT_LIST_HEAD(&(wait)->task_list);			\
-		(wait)->flags = 0;					\
+#define init_wait(wait)								\
+	do {									\
+		(wait)->private = current;					\
+		(wait)->func = autoremove_wake_function;			\
+		INIT_LIST_HEAD(&(wait)->entry);					\
+		(wait)->flags = 0;						\
 	} while (0)
 
-
-extern int bit_wait(struct wait_bit_key *, int);
-extern int bit_wait_io(struct wait_bit_key *, int);
-extern int bit_wait_timeout(struct wait_bit_key *, int);
-extern int bit_wait_io_timeout(struct wait_bit_key *, int);
-
-/**
- * wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait,
-				       mode);
-}
-
-/**
- * wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared.  This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit,
-				       bit_wait_io,
-				       mode);
-}
-
-/**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- * @timeout: timeout, in jiffies
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
- *
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
- */
-static inline int
-wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
-		    unsigned long timeout)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_timeout(word, bit,
-					       bit_wait_timeout,
-					       mode, timeout);
-}
-
-/**
- * wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
- */
-static inline int
-wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
-		   unsigned mode)
-{
-	might_sleep();
-	if (!test_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit(word, bit, action, mode);
-}
-
-/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
-}
-
-/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it.  This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
-}
-
-/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
- *
- * Returns zero if the bit was (eventually) found to be clear and was
- * set.  Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
- */
-static inline int
-wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
-			unsigned mode)
-{
-	might_sleep();
-	if (!test_and_set_bit(bit, word))
-		return 0;
-	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
-}
-
-/**
- * wait_on_atomic_t - Wait for an atomic_t to become 0
- * @val: The atomic value being waited on, a kernel virtual address
- * @action: the function used to sleep, which may take special actions
- * @mode: the task state to sleep in
- *
- * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
- * the purpose of getting a waitqueue, but we set the key to a bit number
- * outside of the target 'word'.
- */
-static inline
-int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
-{
-	might_sleep();
-	if (atomic_read(val) == 0)
-		return 0;
-	return out_of_line_wait_on_atomic_t(val, action, mode);
-}
-
 #endif /* _LINUX_WAIT_H */
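
With the rename in place, the classic open-coded wait loop reads as below; a minimal sketch using made-up names (my_wq, my_flag, my_wait_for_flag()), not code from this series:

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq);		/* now expands to struct wait_queue_head */
static int my_flag;

static void my_wait_for_flag(void)
{
	DEFINE_WAIT(wait);			/* a struct wait_queue_entry on the stack */

	/* open-coded equivalent of wait_event(my_wq, my_flag != 0) */
	for (;;) {
		prepare_to_wait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
		if (my_flag)
			break;
		schedule();
	}
	finish_wait(&my_wq, &wait);
}

static void my_set_flag(void)
{
	my_flag = 1;
	wake_up(&my_wq);			/* __wake_up(&my_wq, TASK_NORMAL, 1, NULL) */
}
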
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
new file mode 100644
index 0000000..12b2666
--- /dev/null
+++ b/include/linux/wait_bit.h
@@ -0,0 +1,261 @@
+#ifndef _LINUX_WAIT_BIT_H
+#define _LINUX_WAIT_BIT_H
+
+/*
+ * Linux wait-bit related types and methods:
+ */
+#include <linux/wait.h>
+
+struct wait_bit_key {
+	void			*flags;
+	int			bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR	-1
+	unsigned long		timeout;
+};
+
+struct wait_bit_queue_entry {
+	struct wait_bit_key	key;
+	struct wait_queue_entry	wq_entry;
+};
+
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)					\
+	{ .flags = word, .bit_nr = bit, }
+
+#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)					\
+	{ .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
+
+typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
+void wake_up_bit(void *word, int bit);
+void wake_up_atomic_t(atomic_t *p);
+int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode);
+struct wait_queue_head *bit_waitqueue(void *word, int bit);
+extern void __init wait_bit_init(void);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
+
+#define DEFINE_WAIT_BIT(name, word, bit)					\
+	struct wait_bit_queue_entry name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),			\
+		.wq_entry = {							\
+			.private	= current,				\
+			.func		= wake_bit_function,			\
+			.entry		=					\
+				LIST_HEAD_INIT((name).wq_entry.entry),		\
+		},								\
+	}
+
+extern int bit_wait(struct wait_bit_key *key, int bit);
+extern int bit_wait_io(struct wait_bit_key *key, int bit);
+extern int bit_wait_timeout(struct wait_bit_key *key, int bit);
+extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit);
+
+/**
+ * wait_on_bit - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit.
+ * For instance, if one were to have waiters on a bitflag, one would
+ * call wait_on_bit() in threads waiting for the bit to clear.
+ * One uses wait_on_bit() where one is waiting for the bit to clear,
+ * but has no intention of setting it.
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait,
+				       mode);
+}
+
+/**
+ * wait_on_bit_io - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared.  This is similar to wait_on_bit(), but calls
+ * io_schedule() instead of schedule() for the actual waiting.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit,
+				       bit_wait_io,
+				       mode);
+}
+
+/**
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ * @timeout: timeout, in jiffies
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared. This is similar to wait_on_bit(), except also takes a
+ * timeout parameter.
+ *
+ * Returned value will be zero if the bit was cleared before the
+ * @timeout elapsed, or non-zero if the @timeout elapsed or process
+ * received a signal and the mode permitted wakeup on that signal.
+ */
+static inline int
+wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
+		    unsigned long timeout)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_timeout(word, bit,
+					       bit_wait_timeout,
+					       mode, timeout);
+}
+
+/**
+ * wait_on_bit_action - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared, and allow the waiting action to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
+		   unsigned mode)
+{
+	might_sleep();
+	if (!test_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit(word, bit, action, mode);
+}
+
+/**
+ * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that waits on a bit
+ * when one intends to set it, for instance, when trying to lock a
+ * bitflag.  One uses wait_on_bit_lock() where one is waiting for the
+ * bit to clear with the intention of setting it, and when done,
+ * clearing it.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+}
+
+/**
+ * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to atomically set it.  This is similar
+ * to wait_on_bit(), but calls io_schedule() instead of schedule()
+ * for the actual waiting.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
+}
+
+/**
+ * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to set it, and allow the waiting action
+ * to be specified.
+ * This is like wait_on_bit_lock() but allows fine control of how the
+ * waiting is done.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
+			unsigned mode)
+{
+	might_sleep();
+	if (!test_and_set_bit(bit, word))
+		return 0;
+	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
+}
+
+/**
+ * wait_on_atomic_t - Wait for an atomic_t to become 0
+ * @val: The atomic value being waited on, a kernel virtual address
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Wait for an atomic_t to become 0.  We abuse the bit-wait waitqueue table for
+ * the purpose of getting a waitqueue, but we set the key to a bit number
+ * outside of the target 'word'.
+ */
+static inline
+int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
+{
+	might_sleep();
+	if (atomic_read(val) == 0)
+		return 0;
+	return out_of_line_wait_on_atomic_t(val, action, mode);
+}
+
+#endif /* _LINUX_WAIT_BIT_H */
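
As an aside, a minimal usage sketch of the wait_on_bit()/wait_on_bit_lock()
helpers documented in this new header might look as follows.  This is not
part of the diff; the "mydev" names are made up, and <linux/wait_bit.h> is
assumed to be the include path introduced here:

#include <linux/wait_bit.h>
#include <linux/sched.h>
#include <linux/bitops.h>

#define MYDEV_FLAG_BUSY	0	/* illustrative bit number inside dev->flags */

struct mydev {
	unsigned long flags;
};

/* Sleep (interruptibly) until another thread clears the busy bit. */
static int mydev_wait_idle(struct mydev *dev)
{
	return wait_on_bit(&dev->flags, MYDEV_FLAG_BUSY, TASK_INTERRUPTIBLE);
}

/* Take ownership: wait for the bit to clear, then set it atomically. */
static int mydev_claim(struct mydev *dev)
{
	return wait_on_bit_lock(&dev->flags, MYDEV_FLAG_BUSY,
				TASK_UNINTERRUPTIBLE);
}

/* Drop ownership and wake any waiters hashed on this bit. */
static void mydev_release(struct mydev *dev)
{
	clear_bit_unlock(MYDEV_FLAG_BUSY, &dev->flags);
	wake_up_bit(&dev->flags, MYDEV_FLAG_BUSY);
}
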
diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h
index eb50ce5..298f996 100644
--- a/include/media/cec-notifier.h
+++ b/include/media/cec-notifier.h
@@ -29,7 +29,7 @@ struct edid;
 struct cec_adapter;
 struct cec_notifier;
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#if IS_REACHABLE(CONFIG_CEC_CORE) && IS_ENABLED(CONFIG_CEC_NOTIFIER)
 
 /**
  * cec_notifier_get - find or create a new cec_notifier for the given device.
@@ -106,6 +106,16 @@ static inline void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n,
 {
 }
 
+static inline void cec_notifier_register(struct cec_notifier *n,
+			 struct cec_adapter *adap,
+			 void (*callback)(struct cec_adapter *adap, u16 pa))
+{
+}
+
+static inline void cec_notifier_unregister(struct cec_notifier *n)
+{
+}
+
 #endif
 
 #endif
diff --git a/include/media/cec.h b/include/media/cec.h
index b8eb895..201f060 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -173,7 +173,7 @@ struct cec_adapter {
 	bool passthrough;
 	struct cec_log_addrs log_addrs;
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 	struct cec_notifier *notifier;
 #endif
 
@@ -206,7 +206,7 @@ static inline bool cec_is_sink(const struct cec_adapter *adap)
 #define cec_phys_addr_exp(pa) \
 	((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf
 
-#if IS_ENABLED(CONFIG_CEC_CORE)
+#if IS_REACHABLE(CONFIG_CEC_CORE)
 struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
 		void *priv, const char *name, u32 caps, u8 available_las);
 int cec_register_adapter(struct cec_adapter *adap, struct device *parent);
@@ -300,7 +300,7 @@ u16 cec_phys_addr_for_input(u16 phys_addr, u8 input);
  */
 int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port);
 
-#ifdef CONFIG_MEDIA_CEC_NOTIFIER
+#ifdef CONFIG_CEC_NOTIFIER
 void cec_register_cec_notifier(struct cec_adapter *adap,
 			       struct cec_notifier *notifier);
 #endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd60ecc..75e612a 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,7 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
-	wait_queue_t		peer_wake;
+	wait_queue_entry_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33..cfc0437 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -107,10 +107,16 @@ struct dst_entry {
 	};
 };
 
+struct dst_metrics {
+	u32		metrics[RTAX_MAX];
+	atomic_t	refcnt;
+};
+extern const struct dst_metrics dst_default_metrics;
+
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
-extern const u32 dst_default_metrics[];
 
 #define DST_METRICS_READ_ONLY		0x1UL
+#define DST_METRICS_REFCOUNTED		0x2UL
 #define DST_METRICS_FLAGS		0x3UL
 #define __DST_METRICS_PTR(Y)	\
 	((u32 *)((Y) & ~DST_METRICS_FLAGS))
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6692c57..f7f6aa7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -114,11 +114,11 @@ struct fib_info {
 	__be32			fib_prefsrc;
 	u32			fib_tb_id;
 	u32			fib_priority;
-	u32			*fib_metrics;
-#define fib_mtu fib_metrics[RTAX_MTU-1]
-#define fib_window fib_metrics[RTAX_WINDOW-1]
-#define fib_rtt fib_metrics[RTAX_RTT-1]
-#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
+	struct dst_metrics	*fib_metrics;
+#define fib_mtu fib_metrics->metrics[RTAX_MTU-1]
+#define fib_window fib_metrics->metrics[RTAX_WINDOW-1]
+#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
+#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
 	int			fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index dbf0abb..3e505bb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1007,6 +1007,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
  */
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
+extern const struct proto_ops inet6_sockraw_ops;
 
 struct group_source_req;
 struct group_filter;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index e04fa769..c519bb5b 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -9,6 +9,7 @@
 
 #ifndef _NF_CONNTRACK_HELPER_H
 #define _NF_CONNTRACK_HELPER_H
+#include <linux/refcount.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -26,6 +27,7 @@ struct nf_conntrack_helper {
 	struct hlist_node hnode;	/* Internal use. */
 
 	char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */
+	refcount_t refcnt;
 	struct module *me;		/* pointer to self */
 	const struct nf_conntrack_expect_policy *expect_policy;
 
@@ -79,6 +81,8 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
 struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
 							       u16 l3num,
 							       u8 protonum);
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper);
+
 void nf_ct_helper_init(struct nf_conntrack_helper *helper,
 		       u16 l3num, u16 protonum, const char *name,
 		       u16 default_port, u16 spec_port, u32 id,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 028faec8..8a8bab8 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -176,7 +176,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
 		  struct nft_data *data, unsigned int size,
 		  struct nft_data_desc *desc, const struct nlattr *nla);
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type);
+void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len);
 
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index f31fb63..3248bea 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <net/act_api.h>
+#include <linux/tc_act/tc_csum.h>
 
 struct tcf_csum {
 	struct tc_action common;
@@ -11,4 +12,18 @@ struct tcf_csum {
 };
 #define to_tcf_csum(a) ((struct tcf_csum *)a)
 
+static inline bool is_tcf_csum(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->type == TCA_ACT_CSUM)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_csum_update_flags(const struct tc_action *a)
+{
+	return to_tcf_csum(a)->update_flags;
+}
+
 #endif /* __NET_TC_CSUM_H */
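
For illustration only (not part of this change): a driver's tc-offload
parsing path might consult the new helpers roughly as below; the
surrounding function and the "IPv4 header only" policy are hypothetical:

#include <linux/errno.h>
#include <net/tc_act/tc_csum.h>

static int mydrv_parse_csum_action(const struct tc_action *a)
{
	if (!is_tcf_csum(a))
		return -EOPNOTSUPP;

	/* e.g. hardware that can only refresh the IPv4 header checksum */
	if (tcf_csum_update_flags(a) & ~TCA_CSUM_UPDATE_FLAG_IPV4HDR)
		return -EOPNOTSUPP;

	return 0;
}
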
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 38a7427..be6223c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -924,7 +924,7 @@ struct tcp_congestion_ops {
 	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
 	/* call when ack arrives (optional) */
 	void (*in_ack_event)(struct sock *sk, u32 flags);
-	/* new value of cwnd after loss (optional) */
+	/* new value of cwnd after loss (required) */
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
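
With ->undo_cwnd now required, a sketch of what a minimal congestion-control
module would register (illustrative "mymod" names; the Reno helpers are
assumed to be the usual exported fallbacks):

#include <linux/module.h>
#include <net/tcp.h>

static struct tcp_congestion_ops mymod_ops __read_mostly = {
	.name		= "mymod",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,	/* no longer optional */
};
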
diff --git a/include/net/wext.h b/include/net/wext.h
index 3459119..454ff76 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -6,7 +6,7 @@
 struct net;
 
 #ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 		      void __user *arg);
 int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 			     unsigned long arg);
@@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 struct iw_statistics *get_wireless_stats(struct net_device *dev);
 int call_commit_handler(struct net_device *dev);
 #else
-static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 				    void __user *arg)
 {
 	return -EINVAL;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6793a30c..62f5a25 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -979,10 +979,6 @@ struct xfrm_dst {
 	struct flow_cache_object flo;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols, num_xfrms;
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct flowi *origin;
-	struct xfrm_selector *partner;
-#endif
 	u32 xfrm_genid;
 	u32 policy_genid;
 	u32 route_mtu_cached;
@@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 	dst_release(xdst->route);
 	if (likely(xdst->u.dst.xfrm))
 		xfrm_state_put(xdst->u.dst.xfrm);
-#ifdef CONFIG_XFRM_SUB_POLICY
-	kfree(xdst->origin);
-	xdst->origin = NULL;
-	kfree(xdst->partner);
-	xdst->partner = NULL;
-#endif
 }
 #endif
 
@@ -1860,8 +1850,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 }
 #endif
 
-#ifdef CONFIG_XFRM_OFFLOAD
 void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
@@ -1887,10 +1878,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	}
 }
 #else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
 static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	return 0;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index f5f70e3..355b81f 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -158,7 +158,6 @@ enum sa_path_rec_type {
 };
 
 struct sa_path_rec_ib {
-	__be64       service_id;
 	__be16       dlid;
 	__be16       slid;
 	u8           raw_traffic;
@@ -174,7 +173,6 @@ struct sa_path_rec_roce {
 };
 
 struct sa_path_rec_opa {
-	__be64       service_id;
 	__be32       dlid;
 	__be32       slid;
 	u8           raw_traffic;
@@ -189,6 +187,7 @@ struct sa_path_rec_opa {
 struct sa_path_rec {
 	union ib_gid dgid;
 	union ib_gid sgid;
+	__be64       service_id;
 	/* reserved */
 	__be32       flow_label;
 	u8           hop_limit;
@@ -262,7 +261,7 @@ static inline void path_conv_opa_to_ib(struct sa_path_rec *ib,
 		ib->ib.dlid	= htons(ntohl(opa->opa.dlid));
 		ib->ib.slid	= htons(ntohl(opa->opa.slid));
 	}
-	ib->ib.service_id	= opa->opa.service_id;
+	ib->service_id		= opa->service_id;
 	ib->ib.raw_traffic	= opa->opa.raw_traffic;
 }
 
@@ -281,7 +280,7 @@ static inline void path_conv_ib_to_opa(struct sa_path_rec *opa,
 	}
 	opa->opa.slid		= slid;
 	opa->opa.dlid		= dlid;
-	opa->opa.service_id	= ib->ib.service_id;
+	opa->service_id		= ib->service_id;
 	opa->opa.raw_traffic	= ib->ib.raw_traffic;
 }
 
@@ -591,15 +590,6 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec)
 		(rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2));
 }
 
-static inline void sa_path_set_service_id(struct sa_path_rec *rec,
-					  __be64 service_id)
-{
-	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
-		rec->ib.service_id = service_id;
-	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
-		rec->opa.service_id = service_id;
-}
-
 static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid)
 {
 	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
@@ -625,15 +615,6 @@ static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec,
 		rec->opa.raw_traffic = raw_traffic;
 }
 
-static inline __be64 sa_path_get_service_id(struct sa_path_rec *rec)
-{
-	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
-		return rec->ib.service_id;
-	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
-		return rec->opa.service_id;
-	return 0;
-}
-
 static inline __be32 sa_path_get_slid(struct sa_path_rec *rec)
 {
 	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 5852661..348c102 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -10,9 +10,6 @@ struct ibnl_client_cbs {
 	struct module *module;
 };
 
-int ibnl_init(void);
-void ibnl_cleanup(void);
-
 /**
  * Add a a client to the list of IB netlink exporters.
  * @index: Index of the added client
@@ -77,11 +74,4 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
 int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
 			unsigned int group, gfp_t flags);
 
-/**
- * Check if there are any listeners to the netlink group
- * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
- */
-int ibnl_chk_listeners(unsigned int group);
-
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca8..a29d308 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
 
 	osd_req_done_fn *async_done;
 	void *async_private;
-	int async_error;
+	blk_status_t async_error;
 	int req_errors;
 };
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index b379f93..da9bf2b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 extern void scsi_kunmap_atomic_sg(void *virt);
 
 extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern void scsi_initialize_rq(struct request *rq);
 
 extern int scsi_dma_map(struct scsi_cmnd *cmd);
 extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index f0c76f9..e0afa44 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
 		kfree(req->cmd);
 }
 
-void scsi_req_init(struct request *);
+void scsi_req_init(struct scsi_request *req);
 
 #endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 275581d..5f17fb7 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -557,6 +557,7 @@ struct iscsi_conn {
 #define LOGIN_FLAGS_READ_ACTIVE		1
 #define LOGIN_FLAGS_CLOSED		2
 #define LOGIN_FLAGS_READY		4
+#define LOGIN_FLAGS_INITIAL_PDU		8
 	unsigned long		login_flags;
 	struct delayed_work	login_work;
 	struct delayed_work	login_cleanup_work;
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index e3facb3..91dc089 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -742,6 +742,7 @@ TRACE_EVENT(rcu_torture_read,
  *	"OnlineQ": _rcu_barrier() found online CPU with callbacks.
  *	"OnlineNQ": _rcu_barrier() found online CPU, no callbacks.
  *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+ *	"IRQNQ": An rcu_barrier_callback() callback found no callbacks.
  *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"LastCB": An rcu_barrier_callback() invoked the last callback.
  *	"Inc2": _rcu_barrier() piggyback check counter incremented.
diff --git a/include/uapi/linux/a.out.h b/include/uapi/linux/a.out.h
index 7caf44c..295cd3e 100644
--- a/include/uapi/linux/a.out.h
+++ b/include/uapi/linux/a.out.h
@@ -112,24 +112,7 @@ enum machine_type {
 #define N_TXTADDR(x) (N_MAGIC(x) == QMAGIC ? PAGE_SIZE : 0)
 #endif
 
-/* Address of data segment in memory after it is loaded.
-   Note that it is up to you to define SEGMENT_SIZE
-   on machines not listed here.  */
-#if defined(vax) || defined(hp300) || defined(pyr)
-#define SEGMENT_SIZE page_size
-#endif
-#ifdef	sony
-#define	SEGMENT_SIZE	0x2000
-#endif	/* Sony.  */
-#ifdef is68k
-#define SEGMENT_SIZE 0x20000
-#endif
-#if defined(m68k) && defined(PORTAR)
-#define PAGE_SIZE 0x400
-#define SEGMENT_SIZE PAGE_SIZE
-#endif
-
-#ifdef linux
+/* Address of data segment in memory after it is loaded. */
 #ifndef __KERNEL__
 #include <unistd.h>
 #endif
@@ -142,7 +125,6 @@ enum machine_type {
 #endif
 #endif
 #endif
-#endif
 
 #define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
 
@@ -260,13 +242,7 @@ struct relocation_info
   unsigned int r_extern:1;
   /* Four bits that aren't used, but when writing an object file
      it is desirable to clear them.  */
-#ifdef NS32K
-  unsigned r_bsr:1;
-  unsigned r_disp:1;
-  unsigned r_pad:2;
-#else
   unsigned int r_pad:4;
-#endif
 };
 #endif /* no N_RELOCATION_INFO_DECLARED.  */
 
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f..a2d4a8a 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
 struct iocb {
 	/* these are internal to the kernel/libc. */
 	__u64	aio_data;	/* data to be returned in event's data */
-	__u32	PADDED(aio_key, aio_reserved1);
+	__u32	PADDED(aio_key, aio_rw_flags);
 				/* the kernel sets aio_key to the req # */
 
 	/* common fields */
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index aa63451..1953f8d 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -26,7 +26,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
+ * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed
  * back to the kernel via ioctl from userspace. On architectures where 32- and
  * 64-bit userspace binaries can be executed it's important that the size of
  * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
@@ -49,7 +49,7 @@ struct autofs_packet_hdr {
 
 struct autofs_packet_missing {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };	
diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 7c6da42..65b72d02 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h
@@ -108,7 +108,7 @@ enum autofs_notify {
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	int len;
 	char name[NAME_MAX+1];
 };
@@ -123,7 +123,7 @@ union autofs_packet_union {
 /* autofs v5 common packet struct */
 struct autofs_v5_packet {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_token;
+	autofs_wqt_t wait_queue_entry_token;
 	__u32 dev;
 	__u64 ino;
 	__u32 uid;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1e..2f6c77a 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	35
+#define DM_VERSION_MINOR	36
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2016-06-23)"
+#define DM_VERSION_EXTRA	"-ioctl (2017-06-09)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d179d77..7d4a594 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1486,8 +1486,10 @@ enum ethtool_link_mode_bit_indices {
  * it was forced up into this mode or autonegotiated.
  */
 
-/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */
-/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
 #define SPEED_10		10
 #define SPEED_100		100
 #define SPEED_1000		1000
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6..ec69d55 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
 /* (1U << 31) is reserved for signed error codes */
 
 /*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
+/*
  * Types of directory notifications that may be requested.
  */
 #define DN_ACCESS	0x00000001	/* File accessed */
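
A hedged userspace sketch of the new write-life-time hint fcntls (not part
of the diff; the fallback definitions below simply mirror the values added
above, for libc headers that do not yet carry them):

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>

#ifndef F_SET_RW_HINT
#define F_LINUX_SPECIFIC_BASE	1024
#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
#define RWH_WRITE_LIFE_SHORT	2
#endif

/* Tag the whole inode as holding short-lived data. */
static int set_short_lived(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
		perror("F_SET_RW_HINT");
		return -1;
	}
	return 0;
}
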
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a5..27d8c36 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,5 +360,9 @@ struct fscrypt_key {
 #define RWF_HIPRI			0x00000001 /* high priority request, poll if possible */
 #define RWF_DSYNC			0x00000002 /* per-IO O_DSYNC */
 #define RWF_SYNC			0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT			0x00000008 /* per-IO, return -EAGAIN if operation would block */
+
+#define RWF_SUPPORTED			(RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+					 RWF_NOWAIT)
 
 #endif /* _UAPI_LINUX_FS_H */
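
For illustration (not part of the diff): the userspace calling convention
for the new RWF_NOWAIT flag, assuming a glibc that provides the preadv2()
wrapper.  Whether a given file or filesystem actually honours the flag
depends on which kernel I/O paths support it:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/uio.h>

#ifndef RWF_NOWAIT
#define RWF_NOWAIT 0x00000008
#endif

/* Try to read without blocking; report EAGAIN instead of waiting. */
static ssize_t read_nowait(int fd, void *buf, size_t len, off_t off)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	ssize_t ret = preadv2(fd, &iov, 1, off, RWF_NOWAIT);

	if (ret < 0 && errno == EAGAIN)
		fprintf(stderr, "data not ready, try again later\n");
	return ret;
}
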
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 201c6644..ef16df0 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -70,8 +70,8 @@ struct keyctl_dh_params {
 };
 
 struct keyctl_kdf_params {
-	char *hashname;
-	char *otherinfo;
+	char __user *hashname;
+	char __user *otherinfo;
 	__u32 otherinfolen;
 	__u32 __spare[8];
 };
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec..a3960f9 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
 	LO_FLAGS_AUTOCLEAR	= 4,
 	LO_FLAGS_PARTSCAN	= 8,
 	LO_FLAGS_DIRECT_IO	= 16,
+	LO_FLAGS_BLOCKSIZE	= 32,
 };
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
 	__u64		   lo_init[2];
 };
 
+#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
+
 /*
  * Loop filter types
  */
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 155e33f..a50527e 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -41,10 +41,14 @@ enum {
 #define NBD_FLAG_HAS_FLAGS	(1 << 0) /* nbd-server supports flags */
 #define NBD_FLAG_READ_ONLY	(1 << 1) /* device is read-only */
 #define NBD_FLAG_SEND_FLUSH	(1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_SEND_FUA	(1 << 3) /* send FUA (forced unit access) */
 /* there is a gap here to match userspace */
 #define NBD_FLAG_SEND_TRIM	(1 << 5) /* send trim/discard */
 #define NBD_FLAG_CAN_MULTI_CONN	(1 << 8)	/* Server supports multiple connections per export. */
 
+/* values for cmd flags in the upper 16 bits of request type */
+#define NBD_CMD_FLAG_FUA	(1 << 16) /* FUA (forced unit access) op */
+
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT	(1 << 0) /* delete the nbd device on
 						    disconnect. */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 61b7d36..156ee4c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -343,6 +343,7 @@ enum ovs_key_attr {
 #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
 
 enum ovs_tunnel_key_attr {
+	/* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */
 	OVS_TUNNEL_KEY_ATTR_ID,                 /* be64 Tunnel ID */
 	OVS_TUNNEL_KEY_ATTR_IPV4_SRC,           /* be32 src IP address. */
 	OVS_TUNNEL_KEY_ATTR_IPV4_DST,           /* be32 dst IP address. */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5f0fe01..e2a6c7b 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -47,5 +47,6 @@
  * For the sched_{set,get}attr() calls
  */
 #define SCHED_FLAG_RESET_ON_FORK	0x01
+#define SCHED_FLAG_RECLAIM		0x02
 
 #endif /* _UAPI_LINUX_SCHED_H */
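
For illustration (not part of the diff): SCHED_FLAG_RECLAIM is passed in
sched_attr.sched_flags for a SCHED_DEADLINE task.  A hedged userspace
sketch follows, declaring struct sched_attr locally since glibc provides
no sched_setattr() wrapper; the runtime/deadline/period numbers are
arbitrary:

#define _GNU_SOURCE
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE		6
#endif
#ifndef SCHED_FLAG_RECLAIM
#define SCHED_FLAG_RECLAIM	0x02
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

static int make_deadline_reclaiming(pid_t pid)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_flags	= SCHED_FLAG_RECLAIM,
		.sched_runtime	=  10 * 1000 * 1000,	/*  10 ms */
		.sched_deadline	=  30 * 1000 * 1000,	/*  30 ms */
		.sched_period	= 100 * 1000 * 1000,	/* 100 ms */
	};

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}
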
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 3738e5f..8ef82f4 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -22,33 +22,21 @@
 
 typedef struct {
 	__u8 b[16];
-} uuid_le;
+} guid_t;
 
-typedef struct {
-	__u8 b[16];
-} uuid_be;
-
-#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_le)								\
+#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)			\
+((guid_t)								\
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
    (b) & 0xff, ((b) >> 8) & 0xff,					\
    (c) & 0xff, ((c) >> 8) & 0xff,					\
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
-((uuid_be)								\
-{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
-   ((b) >> 8) & 0xff, (b) & 0xff,					\
-   ((c) >> 8) & 0xff, (c) & 0xff,					\
-   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-
+/* backwards compatibility, don't use in new code */
+typedef guid_t uuid_le;
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)		\
+	GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
 #define NULL_UUID_LE							\
 	UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
-#define NULL_UUID_BE							\
-	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-		0x00, 0x00, 0x00, 0x00)
-
+	     0x00, 0x00, 0x00, 0x00)
 
 #endif /* _UAPI_LINUX_UUID_H_ */
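
For illustration (not part of the diff): new code is expected to use guid_t
and GUID_INIT() rather than the uuid_le/UUID_LE() compatibility names; the
GUID value below is arbitrary:

#include <linux/uuid.h>

static const guid_t mydrv_dsm_guid =
	GUID_INIT(0x4f248f81, 0xd3c2, 0x43f6,
		  0xa0, 0x9e, 0x13, 0x37, 0x42, 0x00, 0xbe, 0xef);
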
diff --git a/init/Kconfig b/init/Kconfig
index 1d3475f..ee0f03b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -472,354 +472,7 @@
 
 endmenu # "CPU/Task time and stats accounting"
 
-menu "RCU Subsystem"
-
-config TREE_RCU
-	bool
-	default y if !PREEMPT && SMP
-	help
-	  This option selects the RCU implementation that is
-	  designed for very large SMP system with hundreds or
-	  thousands of CPUs.  It also scales down nicely to
-	  smaller systems.
-
-config PREEMPT_RCU
-	bool
-	default y if PREEMPT
-	help
-	  This option selects the RCU implementation that is
-	  designed for very large SMP systems with hundreds or
-	  thousands of CPUs, but for which real-time response
-	  is also required.  It also scales down nicely to
-	  smaller systems.
-
-	  Select this option if you are unsure.
-
-config TINY_RCU
-	bool
-	default y if !PREEMPT && !SMP
-	help
-	  This option selects the RCU implementation that is
-	  designed for UP systems from which real-time response
-	  is not required.  This option greatly reduces the
-	  memory footprint of RCU.
-
-config RCU_EXPERT
-	bool "Make expert-level adjustments to RCU configuration"
-	default n
-	help
-	  This option needs to be enabled if you wish to make
-	  expert-level adjustments to RCU configuration.  By default,
-	  no such adjustments can be made, which has the often-beneficial
-	  side-effect of preventing "make oldconfig" from asking you all
-	  sorts of detailed questions about how you would like numerous
-	  obscure RCU options to be set up.
-
-	  Say Y if you need to make expert-level adjustments to RCU.
-
-	  Say N if you are unsure.
-
-config SRCU
-	bool
-	default y
-	help
-	  This option selects the sleepable version of RCU. This version
-	  permits arbitrary sleeping or blocking within RCU read-side critical
-	  sections.
-
-config CLASSIC_SRCU
-	bool "Use v4.11 classic SRCU implementation"
-	default n
-	depends on RCU_EXPERT && SRCU
-	help
-	  This option selects the traditional well-tested classic SRCU
-	  implementation from v4.11, as might be desired for enterprise
-	  Linux distributions.  Without this option, the shiny new
-	  Tiny SRCU and Tree SRCU implementations are used instead.
-	  At some point, it is hoped that Tiny SRCU and Tree SRCU
-	  will accumulate enough test time and confidence to allow
-	  Classic SRCU to be dropped entirely.
-
-	  Say Y if you need a rock-solid SRCU.
-
-	  Say N if you would like help test Tree SRCU.
-
-config TINY_SRCU
-	bool
-	default y if SRCU && TINY_RCU && !CLASSIC_SRCU
-	help
-	  This option selects the single-CPU non-preemptible version of SRCU.
-
-config TREE_SRCU
-	bool
-	default y if SRCU && !TINY_RCU && !CLASSIC_SRCU
-	help
-	  This option selects the full-fledged version of SRCU.
-
-config TASKS_RCU
-	bool
-	default n
-	select SRCU
-	help
-	  This option enables a task-based RCU implementation that uses
-	  only voluntary context switch (not preemption!), idle, and
-	  user-mode execution as quiescent states.
-
-config RCU_STALL_COMMON
-	def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE )
-	help
-	  This option enables RCU CPU stall code that is common between
-	  the TINY and TREE variants of RCU.  The purpose is to allow
-	  the tiny variants to disable RCU CPU stall warnings, while
-	  making these warnings mandatory for the tree variants.
-
-config RCU_NEED_SEGCBLIST
-	def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU )
-
-config CONTEXT_TRACKING
-       bool
-
-config CONTEXT_TRACKING_FORCE
-	bool "Force context tracking"
-	depends on CONTEXT_TRACKING
-	default y if !NO_HZ_FULL
-	help
-	  The major pre-requirement for full dynticks to work is to
-	  support the context tracking subsystem. But there are also
-	  other dependencies to provide in order to make the full
-	  dynticks working.
-
-	  This option stands for testing when an arch implements the
-	  context tracking backend but doesn't yet fullfill all the
-	  requirements to make the full dynticks feature working.
-	  Without the full dynticks, there is no way to test the support
-	  for context tracking and the subsystems that rely on it: RCU
-	  userspace extended quiescent state and tickless cputime
-	  accounting. This option copes with the absence of the full
-	  dynticks subsystem by forcing the context tracking on all
-	  CPUs in the system.
-
-	  Say Y only if you're working on the development of an
-	  architecture backend for the context tracking.
-
-	  Say N otherwise, this option brings an overhead that you
-	  don't want in production.
-
-
-config RCU_FANOUT
-	int "Tree-based hierarchical RCU fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
-	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
-	default 64 if 64BIT
-	default 32 if !64BIT
-	help
-	  This option controls the fanout of hierarchical implementations
-	  of RCU, allowing RCU to work efficiently on machines with
-	  large numbers of CPUs.  This value must be at least the fourth
-	  root of NR_CPUS, which allows NR_CPUS to be insanely large.
-	  The default value of RCU_FANOUT should be used for production
-	  systems, but if you are stress-testing the RCU implementation
-	  itself, small RCU_FANOUT values allow you to test large-system
-	  code paths on small(er) systems.
-
-	  Select a specific number if testing RCU itself.
-	  Take the default if unsure.
-
-config RCU_FANOUT_LEAF
-	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
-	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
-	default 16
-	help
-	  This option controls the leaf-level fanout of hierarchical
-	  implementations of RCU, and allows trading off cache misses
-	  against lock contention.  Systems that synchronize their
-	  scheduling-clock interrupts for energy-efficiency reasons will
-	  want the default because the smaller leaf-level fanout keeps
-	  lock contention levels acceptably low.  Very large systems
-	  (hundreds or thousands of CPUs) will instead want to set this
-	  value to the maximum value possible in order to reduce the
-	  number of cache misses incurred during RCU's grace-period
-	  initialization.  These systems tend to run CPU-bound, and thus
-	  are not helped by synchronized interrupts, and thus tend to
-	  skew them, which reduces lock contention enough that large
-	  leaf-level fanouts work well.  That said, setting leaf-level
-	  fanout to a large number will likely cause problematic
-	  lock contention on the leaf-level rcu_node structures unless
-	  you boot with the skew_tick kernel parameter.
-
-	  Select a specific number if testing RCU itself.
-
-	  Select the maximum permissible value for large systems, but
-	  please understand that you may also need to set the skew_tick
-	  kernel boot parameter to avoid contention on the rcu_node
-	  structure's locks.
-
-	  Take the default if unsure.
-
-config RCU_FAST_NO_HZ
-	bool "Accelerate last non-dyntick-idle CPU's grace periods"
-	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
-	default n
-	help
-	  This option permits CPUs to enter dynticks-idle state even if
-	  they have RCU callbacks queued, and prevents RCU from waking
-	  these CPUs up more than roughly once every four jiffies (by
-	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
-	  parameter), thus improving energy efficiency.  On the other
-	  hand, this option increases the duration of RCU grace periods,
-	  for example, slowing down synchronize_rcu().
-
-	  Say Y if energy efficiency is critically important, and you
-	  	don't care about increased grace-period durations.
-
-	  Say N if you are unsure.
-
-config TREE_RCU_TRACE
-	def_bool RCU_TRACE && ( TREE_RCU || PREEMPT_RCU )
-	select DEBUG_FS
-	help
-	  This option provides tracing for the TREE_RCU and
-	  PREEMPT_RCU implementations, permitting Makefile to
-	  trivially select kernel/rcutree_trace.c.
-
-config RCU_BOOST
-	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
-	default n
-	help
-	  This option boosts the priority of preempted RCU readers that
-	  block the current preemptible RCU grace period for too long.
-	  This option also prevents heavy loads from blocking RCU
-	  callback invocation for all flavors of RCU.
-
-	  Say Y here if you are working with real-time apps or heavy loads
-	  Say N here if you are unsure.
-
-config RCU_KTHREAD_PRIO
-	int "Real-time priority to use for RCU worker threads"
-	range 1 99 if RCU_BOOST
-	range 0 99 if !RCU_BOOST
-	default 1 if RCU_BOOST
-	default 0 if !RCU_BOOST
-	depends on RCU_EXPERT
-	help
-	  This option specifies the SCHED_FIFO priority value that will be
-	  assigned to the rcuc/n and rcub/n threads and is also the value
-	  used for RCU_BOOST (if enabled). If you are working with a
-	  real-time application that has one or more CPU-bound threads
-	  running at a real-time priority level, you should set
-	  RCU_KTHREAD_PRIO to a priority higher than the highest-priority
-	  real-time CPU-bound application thread.  The default RCU_KTHREAD_PRIO
-	  value of 1 is appropriate in the common case, which is real-time
-	  applications that do not have any CPU-bound threads.
-
-	  Some real-time applications might not have a single real-time
-	  thread that saturates a given CPU, but instead might have
-	  multiple real-time threads that, taken together, fully utilize
-	  that CPU.  In this case, you should set RCU_KTHREAD_PRIO to
-	  a priority higher than the lowest-priority thread that is
-	  conspiring to prevent the CPU from running any non-real-time
-	  tasks.  For example, if one thread at priority 10 and another
-	  thread at priority 5 are between themselves fully consuming
-	  the CPU time on a given CPU, then RCU_KTHREAD_PRIO should be
-	  set to priority 6 or higher.
-
-	  Specify the real-time priority, or take the default if unsure.
-
-config RCU_BOOST_DELAY
-	int "Milliseconds to delay boosting after RCU grace-period start"
-	range 0 3000
-	depends on RCU_BOOST
-	default 500
-	help
-	  This option specifies the time to wait after the beginning of
-	  a given grace period before priority-boosting preempted RCU
-	  readers blocking that grace period.  Note that any RCU reader
-	  blocking an expedited RCU grace period is boosted immediately.
-
-	  Accept the default if unsure.
-
-config RCU_NOCB_CPU
-	bool "Offload RCU callback processing from boot-selected CPUs"
-	depends on TREE_RCU || PREEMPT_RCU
-	depends on RCU_EXPERT || NO_HZ_FULL
-	default n
-	help
-	  Use this option to reduce OS jitter for aggressive HPC or
-	  real-time workloads.	It can also be used to offload RCU
-	  callback invocation to energy-efficient CPUs in battery-powered
-	  asymmetric multiprocessors.
-
-	  This option offloads callback invocation from the set of
-	  CPUs specified at boot time by the rcu_nocbs parameter.
-	  For each such CPU, a kthread ("rcuox/N") will be created to
-	  invoke callbacks, where the "N" is the CPU being offloaded,
-	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
-	  "s" for RCU-sched.  Nothing prevents this kthread from running
-	  on the specified CPUs, but (1) the kthreads may be preempted
-	  between each callback, and (2) affinity or cgroups can be used
-	  to force the kthreads to run on whatever set of CPUs is desired.
-
-	  Say Y here if you want to help to debug reduced OS jitter.
-	  Say N here if you are unsure.
-
-choice
-	prompt "Build-forced no-CBs CPUs"
-	default RCU_NOCB_CPU_NONE
-	depends on RCU_NOCB_CPU
-	help
-	  This option allows no-CBs CPUs (whose RCU callbacks are invoked
-	  from kthreads rather than from softirq context) to be specified
-	  at build time.  Additional no-CBs CPUs may be specified by
-	  the rcu_nocbs= boot parameter.
-
-config RCU_NOCB_CPU_NONE
-	bool "No build_forced no-CBs CPUs"
-	help
-	  This option does not force any of the CPUs to be no-CBs CPUs.
-	  Only CPUs designated by the rcu_nocbs= boot parameter will be
-	  no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU
-	  kthreads whose names begin with "rcuo".  All other CPUs will
-	  invoke their own RCU callbacks in softirq context.
-
-	  Select this option if you want to choose no-CBs CPUs at
-	  boot time, for example, to allow testing of different no-CBs
-	  configurations without having to rebuild the kernel each time.
-
-config RCU_NOCB_CPU_ZERO
-	bool "CPU 0 is a build_forced no-CBs CPU"
-	help
-	  This option forces CPU 0 to be a no-CBs CPU, so that its RCU
-	  callbacks are invoked by a per-CPU kthread whose name begins
-	  with "rcuo".	Additional CPUs may be designated as no-CBs
-	  CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs.
-	  All other CPUs will invoke their own RCU callbacks in softirq
-	  context.
-
-	  Select this if CPU 0 needs to be a no-CBs CPU for real-time
-	  or energy-efficiency reasons, but the real reason it exists
-	  is to ensure that randconfig testing covers mixed systems.
-
-config RCU_NOCB_CPU_ALL
-	bool "All CPUs are build_forced no-CBs CPUs"
-	help
-	  This option forces all CPUs to be no-CBs CPUs.  The rcu_nocbs=
-	  boot parameter will be ignored.  All CPUs' RCU callbacks will
-	  be executed in the context of per-CPU rcuo kthreads created for
-	  this purpose.  Assuming that the kthreads whose names start with
-	  "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter
-	  on the remaining CPUs, but might decrease memory locality during
-	  RCU-callback invocation, thus potentially degrading throughput.
-
-	  Select this if all CPUs need to be no-CBs CPUs for real-time
-	  or energy-efficiency reasons.
-
-endchoice
-
-endmenu # "RCU Subsystem"
+source "kernel/rcu/Kconfig"
 
 config BUILD_BIN2C
 	bool
@@ -1156,6 +809,7 @@
 
 config CPUSETS
 	bool "Cpuset controller"
+	depends on SMP
 	help
 	  This option will let you create and manage CPUSETs which
 	  allow dynamically partitioning a system into sets of CPUs and
diff --git a/init/main.c b/init/main.c
index f866510..df58a41 100644
--- a/init/main.c
+++ b/init/main.c
@@ -389,6 +389,7 @@ static __initdata DECLARE_COMPLETION(kthreadd_done);
 
 static noinline void __ref rest_init(void)
 {
+	struct task_struct *tsk;
 	int pid;
 
 	rcu_scheduler_starting();
@@ -397,12 +398,32 @@ static noinline void __ref rest_init(void)
 	 * the init task will end up wanting to create kthreads, which, if
 	 * we schedule it before we create kthreadd, will OOPS.
 	 */
-	kernel_thread(kernel_init, NULL, CLONE_FS);
+	pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+	/*
+	 * Pin init on the boot CPU. Task migration does not work properly
+	 * until sched_init_smp() has been run; it will then set the allowed
+	 * CPUs for init to the non-isolated CPUs.
+	 */
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
+	rcu_read_unlock();
+
 	numa_default_policy();
 	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
 	rcu_read_lock();
 	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
 	rcu_read_unlock();
+
+	/*
+	 * Enable might_sleep() and smp_processor_id() checks.
+	 * They cannot be enabled earlier because with CONFIG_PREEMPT=y
+	 * kernel_thread() would trigger might_sleep() splats. With
+	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
+	 * already, but it's stuck on the kthreadd_done completion.
+	 */
+	system_state = SYSTEM_SCHEDULING;
+
 	complete(&kthreadd_done);
 
 	/*
@@ -1015,10 +1036,6 @@ static noinline void __init kernel_init_freeable(void)
 	 * init can allocate pages on any node
 	 */
 	set_mems_allowed(node_states[N_MEMORY]);
-	/*
-	 * init can run on any cpu.
-	 */
-	set_cpus_allowed_ptr(current, cpu_all_mask);
 
 	cad_pid = task_pid(current);
 
diff --git a/kernel/async.c b/kernel/async.c
index d2edd6e..2cbd3dd 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -114,14 +114,14 @@ static void async_run_entry_fn(struct work_struct *work)
 	ktime_t uninitialized_var(calltime), delta, rettime;
 
 	/* 1) run (and print duration) */
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		pr_debug("calling  %lli_%pF @ %i\n",
 			(long long)entry->cookie,
 			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
 	entry->func(entry->data, entry->cookie);
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
 		pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n",
@@ -284,14 +284,14 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain
 {
 	ktime_t uninitialized_var(starttime), delta, endtime;
 
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		pr_debug("async_waiting @ %i\n", task_pid_nr(current));
 		starttime = ktime_get();
 	}
 
 	wait_event(async_done, lowest_in_progress(domain) >= cookie);
 
-	if (initcall_debug && system_state == SYSTEM_BOOTING) {
+	if (initcall_debug && system_state < SYSTEM_RUNNING) {
 		endtime = ktime_get();
 		delta = ktime_sub(endtime, starttime);
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 5e00b23..172dc8e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -86,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
+	array->map.map_flags = attr->map_flags;
 	array->elem_size = elem_size;
 
 	if (!percpu)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 39cfafd..b09185f 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	trie->map.key_size = attr->key_size;
 	trie->map.value_size = attr->value_size;
 	trie->map.max_entries = attr->max_entries;
+	trie->map.map_flags = attr->map_flags;
 	trie->data_size = attr->key_size -
 			  offsetof(struct bpf_lpm_trie_key, data);
 	trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 4dfd6f2..31147d7 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->map.key_size = attr->key_size;
 	smap->map.value_size = value_size;
 	smap->map.max_entries = attr->max_entries;
+	smap->map.map_flags = attr->map_flags;
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1eddb71..a8a7256 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -463,19 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+{
+	BUG_ON(regno >= MAX_BPF_REG);
+
+	memset(&regs[regno], 0, sizeof(regs[regno]));
+	regs[regno].type = NOT_INIT;
+	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
 static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
-	for (i = 0; i < MAX_BPF_REG; i++) {
-		regs[i].type = NOT_INIT;
-		regs[i].imm = 0;
-		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
-		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
-		regs[i].min_align = 0;
-		regs[i].aux_off = 0;
-		regs[i].aux_off_align = 0;
-	}
+	for (i = 0; i < MAX_BPF_REG; i++)
+		mark_reg_not_init(regs, i);
 
 	/* frame pointer */
 	regs[BPF_REG_FP].type = FRAME_PTR;
@@ -808,11 +811,15 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 		reg_off += reg->aux_off;
 	}
 
-	/* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking
-	 * we force this to 2 which is universally what architectures use
-	 * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+	/* For platforms that do not have a Kconfig enabling
+	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
+	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
+	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
+	 * to this code only in strict mode where we want to emulate
+	 * the NET_IP_ALIGN==2 checking.  Therefore use an
+	 * unconditional IP align value of '2'.
 	 */
-	ip_align = strict ? 2 : NET_IP_ALIGN;
+	ip_align = 2;
 	if ((ip_align + reg_off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
 			ip_align, reg_off, off, size);
@@ -839,9 +846,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 {
 	bool strict = env->strict_alignment;
 
-	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
-		strict = true;
-
 	switch (reg->type) {
 	case PTR_TO_PACKET:
 		return check_pkt_ptr_alignment(reg, off, size, strict);
@@ -985,6 +989,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (err)
 		return err;
 
+	if (is_pointer_value(env, insn->src_reg)) {
+		verbose("R%d leaks addr into mem\n", insn->src_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1345,7 +1354,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs = state->regs;
-	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1412,11 +1420,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 	}
 
 	/* reset caller saved regs */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* update return register */
 	if (fn->ret_type == RET_INTEGER) {
@@ -2444,7 +2449,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
 	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -2477,11 +2481,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	}
 
 	/* reset caller saved regs to unreadable */
-	for (i = 0; i < CALLER_SAVED_REGS; i++) {
-		reg = regs + caller_saved[i];
-		reg->type = NOT_INIT;
-		reg->imm = 0;
-	}
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(regs, caller_saved[i]);
 
 	/* mark destination R0 register as readable, since it contains
 	 * the value fetched from the packet
@@ -2692,7 +2693,8 @@ static int check_cfg(struct bpf_verifier_env *env)
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *old,
 				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
@@ -2735,7 +2737,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
 	 * 'if (R4 > data_end)' and all further insn were already good with r=20,
 	 * so they will be good with r=30 and we can prune the search.
 	 */
-	if (old->off <= cur->off &&
+	if (!env->strict_alignment && old->off <= cur->off &&
 	    old->off >= old->range && cur->off >= cur->range)
 		return true;
 
@@ -2806,7 +2808,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 			continue;
 
 		if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
-		    compare_ptrs_to_packet(rold, rcur))
+		    compare_ptrs_to_packet(env, rold, rcur))
 			continue;
 
 		return false;
@@ -3584,10 +3586,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	} else {
 		log_level = 0;
 	}
-	if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT)
+
+	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		env->strict_alignment = true;
-	else
-		env->strict_alignment = false;
 
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
@@ -3693,7 +3695,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
 	mutex_lock(&bpf_verifier_lock);
 
 	log_level = 0;
+
 	env->strict_alignment = false;
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		env->strict_alignment = true;
 
 	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct bpf_verifier_state_list *),
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c3c9a0e..8d4e85e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4265,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
+	if (css->flags & CSS_DYING)
+		return;
+
+	css->flags |= CSS_DYING;
+
 	/*
 	 * This must happen before css is disassociated with its cgroup.
 	 * See seq_css() for details.
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6501f4f..ae64341 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -176,9 +176,9 @@ typedef enum {
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
-static inline bool is_cpuset_online(const struct cpuset *cs)
+static inline bool is_cpuset_online(struct cpuset *cs)
 {
-	return test_bit(CS_ONLINE, &cs->flags);
+	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
 }
 
 static inline int is_cpu_exclusive(const struct cpuset *cs)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9ae6fbe..cb51034 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1658,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev,
 	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
 	mutex_unlock(&cpuhp_state_mutex);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (st->state < target)
 		ret = do_cpu_up(dev->id, target);
 	else
 		ret = do_cpu_down(dev->id, target);
-
+out:
 	unlock_device_hotplug();
 	return ret ? ret : count;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c..bc63f8d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -925,11 +925,6 @@ static inline int is_cgroup_event(struct perf_event *event)
 	return 0;
 }
 
-static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
-{
-	return 0;
-}
-
 static inline void update_cgrp_time_from_event(struct perf_event *event)
 {
 }
@@ -5729,9 +5724,6 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	__output_copy(handle, values, n * sizeof(u64));
 }
 
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
 static void perf_output_read_group(struct perf_output_handle *handle,
 			    struct perf_event *event,
 			    u64 enabled, u64 running)
@@ -5776,6 +5768,13 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
 				 PERF_FORMAT_TOTAL_TIME_RUNNING)
 
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that it's both hard and excessively expensive to iterate the
+ * child list, not to mention that it's impossible to IPI the children running
+ * on another CPU, from interrupt/NMI context.
+ */
 static void perf_output_read(struct perf_output_handle *handle,
 			     struct perf_event *event)
 {
@@ -7316,6 +7315,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7337,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
+	 * For security, drop the skid kernel samples if necessary.
+	 */
+	if (!sample_is_allowed(event, regs))
+		return ret;
+
+	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
@@ -9172,7 +9192,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 
 static struct pmu *perf_init_event(struct perf_event *event)
 {
-	struct pmu *pmu = NULL;
+	struct pmu *pmu;
 	int idx;
 	int ret;
 
@@ -9441,9 +9461,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	local64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
-	 * we currently do not support PERF_FORMAT_GROUP on inherited events
+	 * We currently do not support PERF_SAMPLE_READ on inherited events.
+	 * See perf_output_read().
 	 */
-	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
 		goto err_ns;
 
 	if (!has_branch_stack(event))
@@ -9456,9 +9477,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	}
 
 	pmu = perf_init_event(event);
-	if (!pmu)
-		goto err_ns;
-	else if (IS_ERR(pmu)) {
+	if (IS_ERR(pmu)) {
 		err = PTR_ERR(pmu);
 		goto err_ns;
 	}
@@ -9471,8 +9490,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
 						   sizeof(unsigned long),
 						   GFP_KERNEL);
-		if (!event->addr_filters_offs)
+		if (!event->addr_filters_offs) {
+			err = -ENOMEM;
 			goto err_per_task;
+		}
 
 		/* force hw sync on the address filters */
 		event->addr_filters_gen = 1;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480..ee97196 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 	int ret = -ENOMEM, max_order = 0;
 
 	if (!has_aux(event))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
 		/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 516acdb..c632262 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -318,19 +318,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	rcu_read_unlock();
 }
 
-struct task_struct *try_get_task_struct(struct task_struct **ptask)
-{
-	struct task_struct *task;
-
-	rcu_read_lock();
-	task = task_rcu_dereference(ptask);
-	if (task)
-		get_task_struct(task);
-	rcu_read_unlock();
-
-	return task;
-}
-
 /*
  * Determine if a process group is "orphaned", according to the POSIX
  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
@@ -1004,7 +991,7 @@ struct wait_opts {
 	int __user		*wo_stat;
 	struct rusage __user	*wo_rusage;
 
-	wait_queue_t		child_wait;
+	wait_queue_entry_t		child_wait;
 	int			notask_error;
 };
 
@@ -1541,7 +1528,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 	return 0;
 }
 
-static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
 				int sync, void *key)
 {
 	struct wait_opts *wo = container_of(wait, struct wait_opts,
diff --git a/kernel/extable.c b/kernel/extable.c
index 2676d7f..0fbdd85 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -75,7 +75,7 @@ int core_kernel_text(unsigned long addr)
 	    addr < (unsigned long)_etext)
 		return 1;
 
-	if (system_state == SYSTEM_BOOTING &&
+	if (system_state < SYSTEM_RUNNING &&
 	    init_kernel_text(addr))
 		return 1;
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index aa1076c..e53770d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1577,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process(
 	if (!p)
 		goto fork_out;
 
+	/*
+	 * This _must_ happen before we call free_task(), i.e. before we jump
+	 * to any of the bad_fork_* labels. This is to avoid freeing
+	 * p->set_child_tid which is (ab)used as a kthread's data pointer for
+	 * kernel threads (PF_KTHREAD).
+	 */
+	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+	/*
+	 * Clear TID on mm_release()?
+	 */
+	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
+
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
@@ -1743,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process(
 		}
 	}
 
-	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
-	/*
-	 * Clear TID on mm_release()?
-	 */
-	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 357348a..d6cf71d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -225,7 +225,7 @@ struct futex_pi_state {
  * @requeue_pi_key:	the requeue_pi target futex key
  * @bitset:		bitset for the optional bitmasked wakeup
  *
- * We use this hashed waitqueue, instead of a normal wait_queue_t, so
+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 070be98..425170d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret)
+			if (ret) {
+				irq_release_resources(desc);
 				goto out_mask;
+			}
 		}
 
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index ae1a3ba..154ffb4 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -38,6 +38,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/compiler.h>
 #include <linux/hugetlb.h>
+#include <linux/frame.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -874,7 +875,7 @@ int kexec_load_disabled;
  * only when panic_cpu holds the current CPU number; this is the only CPU
  * which processes crash_kexec routines.
  */
-void __crash_kexec(struct pt_regs *regs)
+void __noclone __crash_kexec(struct pt_regs *regs)
 {
 	/* Take the kexec_mutex here to prevent sys_kexec_load
 	 * running on one cpu from replacing the crash kernel
@@ -896,6 +897,7 @@ void __crash_kexec(struct pt_regs *regs)
 		mutex_unlock(&kexec_mutex);
 	}
 }
+STACK_FRAME_NON_STANDARD(__crash_kexec);
 
 void crash_kexec(struct pt_regs *regs)
 {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2d2d3a5..adfe3b4 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -122,7 +122,7 @@ static void *alloc_insn_page(void)
 	return module_alloc(PAGE_SIZE);
 }
 
-static void free_insn_page(void *page)
+void __weak free_insn_page(void *page)
 {
 	module_memfree(page);
 }
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
index 0450225..ec45651 100644
--- a/kernel/livepatch/Kconfig
+++ b/kernel/livepatch/Kconfig
@@ -10,6 +10,7 @@
 	depends on SYSFS
 	depends on KALLSYMS_ALL
 	depends on HAVE_LIVEPATCH
+	depends on !TRIM_UNUSED_KSYMS
 	help
 	  Say Y here if you want to support kernel live patching.
 	  This option has no runtime impact until a kernel "patch"
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index f826903..52c4e90 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -59,7 +59,11 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	ops = container_of(fops, struct klp_ops, fops);
 
-	rcu_read_lock();
+	/*
+	 * A variant of synchronize_sched() is used to allow patching functions
+	 * where RCU is not watching, see klp_synchronize_transition().
+	 */
+	preempt_disable_notrace();
 
 	func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
 				      stack_node);
@@ -115,7 +119,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	klp_arch_set_pc(regs, (unsigned long)func->new_func);
 unlock:
-	rcu_read_unlock();
+	preempt_enable_notrace();
 }
 
 /*
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index adc0cc6..b004a1f 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -49,6 +49,28 @@ static void klp_transition_work_fn(struct work_struct *work)
 static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn);
 
 /*
+ * This function is just a stub to implement a hard force
+ * of synchronize_sched(). This requires synchronizing
+ * tasks even in userspace and idle.
+ */
+static void klp_sync(struct work_struct *work)
+{
+}
+
+/*
+ * We also allow patching functions where RCU is not watching,
+ * e.g. before user_exit(). We cannot rely on the RCU infrastructure
+ * to do the synchronization. Instead, hard-force the sched synchronization.
+ *
+ * This approach allows RCU functions to be used for manipulating
+ * func_stack safely.
+ */
+static void klp_synchronize_transition(void)
+{
+	schedule_on_each_cpu(klp_sync);
+}
+
+/*
  * The transition to the target patch state is complete.  Clean up the data
  * structures.
  */
@@ -73,7 +95,7 @@ static void klp_complete_transition(void)
 		 * func->transition gets cleared, the handler may choose a
 		 * removed function.
 		 */
-		synchronize_rcu();
+		klp_synchronize_transition();
 	}
 
 	if (klp_transition_patch->immediate)
@@ -92,7 +114,7 @@ static void klp_complete_transition(void)
 
 	/* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */
 	if (klp_target_state == KLP_PATCHED)
-		synchronize_rcu();
+		klp_synchronize_transition();
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task) {
@@ -136,7 +158,11 @@ void klp_cancel_transition(void)
  */
 void klp_update_patch_state(struct task_struct *task)
 {
-	rcu_read_lock();
+	/*
+	 * A variant of synchronize_sched() is used to allow patching functions
+	 * where RCU is not watching, see klp_synchronize_transition().
+	 */
+	preempt_disable_notrace();
 
 	/*
 	 * This test_and_clear_tsk_thread_flag() call also serves as a read
@@ -153,7 +179,7 @@ void klp_update_patch_state(struct task_struct *task)
 	if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
 		task->patch_state = READ_ONCE(klp_target_state);
 
-	rcu_read_unlock();
+	preempt_enable_notrace();
 }
 
 /*
@@ -539,7 +565,7 @@ void klp_reverse_transition(void)
 		clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
 
 	/* Let any remaining calls to klp_update_patch_state() complete */
-	synchronize_rcu();
+	klp_synchronize_transition();
 
 	klp_start_transition();
 }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c0e31bf..7d2499b 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1157,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("======================================================\n");
 	pr_warn("WARNING: possible circular locking dependency detected\n");
 	print_kernel_ident();
 	pr_warn("------------------------------------------------------\n");
-	printk("%s/%d is trying to acquire lock:\n",
+	pr_warn("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(check_src);
-	printk("\nbut task is already holding lock:\n");
+	pr_warn("\nbut task is already holding lock:\n");
 	print_lock(check_tgt);
-	printk("\nwhich lock already depends on the new lock.\n\n");
-	printk("\nthe existing dependency chain (in reverse order) is:\n");
+	pr_warn("\nwhich lock already depends on the new lock.\n\n");
+	pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
 
 	print_circular_bug_entry(entry, depth);
 
@@ -1495,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=====================================================\n");
 	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
 		irqclass, irqclass);
 	print_kernel_ident();
 	pr_warn("-----------------------------------------------------\n");
-	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+	pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
 		curr->comm, task_pid_nr(curr),
 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
 		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
@@ -1509,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr,
 		curr->softirqs_enabled);
 	print_lock(next);
 
-	printk("\nand this task is already holding:\n");
+	pr_warn("\nand this task is already holding:\n");
 	print_lock(prev);
-	printk("which would create a new lock dependency:\n");
+	pr_warn("which would create a new lock dependency:\n");
 	print_lock_name(hlock_class(prev));
-	printk(KERN_CONT " ->");
+	pr_cont(" ->");
 	print_lock_name(hlock_class(next));
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 
-	printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+	pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n",
 		irqclass);
 	print_lock_name(backwards_entry->class);
-	printk("\n... which became %s-irq-safe at:\n", irqclass);
+	pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
 
 	print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
 
-	printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+	pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
 	print_lock_name(forwards_entry->class);
-	printk("\n... which became %s-irq-unsafe at:\n", irqclass);
-	printk("...");
+	pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
+	pr_warn("...");
 
 	print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
 
-	printk("\nother info that might help us debug this:\n\n");
+	pr_warn("\nother info that might help us debug this:\n\n");
 	print_irq_lock_scenario(backwards_entry, forwards_entry,
 				hlock_class(prev), hlock_class(next));
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
+	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
 	if (!save_trace(&prev_root->trace))
 		return 0;
 	print_shortest_lock_dependencies(backwards_entry, prev_root);
 
-	printk("\nthe dependencies between the lock to be acquired");
-	printk(" and %s-irq-unsafe lock:\n", irqclass);
+	pr_warn("\nthe dependencies between the lock to be acquired");
+	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
 	if (!save_trace(&next_root->trace))
 		return 0;
 	print_shortest_lock_dependencies(forwards_entry, next_root);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -1724,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("============================================\n");
 	pr_warn("WARNING: possible recursive locking detected\n");
 	print_kernel_ident();
 	pr_warn("--------------------------------------------\n");
-	printk("%s/%d is trying to acquire lock:\n",
+	pr_warn("%s/%d is trying to acquire lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(next);
-	printk("\nbut task is already holding lock:\n");
+	pr_warn("\nbut task is already holding lock:\n");
 	print_lock(prev);
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	print_deadlock_scenario(next, prev);
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -2074,21 +2074,21 @@ static void print_collision(struct task_struct *curr,
 			struct held_lock *hlock_next,
 			struct lock_chain *chain)
 {
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("============================\n");
 	pr_warn("WARNING: chain_key collision\n");
 	print_kernel_ident();
 	pr_warn("----------------------------\n");
-	printk("%s/%d: ", current->comm, task_pid_nr(current));
-	printk("Hash chain already cached but the contents don't match!\n");
+	pr_warn("%s/%d: ", current->comm, task_pid_nr(current));
+	pr_warn("Hash chain already cached but the contents don't match!\n");
 
-	printk("Held locks:");
+	pr_warn("Held locks:");
 	print_chain_keys_held_locks(curr, hlock_next);
 
-	printk("Locks in cached chain:");
+	pr_warn("Locks in cached chain:");
 	print_chain_keys_chain(chain);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 #endif
@@ -2373,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("================================\n");
 	pr_warn("WARNING: inconsistent lock state\n");
 	print_kernel_ident();
 	pr_warn("--------------------------------\n");
 
-	printk("inconsistent {%s} -> {%s} usage.\n",
+	pr_warn("inconsistent {%s} -> {%s} usage.\n",
 		usage_str[prev_bit], usage_str[new_bit]);
 
-	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+	pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
 		curr->comm, task_pid_nr(curr),
 		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
 		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
@@ -2390,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		trace_softirqs_enabled(curr));
 	print_lock(this);
 
-	printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+	pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
 	print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
 
 	print_irqtrace_events(curr);
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	print_usage_bug_scenario(this);
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -2438,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("========================================================\n");
 	pr_warn("WARNING: possible irq lock inversion dependency detected\n");
 	print_kernel_ident();
 	pr_warn("--------------------------------------------------------\n");
-	printk("%s/%d just changed the state of lock:\n",
+	pr_warn("%s/%d just changed the state of lock:\n",
 		curr->comm, task_pid_nr(curr));
 	print_lock(this);
 	if (forwards)
-		printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
+		pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
 	else
-		printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
+		pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
 	print_lock_name(other->class);
-	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+	pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n");
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 
 	/* Find a middle lock (if one exists) */
 	depth = get_lock_depth(other);
 	do {
 		if (depth == 0 && (entry != root)) {
-			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			pr_warn("lockdep:%s bad path found in chain graph\n", __func__);
 			break;
 		}
 		middle = entry;
@@ -2475,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr,
 
 	lockdep_print_held_locks(curr);
 
-	printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
+	pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
 	if (!save_trace(&root->trace))
 		return 0;
 	print_shortest_lock_dependencies(other, root);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3189,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("==================================\n");
 	pr_warn("WARNING: Nested lock was not taken\n");
 	print_kernel_ident();
 	pr_warn("----------------------------------\n");
 
-	printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+	pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
 	print_lock(hlock);
 
-	printk("\nbut this task is not holding:\n");
-	printk("%s\n", hlock->nest_lock->name);
+	pr_warn("\nbut this task is not holding:\n");
+	pr_warn("%s\n", hlock->nest_lock->name);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3402,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=====================================\n");
 	pr_warn("WARNING: bad unlock balance detected!\n");
 	print_kernel_ident();
 	pr_warn("-------------------------------------\n");
-	printk("%s/%d is trying to release lock (",
+	pr_warn("%s/%d is trying to release lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
-	printk(KERN_CONT ") at:\n");
+	pr_cont(") at:\n");
 	print_ip_sym(ip);
-	printk("but there are no more locks to release!\n");
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("but there are no more locks to release!\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -3974,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 	if (debug_locks_silent)
 		return 0;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=================================\n");
 	pr_warn("WARNING: bad contention detected!\n");
 	print_kernel_ident();
 	pr_warn("---------------------------------\n");
-	printk("%s/%d is trying to contend lock (",
+	pr_warn("%s/%d is trying to contend lock (",
 		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
-	printk(KERN_CONT ") at:\n");
+	pr_cont(") at:\n");
 	print_ip_sym(ip);
-	printk("but there are no locks held!\n");
-	printk("\nother info that might help us debug this:\n");
+	pr_warn("but there are no locks held!\n");
+	pr_warn("\nother info that might help us debug this:\n");
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 
 	return 0;
@@ -4318,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
 	if (debug_locks_silent)
 		return;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=========================\n");
 	pr_warn("WARNING: held lock freed!\n");
 	print_kernel_ident();
 	pr_warn("-------------------------\n");
-	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+	pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
 		curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
 	print_lock(hlock);
 	lockdep_print_held_locks(curr);
 
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 
@@ -4376,14 +4376,14 @@ static void print_held_locks_bug(void)
 	if (debug_locks_silent)
 		return;
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("====================================\n");
 	pr_warn("WARNING: %s/%d still has locks held!\n",
 	       current->comm, task_pid_nr(current));
 	print_kernel_ident();
 	pr_warn("------------------------------------\n");
 	lockdep_print_held_locks(current);
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 
@@ -4402,10 +4402,10 @@ void debug_show_all_locks(void)
 	int unlock = 1;
 
 	if (unlikely(!debug_locks)) {
-		printk("INFO: lockdep is turned off.\n");
+		pr_warn("INFO: lockdep is turned off.\n");
 		return;
 	}
-	printk("\nShowing all locks held in the system:\n");
+	pr_warn("\nShowing all locks held in the system:\n");
 
 	/*
 	 * Here we try to get the tasklist_lock as hard as possible,
@@ -4416,18 +4416,18 @@ void debug_show_all_locks(void)
 retry:
 	if (!read_trylock(&tasklist_lock)) {
 		if (count == 10)
-			printk("hm, tasklist_lock locked, retrying... ");
+			pr_warn("hm, tasklist_lock locked, retrying... ");
 		if (count) {
 			count--;
-			printk(" #%d", 10-count);
+			pr_cont(" #%d", 10-count);
 			mdelay(200);
 			goto retry;
 		}
-		printk(" ignoring it.\n");
+		pr_cont(" ignoring it.\n");
 		unlock = 0;
 	} else {
 		if (count != 10)
-			printk(KERN_CONT " locked it.\n");
+			pr_cont(" locked it.\n");
 	}
 
 	do_each_thread(g, p) {
@@ -4445,7 +4445,7 @@ void debug_show_all_locks(void)
 				unlock = 1;
 	} while_each_thread(g, p);
 
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=============================================\n\n");
 
 	if (unlock)
@@ -4475,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
 	if (unlikely(curr->lockdep_depth)) {
 		if (!debug_locks_off())
 			return;
-		printk("\n");
+		pr_warn("\n");
 		pr_warn("================================================\n");
 		pr_warn("WARNING: lock held when returning to user space!\n");
 		print_kernel_ident();
 		pr_warn("------------------------------------------------\n");
-		printk("%s/%d is leaving the kernel with locks still held!\n",
+		pr_warn("%s/%d is leaving the kernel with locks still held!\n",
 				curr->comm, curr->pid);
 		lockdep_print_held_locks(curr);
 	}
@@ -4490,19 +4490,15 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 {
 	struct task_struct *curr = current;
 
-#ifndef CONFIG_PROVE_RCU_REPEATEDLY
-	if (!debug_locks_off())
-		return;
-#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
 	/* Note: the following can be executed concurrently, so be careful. */
-	printk("\n");
+	pr_warn("\n");
 	pr_warn("=============================\n");
 	pr_warn("WARNING: suspicious RCU usage\n");
 	print_kernel_ident();
 	pr_warn("-----------------------------\n");
-	printk("%s:%d %s!\n", file, line, s);
-	printk("\nother info that might help us debug this:\n\n");
-	printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+	pr_warn("%s:%d %s!\n", file, line, s);
+	pr_warn("\nother info that might help us debug this:\n\n");
+	pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
 	       !rcu_lockdep_current_cpu_online()
 			? "RCU used illegally from offline CPU!\n"
 			: !rcu_is_watching()
@@ -4529,10 +4525,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	 * rcu_read_lock_bh() and so on from extended quiescent states.
 	 */
 	if (!rcu_is_watching())
-		printk("RCU used illegally from extended quiescent state!\n");
+		pr_warn("RCU used illegally from extended quiescent state!\n");
 
 	lockdep_print_held_locks(curr);
-	printk("\nstack backtrace:\n");
+	pr_warn("\nstack backtrace:\n");
 	dump_stack();
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 58e366a..ac35e64 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -166,12 +166,16 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 	memset(waiter, 0x22, sizeof(*waiter));
 }
 
-void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
+void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key)
 {
 	/*
 	 * Make sure we are not reinitializing a held lock:
 	 */
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lock->name = name;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
 }
 
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index b585af9..5078c6d 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -11,7 +11,7 @@
 
 extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
-extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void debug_rt_mutex_lock(struct rt_mutex *lock);
 extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
 extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b955094..7806989 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1481,6 +1481,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
 	might_sleep();
 
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
@@ -1496,9 +1497,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
  */
 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
@@ -1526,11 +1534,18 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
 int
 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
 {
+	int ret;
+
 	might_sleep();
 
-	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
+	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
 				       RT_MUTEX_MIN_CHAINWALK,
 				       rt_mutex_slowlock);
+	if (ret)
+		mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
 
@@ -1547,10 +1562,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  */
 int __sched rt_mutex_trylock(struct rt_mutex *lock)
 {
+	int ret;
+
 	if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
 		return 0;
 
-	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+	if (ret)
+		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
 
@@ -1561,6 +1582,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
  */
 void __sched rt_mutex_unlock(struct rt_mutex *lock)
 {
+	mutex_release(&lock->dep_map, 1, _RET_IP_);
 	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
@@ -1620,7 +1642,6 @@ void rt_mutex_destroy(struct rt_mutex *lock)
 	lock->magic = NULL;
 #endif
 }
-
 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
 
 /**
@@ -1632,14 +1653,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
  *
  * Initializing of a locked rt lock is not allowed
  */
-void __rt_mutex_init(struct rt_mutex *lock, const char *name)
+void __rt_mutex_init(struct rt_mutex *lock, const char *name,
+		     struct lock_class_key *key)
 {
 	lock->owner = NULL;
 	raw_spin_lock_init(&lock->wait_lock);
 	lock->waiters = RB_ROOT;
 	lock->waiters_leftmost = NULL;
 
-	debug_rt_mutex_init(lock, name);
+	if (name && key)
+		debug_rt_mutex_init(lock, name, key);
 }
 EXPORT_SYMBOL_GPL(__rt_mutex_init);
 
@@ -1660,7 +1683,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				struct task_struct *proxy_owner)
 {
-	__rt_mutex_init(lock, NULL);
+	__rt_mutex_init(lock, NULL, NULL);
 	debug_rt_mutex_proxy_lock(lock, proxy_owner);
 	rt_mutex_set_owner(lock, proxy_owner);
 }
@@ -1785,12 +1808,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
 	int ret;
 
 	raw_spin_lock_irq(&lock->wait_lock);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-
 	/* sleep on the mutex */
+	set_current_state(TASK_INTERRUPTIBLE);
 	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
-
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -1822,15 +1847,25 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
 
 	raw_spin_lock_irq(&lock->wait_lock);
 	/*
+	 * Do an unconditional try-lock, this deals with the lock stealing
+	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+	 * sets a NULL owner.
+	 *
+	 * We're not interested in the return value, because the subsequent
+	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+	 * we will own the lock and it will have removed the waiter. If we
+	 * failed the trylock, we're still not owner and we need to remove
+	 * ourselves.
+	 */
+	try_to_take_rt_mutex(lock, current, waiter);
+	/*
 	 * Unless we're the owner; we're still enqueued on the wait_list.
 	 * So check if we became owner, if not, take us off the wait_list.
 	 */
 	if (rt_mutex_owner(lock) != current) {
 		remove_waiter(lock, waiter);
-		fixup_rt_mutex_waiters(lock);
 		cleanup = true;
 	}
-
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index 6607802..5c253ca 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -17,7 +17,7 @@
 #define debug_rt_mutex_proxy_lock(l,p)			do { } while (0)
 #define debug_rt_mutex_proxy_unlock(l)			do { } while (0)
 #define debug_rt_mutex_unlock(l)			do { } while (0)
-#define debug_rt_mutex_init(m, n)			do { } while (0)
+#define debug_rt_mutex_init(m, n, k)			do { } while (0)
 #define debug_rt_mutex_deadlock(d, a ,l)		do { } while (0)
 #define debug_rt_mutex_print_deadlock(w)		do { } while (0)
 #define debug_rt_mutex_reset_waiter(w)			do { } while (0)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 78672d3..c7209f0 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -132,7 +132,7 @@ int freeze_processes(void)
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
-	pm_wakeup_clear(true);
+	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3b1e0f3..fa46606 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1425,7 +1425,7 @@ static unsigned int nr_meta_pages;
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
  */
-unsigned int alloc_normal, alloc_highmem;
+static unsigned int alloc_normal, alloc_highmem;
 /*
  * Memory bitmap used for marking saveable pages (during hibernation) or
  * hibernation image pages (during restore)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c0248c7..15e6bae 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -72,8 +72,6 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true);
-
 	spin_lock_irq(&suspend_freeze_lock);
 	if (pm_wakeup_pending())
 		goto out;
@@ -100,27 +98,6 @@ static void freeze_enter(void)
  out:
 	suspend_freeze_state = FREEZE_STATE_NONE;
 	spin_unlock_irq(&suspend_freeze_lock);
-
-	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false);
-}
-
-static void s2idle_loop(void)
-{
-	do {
-		freeze_enter();
-
-		if (freeze_ops && freeze_ops->wake)
-			freeze_ops->wake();
-
-		dpm_resume_noirq(PMSG_RESUME);
-		if (freeze_ops && freeze_ops->sync)
-			freeze_ops->sync();
-
-		if (pm_wakeup_pending())
-			break;
-
-		pm_wakeup_clear(false);
-	} while (!dpm_suspend_noirq(PMSG_SUSPEND));
 }
 
 void freeze_wake(void)
@@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
-		s2idle_loop();
-		goto Platform_early_resume;
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
+		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
+		goto Platform_wake;
 	}
 
 	error = disable_nonboot_cpus();
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33..57d2257 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
 struct hib_bio_batch {
 	atomic_t		count;
 	wait_queue_head_t	wait;
-	int			error;
+	blk_status_t		error;
 };
 
 static void hib_init_batch(struct hib_bio_batch *hb)
 {
 	atomic_set(&hb->count, 0);
 	init_waitqueue_head(&hb->wait);
-	hb->error = 0;
+	hb->error = BLK_STS_OK;
 }
 
 static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
 	struct hib_bio_batch *hb = bio->bi_private;
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
 				imajor(bio->bi_bdev->bd_inode),
 				iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
 		flush_icache_range((unsigned long)page_address(page),
 				   (unsigned long)page_address(page) + PAGE_SIZE);
 
-	if (bio->bi_error && !hb->error)
-		hb->error = bio->bi_error;
+	if (bio->bi_status && !hb->error)
+		hb->error = bio->bi_status;
 	if (atomic_dec_and_test(&hb->count))
 		wake_up(&hb->wait);
 
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
 	return error;
 }
 
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
 {
 	wait_event(hb->wait, atomic_read(&hb->count) == 0);
-	return hb->error;
+	return blk_status_to_errno(hb->error);
 }
 
 /*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a1aecf4..bd53ea5 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -269,7 +269,6 @@ static struct console *exclusive_console;
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
-static int console_cmdline_cnt;
 
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -1176,7 +1175,7 @@ static void boot_delay_msec(int level)
 	unsigned long long k;
 	unsigned long timeout;
 
-	if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
+	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
 		|| suppress_message_printing(level)) {
 		return;
 	}
@@ -1906,25 +1905,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0, c = console_cmdline; i < console_cmdline_cnt; i++, c++) {
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (strcmp(c->name, name) == 0 && c->index == idx) {
-			if (brl_options)
-				return 0;
-
-			/*
-			 * Maintain an invariant that will help to find if
-			 * the matching console is preferred, see
-			 * register_console():
-			 *
-			 * The last non-braille console is always
-			 * the preferred one.
-			 */
-			if (i != console_cmdline_cnt - 1)
-				swap(console_cmdline[i],
-				     console_cmdline[console_cmdline_cnt - 1]);
-
-			preferred_console = console_cmdline_cnt - 1;
-
+			if (!brl_options)
+				preferred_console = i;
 			return 0;
 		}
 	}
@@ -1937,7 +1923,6 @@ static int __add_preferred_console(char *name, int idx, char *options,
 	braille_set_options(c, brl_options);
 
 	c->index = idx;
-	console_cmdline_cnt++;
 	return 0;
 }
 /*
@@ -2477,23 +2462,12 @@ void register_console(struct console *newcon)
 	}
 
 	/*
-	 * See if this console matches one we selected on the command line.
-	 *
-	 * There may be several entries in the console_cmdline array matching
-	 * with the same console, one with newcon->match(), another by
-	 * name/index:
-	 *
-	 *	pl011,mmio,0x87e024000000,115200 -- added from SPCR
-	 *	ttyAMA0 -- added from command line
-	 *
-	 * Traverse the console_cmdline array in reverse order to be
-	 * sure that if this console is preferred then it will be the first
-	 * matching entry.  We use the invariant that is maintained in
-	 * __add_preferred_console().
+	 *	See if this console matches one we selected on
+	 *	the command line.
 	 */
-	for (i = console_cmdline_cnt - 1; i >= 0; i--) {
-		c = console_cmdline + i;
-
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
 		if (!newcon->match ||
 		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
 			/* default matching */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 266ddcc..60f356d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 }
 
 
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
+		   const struct cred *ptracer_cred)
+{
+	BUG_ON(!list_empty(&child->ptrace_entry));
+	list_add(&child->ptrace_entry, &new_parent->ptraced);
+	child->parent = new_parent;
+	child->ptracer_cred = get_cred(ptracer_cred);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
+static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
-	BUG_ON(!list_empty(&child->ptrace_entry));
-	list_add(&child->ptrace_entry, &new_parent->ptraced);
-	child->parent = new_parent;
 	rcu_read_lock();
-	child->ptracer_cred = get_cred(__task_cred(new_parent));
+	__ptrace_link(child, new_parent, __task_cred(new_parent));
 	rcu_read_unlock();
 }
 
@@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 		flags |= PT_SEIZED;
 	task->ptrace = flags;
 
-	__ptrace_link(task, current);
+	ptrace_link(task, current);
 
 	/* SEIZE doesn't trap tracee on attach */
 	if (!seize)
@@ -459,7 +465,7 @@ static int ptrace_traceme(void)
 		 */
 		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 			current->ptrace = PT_PTRACED;
-			__ptrace_link(current, current->real_parent);
+			ptrace_link(current, current->real_parent);
 		}
 	}
 	write_unlock_irq(&tasklist_lock);
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
new file mode 100644
index 0000000..be90c94
--- /dev/null
+++ b/kernel/rcu/Kconfig
@@ -0,0 +1,242 @@
+#
+# RCU-related configuration options
+#
+
+menu "RCU Subsystem"
+
+config TREE_RCU
+	bool
+	default y if !PREEMPT && SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP systems with hundreds or
+	  thousands of CPUs.  It also scales down nicely to
+	  smaller systems.
+
+config PREEMPT_RCU
+	bool
+	default y if PREEMPT
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP systems with hundreds or
+	  thousands of CPUs, but for which real-time response
+	  is also required.  It also scales down nicely to
+	  smaller systems.
+
+	  Select this option if you are unsure.
+
+config TINY_RCU
+	bool
+	default y if !PREEMPT && !SMP
+	help
+	  This option selects the RCU implementation that is
+	  designed for UP systems from which real-time response
+	  is not required.  This option greatly reduces the
+	  memory footprint of RCU.
+
+config RCU_EXPERT
+	bool "Make expert-level adjustments to RCU configuration"
+	default n
+	help
+	  This option needs to be enabled if you wish to make
+	  expert-level adjustments to RCU configuration.  By default,
+	  no such adjustments can be made, which has the often-beneficial
+	  side-effect of preventing "make oldconfig" from asking you all
+	  sorts of detailed questions about how you would like numerous
+	  obscure RCU options to be set up.
+
+	  Say Y if you need to make expert-level adjustments to RCU.
+
+	  Say N if you are unsure.
+
+config SRCU
+	bool
+	help
+	  This option selects the sleepable version of RCU. This version
+	  permits arbitrary sleeping or blocking within RCU read-side critical
+	  sections.
+
+config TINY_SRCU
+	bool
+	default y if SRCU && TINY_RCU
+	help
+	  This option selects the single-CPU non-preemptible version of SRCU.
+
+config TREE_SRCU
+	bool
+	default y if SRCU && !TINY_RCU
+	help
+	  This option selects the full-fledged version of SRCU.
+
+config TASKS_RCU
+	bool
+	default n
+	select SRCU
+	help
+	  This option enables a task-based RCU implementation that uses
+	  only voluntary context switch (not preemption!), idle, and
+	  user-mode execution as quiescent states.
+
+config RCU_STALL_COMMON
+	def_bool ( TREE_RCU || PREEMPT_RCU )
+	help
+	  This option enables RCU CPU stall code that is common between
+	  the TINY and TREE variants of RCU.  The purpose is to allow
+	  the tiny variants to disable RCU CPU stall warnings, while
+	  making these warnings mandatory for the tree variants.
+
+config RCU_NEED_SEGCBLIST
+	def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU )
+
+config CONTEXT_TRACKING
+       bool
+
+config CONTEXT_TRACKING_FORCE
+	bool "Force context tracking"
+	depends on CONTEXT_TRACKING
+	default y if !NO_HZ_FULL
+	help
+	  The major pre-requirement for full dynticks to work is to
+	  support the context tracking subsystem. But there are also
+	  other dependencies to provide in order to make full
+	  dynticks work.
+
+	  This option stands for testing when an arch implements the
+	  context tracking backend but doesn't yet fulfill all the
+	  requirements to make the full dynticks feature work.
+	  Without the full dynticks, there is no way to test the support
+	  for context tracking and the subsystems that rely on it: RCU
+	  userspace extended quiescent state and tickless cputime
+	  accounting. This option copes with the absence of the full
+	  dynticks subsystem by forcing the context tracking on all
+	  CPUs in the system.
+
+	  Say Y only if you're working on the development of an
+	  architecture backend for the context tracking.
+
+	  Say N otherwise; this option brings an overhead that you
+	  don't want in production.
+
+
+config RCU_FANOUT
+	int "Tree-based hierarchical RCU fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+	default 64 if 64BIT
+	default 32 if !64BIT
+	help
+	  This option controls the fanout of hierarchical implementations
+	  of RCU, allowing RCU to work efficiently on machines with
+	  large numbers of CPUs.  This value must be at least the fourth
+	  root of NR_CPUS, which allows NR_CPUS to be insanely large.
+	  The default value of RCU_FANOUT should be used for production
+	  systems, but if you are stress-testing the RCU implementation
+	  itself, small RCU_FANOUT values allow you to test large-system
+	  code paths on small(er) systems.
+
+	  Select a specific number if testing RCU itself.
+	  Take the default if unsure.
+
+config RCU_FANOUT_LEAF
+	int "Tree-based hierarchical RCU leaf-level fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
+	default 16
+	help
+	  This option controls the leaf-level fanout of hierarchical
+	  implementations of RCU, and allows trading off cache misses
+	  against lock contention.  Systems that synchronize their
+	  scheduling-clock interrupts for energy-efficiency reasons will
+	  want the default because the smaller leaf-level fanout keeps
+	  lock contention levels acceptably low.  Very large systems
+	  (hundreds or thousands of CPUs) will instead want to set this
+	  value to the maximum value possible in order to reduce the
+	  number of cache misses incurred during RCU's grace-period
+	  initialization.  These systems tend to run CPU-bound, and thus
+	  are not helped by synchronized interrupts, and thus tend to
+	  skew them, which reduces lock contention enough that large
+	  leaf-level fanouts work well.  That said, setting leaf-level
+	  fanout to a large number will likely cause problematic
+	  lock contention on the leaf-level rcu_node structures unless
+	  you boot with the skew_tick kernel parameter.
+
+	  Select a specific number if testing RCU itself.
+
+	  Select the maximum permissible value for large systems, but
+	  please understand that you may also need to set the skew_tick
+	  kernel boot parameter to avoid contention on the rcu_node
+	  structure's locks.
+
+	  Take the default if unsure.
+
+config RCU_FAST_NO_HZ
+	bool "Accelerate last non-dyntick-idle CPU's grace periods"
+	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
+	default n
+	help
+	  This option permits CPUs to enter dynticks-idle state even if
+	  they have RCU callbacks queued, and prevents RCU from waking
+	  these CPUs up more than roughly once every four jiffies (by
+	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
+	  parameter), thus improving energy efficiency.  On the other
+	  hand, this option increases the duration of RCU grace periods,
+	  for example, slowing down synchronize_rcu().
+
+	  Say Y if energy efficiency is critically important, and you
+	  don't care about increased grace-period durations.
+
+	  Say N if you are unsure.
+
+config RCU_BOOST
+	bool "Enable RCU priority boosting"
+	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
+	default n
+	help
+	  This option boosts the priority of preempted RCU readers that
+	  block the current preemptible RCU grace period for too long.
+	  This option also prevents heavy loads from blocking RCU
+	  callback invocation for all flavors of RCU.
+
+	  Say Y here if you are working with real-time apps or heavy loads.
+	  Say N here if you are unsure.
+
+config RCU_BOOST_DELAY
+	int "Milliseconds to delay boosting after RCU grace-period start"
+	range 0 3000
+	depends on RCU_BOOST
+	default 500
+	help
+	  This option specifies the time to wait after the beginning of
+	  a given grace period before priority-boosting preempted RCU
+	  readers blocking that grace period.  Note that any RCU reader
+	  blocking an expedited RCU grace period is boosted immediately.
+
+	  Accept the default if unsure.
+
+config RCU_NOCB_CPU
+	bool "Offload RCU callback processing from boot-selected CPUs"
+	depends on TREE_RCU || PREEMPT_RCU
+	depends on RCU_EXPERT || NO_HZ_FULL
+	default n
+	help
+	  Use this option to reduce OS jitter for aggressive HPC or
+	  real-time workloads.	It can also be used to offload RCU
+	  callback invocation to energy-efficient CPUs in battery-powered
+	  asymmetric multiprocessors.
+
+	  This option offloads callback invocation from the set of
+	  CPUs specified at boot time by the rcu_nocbs parameter.
+	  For each such CPU, a kthread ("rcuox/N") will be created to
+	  invoke callbacks, where the "N" is the CPU being offloaded,
+	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
+	  "s" for RCU-sched.  Nothing prevents this kthread from running
+	  on the specified CPUs, but (1) the kthreads may be preempted
+	  between each callback, and (2) affinity or cgroups can be used
+	  to force the kthreads to run on whatever set of CPUs is desired.
+
+	  Say Y here if you want to help to debug reduced OS jitter.
+	  Say N here if you are unsure.
+
+endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
new file mode 100644
index 0000000..0ec7d1d
--- /dev/null
+++ b/kernel/rcu/Kconfig.debug
@@ -0,0 +1,82 @@
+#
+# RCU-related debugging configuration options
+#
+
+menu "RCU Debugging"
+
+config PROVE_RCU
+	def_bool PROVE_LOCKING
+
+config TORTURE_TEST
+	tristate
+	default n
+
+config RCU_PERF_TEST
+	tristate "performance tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs performance
+	  tests on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU performance tests to be built into
+	  the kernel.
+	  Say M if you want the RCU performance tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_TORTURE_TEST
+	tristate "torture tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs torture tests
+	  on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU torture tests to be built into
+	  the kernel.
+	  Say M if you want the RCU torture tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_CPU_STALL_TIMEOUT
+	int "RCU CPU stall timeout in seconds"
+	depends on RCU_STALL_COMMON
+	range 3 300
+	default 21
+	help
+	  If a given RCU grace period extends more than the specified
+	  number of seconds, a CPU stall warning is printed.  If the
+	  RCU grace period persists, additional CPU stall warnings are
+	  printed at more widely spaced intervals.
+
+config RCU_TRACE
+	bool "Enable tracing for RCU"
+	depends on DEBUG_KERNEL
+	default y if TREE_RCU
+	select TRACE_CLOCK
+	help
+	  This option enables additional tracepoints for ftrace-style
+	  event tracing.
+
+	  Say Y here if you want to enable RCU tracing.
+	  Say N if you are unsure.
+
+config RCU_EQS_DEBUG
+	bool "Provide debugging asserts for adding NO_HZ support to an arch"
+	depends on DEBUG_KERNEL
+	help
+	  This option provides consistency checks in RCU's handling of
+	  NO_HZ.  These checks have proven quite helpful in detecting
+	  bugs in arch-specific NO_HZ code.
+
+	  Say N here if you need ultimate kernel/user switch latencies.
+	  Say Y if you are unsure.
+
+endmenu # "RCU Debugging"
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 23803c7..13c0fc8 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,13 +3,11 @@
 KCOV_INSTRUMENT := n
 
 obj-y += update.o sync.o
-obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
 obj-$(CONFIG_TREE_SRCU) += srcutree.o
 obj-$(CONFIG_TINY_SRCU) += srcutiny.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
 obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_PREEMPT_RCU) += tree.o
-obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
 obj-$(CONFIG_TINY_RCU) += tiny.o
 obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 73e16ec4..808b8c8 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -212,6 +212,18 @@ int rcu_jiffies_till_stall_check(void);
  */
 #define TPS(x)  tracepoint_string(x)
 
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+	\
+	if (!atomic_read(&___rfd_beenhere) && \
+	    !atomic_xchg(&___rfd_beenhere, 1)) \
+		ftrace_dump(oops_dump_mode); \
+} while (0)
+
 void rcu_early_boot_tests(void);
 void rcu_test_sync_prims(void);
 
@@ -291,6 +303,271 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
 	     cpu <= rnp->grphi; \
 	     cpu = cpumask_next((cpu), cpu_possible_mask))
 
+/*
+ * Wrappers for the rcu_node::lock acquire and release.
+ *
+ * Because the rcu_node structures form a tree, tree traversal locking will
+ * observe different lock values; this in turn means that an UNLOCK of one
+ * level followed by a LOCK of another level does not imply a full memory
+ * barrier, and, most importantly, transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ *
+ * Because the ->lock field of struct rcu_node is __private, one should use
+ * these wrappers rather than calling raw_spin_{lock,unlock}* directly on ->lock.
+ */
+#define raw_spin_lock_rcu_node(p)					\
+do {									\
+	raw_spin_lock(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))
+
+#define raw_spin_lock_irq_rcu_node(p)					\
+do {									\
+	raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock));			\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irq_rcu_node(p)					\
+	raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
+
+#define raw_spin_lock_irqsave_rcu_node(p, flags)			\
+do {									\
+	raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);	\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irqrestore_rcu_node(p, flags)			\
+	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
+
+#define raw_spin_trylock_rcu_node(p)					\
+({									\
+	bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock));	\
+									\
+	if (___locked)							\
+		smp_mb__after_unlock_lock();				\
+	___locked;							\
+})
+
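For illustration (the function below is hypothetical), code that manipulates rcu_node fields would take the lock through these wrappers so that the smp_mb__after_unlock_lock() they add restores full ordering across tree levels:

static void example_scan_rcu_node(struct rcu_node *rnp)
{
	unsigned long flags;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	/* ... read or update fields protected by rnp->lock ... */
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}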
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
+#ifdef CONFIG_TINY_RCU
+/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
+{
+	return true;
+}
+static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
+{
+	return false;
+}
+
+static inline void rcu_expedite_gp(void)
+{
+}
+
+static inline void rcu_unexpedite_gp(void)
+{
+}
+#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_normal(void);     /* Internal RCU use. */
+bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+void rcupdate_announce_bootup_oddness(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
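+/* Values of rcu_scheduler_active, tracking RCU's view of boot progress. */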
+#define RCU_SCHEDULER_INACTIVE	0
+#define RCU_SCHEDULER_INIT	1
+#define RCU_SCHEDULER_RUNNING	2
+
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
+#else /* #ifdef CONFIG_TINY_RCU */
+void rcu_request_urgent_qs_task(struct task_struct *t);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+enum rcutorture_type {
+	RCU_FLAVOR,
+	RCU_BH_FLAVOR,
+	RCU_SCHED_FLAVOR,
+	RCU_TASKS_FLAVOR,
+	SRCU_FLAVOR,
+	INVALID_RCU_FLAVOR
+};
+
+#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+			    unsigned long *gpnum, unsigned long *completed);
+void rcutorture_record_test_transition(void);
+void rcutorture_record_progress(unsigned long vernum);
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
+					  int *flags,
+					  unsigned long *gpnum,
+					  unsigned long *completed)
+{
+	*flags = 0;
+	*gpnum = 0;
+	*completed = 0;
+}
+static inline void rcutorture_record_test_transition(void)
+{
+}
+static inline void rcutorture_record_progress(unsigned long vernum)
+{
+}
+#ifdef CONFIG_RCU_TRACE
+void do_trace_rcu_torture_read(const char *rcutorturename,
+			       struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old,
+			       unsigned long c);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
+#endif
+#endif
+
+#ifdef CONFIG_TINY_SRCU
+
+static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
+					   struct srcu_struct *sp, int *flags,
+					   unsigned long *gpnum,
+					   unsigned long *completed)
+{
+	if (test_type != SRCU_FLAVOR)
+		return;
+	*flags = 0;
+	*completed = sp->srcu_idx;
+	*gpnum = *completed;
+}
+
+#elif defined(CONFIG_TREE_SRCU)
+
+void srcutorture_get_gp_data(enum rcutorture_type test_type,
+			     struct srcu_struct *sp, int *flags,
+			     unsigned long *gpnum, unsigned long *completed);
+
+#endif
+
+#ifdef CONFIG_TINY_RCU
+
+/*
+ * Return the number of grace periods started.
+ */
+static inline unsigned long rcu_batches_started(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods started.
+ */
+static inline unsigned long rcu_batches_started_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods started.
+ */
+static inline unsigned long rcu_batches_started_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of bottom-half grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_bh(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of sched grace periods completed.
+ */
+static inline unsigned long rcu_batches_completed_sched(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return 0;
+}
+
+static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
+{
+	return 0;
+}
+
+static inline void rcu_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_bh_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_sched_force_quiescent_state(void)
+{
+}
+
+static inline void show_rcu_gp_kthreads(void)
+{
+}
+
+#else /* #ifdef CONFIG_TINY_RCU */
+extern unsigned long rcutorture_testseq;
+extern unsigned long rcutorture_vernum;
+unsigned long rcu_batches_started(void);
+unsigned long rcu_batches_started_bh(void);
+unsigned long rcu_batches_started_sched(void);
+unsigned long rcu_batches_completed(void);
+unsigned long rcu_batches_completed_bh(void);
+unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
+unsigned long srcu_batches_completed(struct srcu_struct *sp);
+void show_rcu_gp_kthreads(void);
+void rcu_force_quiescent_state(void);
+void rcu_bh_force_quiescent_state(void);
+void rcu_sched_force_quiescent_state(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+bool rcu_is_nocb_cpu(int cpu);
+#else
+static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
+#endif
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a86fb..3cc1811 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -48,6 +48,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 
@@ -59,12 +61,16 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
 #define VERBOSE_PERFOUT_ERRSTRING(s) \
 	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
 
+torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
+torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
-torture_param(int, nreaders, -1, "Number of RCU reader threads");
+torture_param(int, nreaders, 0, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
-torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
+torture_param(bool, shutdown, !IS_ENABLED(MODULE),
+	      "Shutdown at end of performance tests.");
 torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
@@ -86,13 +92,16 @@ static u64 t_rcu_perf_writer_started;
 static u64 t_rcu_perf_writer_finished;
 static unsigned long b_rcu_perf_writer_started;
 static unsigned long b_rcu_perf_writer_finished;
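+/* Per-CPU count of asynchronous grace-period callbacks still in flight. */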
+static DEFINE_PER_CPU(atomic_t, n_async_inflight);
 
 static int rcu_perf_writer_state;
 #define RTWS_INIT		0
-#define RTWS_EXP_SYNC		1
-#define RTWS_SYNC		2
-#define RTWS_IDLE		2
-#define RTWS_STOPPING		3
+#define RTWS_ASYNC		1
+#define RTWS_BARRIER		2
+#define RTWS_EXP_SYNC		3
+#define RTWS_SYNC		4
+#define RTWS_IDLE		5
+#define RTWS_STOPPING		6
 
 #define MAX_MEAS 10000
 #define MIN_MEAS 100
@@ -114,6 +123,8 @@ struct rcu_perf_ops {
 	unsigned long (*started)(void);
 	unsigned long (*completed)(void);
 	unsigned long (*exp_completed)(void);
+	void (*async)(struct rcu_head *head, rcu_callback_t func);
+	void (*gp_barrier)(void);
 	void (*sync)(void);
 	void (*exp_sync)(void);
 	const char *name;
@@ -153,6 +164,8 @@ static struct rcu_perf_ops rcu_ops = {
 	.started	= rcu_batches_started,
 	.completed	= rcu_batches_completed,
 	.exp_completed	= rcu_exp_batches_completed,
+	.async		= call_rcu,
+	.gp_barrier	= rcu_barrier,
 	.sync		= synchronize_rcu,
 	.exp_sync	= synchronize_rcu_expedited,
 	.name		= "rcu"
@@ -181,6 +194,8 @@ static struct rcu_perf_ops rcu_bh_ops = {
 	.started	= rcu_batches_started_bh,
 	.completed	= rcu_batches_completed_bh,
 	.exp_completed	= rcu_exp_batches_completed_sched,
+	.async		= call_rcu_bh,
+	.gp_barrier	= rcu_barrier_bh,
 	.sync		= synchronize_rcu_bh,
 	.exp_sync	= synchronize_rcu_bh_expedited,
 	.name		= "rcu_bh"
@@ -208,6 +223,16 @@ static unsigned long srcu_perf_completed(void)
 	return srcu_batches_completed(srcu_ctlp);
 }
 
+static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	call_srcu(srcu_ctlp, head, func);
+}
+
+static void srcu_rcu_barrier(void)
+{
+	srcu_barrier(srcu_ctlp);
+}
+
 static void srcu_perf_synchronize(void)
 {
 	synchronize_srcu(srcu_ctlp);
@@ -226,11 +251,42 @@ static struct rcu_perf_ops srcu_ops = {
 	.started	= NULL,
 	.completed	= srcu_perf_completed,
 	.exp_completed	= srcu_perf_completed,
+	.async		= srcu_call_rcu,
+	.gp_barrier	= srcu_rcu_barrier,
 	.sync		= srcu_perf_synchronize,
 	.exp_sync	= srcu_perf_synchronize_expedited,
 	.name		= "srcu"
 };
 
+static struct srcu_struct srcud;
+
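+/* Dynamically initialized srcu_struct used by the "srcud" test variant. */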
+static void srcu_sync_perf_init(void)
+{
+	srcu_ctlp = &srcud;
+	init_srcu_struct(srcu_ctlp);
+}
+
+static void srcu_sync_perf_cleanup(void)
+{
+	cleanup_srcu_struct(srcu_ctlp);
+}
+
+static struct rcu_perf_ops srcud_ops = {
+	.ptype		= SRCU_FLAVOR,
+	.init		= srcu_sync_perf_init,
+	.cleanup	= srcu_sync_perf_cleanup,
+	.readlock	= srcu_perf_read_lock,
+	.readunlock	= srcu_perf_read_unlock,
+	.started	= NULL,
+	.completed	= srcu_perf_completed,
+	.exp_completed	= srcu_perf_completed,
+	.async		= srcu_call_rcu,
+	.gp_barrier	= srcu_rcu_barrier,
+	.sync		= srcu_perf_synchronize,
+	.exp_sync	= srcu_perf_synchronize_expedited,
+	.name		= "srcud"
+};
+
 /*
  * Definitions for sched perf testing.
  */
@@ -254,6 +310,8 @@ static struct rcu_perf_ops sched_ops = {
 	.started	= rcu_batches_started_sched,
 	.completed	= rcu_batches_completed_sched,
 	.exp_completed	= rcu_exp_batches_completed_sched,
+	.async		= call_rcu_sched,
+	.gp_barrier	= rcu_barrier_sched,
 	.sync		= synchronize_sched,
 	.exp_sync	= synchronize_sched_expedited,
 	.name		= "sched"
@@ -281,6 +339,8 @@ static struct rcu_perf_ops tasks_ops = {
 	.readunlock	= tasks_perf_read_unlock,
 	.started	= rcu_no_completed,
 	.completed	= rcu_no_completed,
+	.async		= call_rcu_tasks,
+	.gp_barrier	= rcu_barrier_tasks,
 	.sync		= synchronize_rcu_tasks,
 	.exp_sync	= synchronize_rcu_tasks,
 	.name		= "tasks"
@@ -344,6 +404,15 @@ rcu_perf_reader(void *arg)
 }
 
 /*
+ * Callback function for asynchronous grace periods from rcu_perf_writer().
+ */
+static void rcu_perf_async_cb(struct rcu_head *rhp)
+{
+	atomic_dec(this_cpu_ptr(&n_async_inflight));
+	kfree(rhp);
+}
+
+/*
  * RCU perf writer kthread.  Repeatedly does a grace period.
  */
 static int
@@ -352,6 +421,7 @@ rcu_perf_writer(void *arg)
 	int i = 0;
 	int i_max;
 	long me = (long)arg;
+	struct rcu_head *rhp = NULL;
 	struct sched_param sp;
 	bool started = false, done = false, alldone = false;
 	u64 t;
@@ -380,9 +450,27 @@ rcu_perf_writer(void *arg)
 	}
 
 	do {
+		if (writer_holdoff)
+			udelay(writer_holdoff);
 		wdp = &wdpp[i];
 		*wdp = ktime_get_mono_fast_ns();
-		if (gp_exp) {
+		if (gp_async) {
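+			/*
+			 * Post an async GP request if below the per-CPU cap;
+			 * otherwise wait out the callback backlog and retry.
+			 */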
+retry:
+			if (!rhp)
+				rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
+			if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
+				rcu_perf_writer_state = RTWS_ASYNC;
+				atomic_inc(this_cpu_ptr(&n_async_inflight));
+				cur_ops->async(rhp, rcu_perf_async_cb);
+				rhp = NULL;
+			} else if (!kthread_should_stop()) {
+				rcu_perf_writer_state = RTWS_BARRIER;
+				cur_ops->gp_barrier();
+				goto retry;
+			} else {
+				kfree(rhp); /* Because we are stopping. */
+			}
+		} else if (gp_exp) {
 			rcu_perf_writer_state = RTWS_EXP_SYNC;
 			cur_ops->exp_sync();
 		} else {
@@ -429,6 +517,10 @@ rcu_perf_writer(void *arg)
 			i++;
 		rcu_perf_wait_shutdown();
 	} while (!torture_must_stop());
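+	/* Wait for any still-outstanding async grace-period callbacks. */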
+	if (gp_async) {
+		rcu_perf_writer_state = RTWS_BARRIER;
+		cur_ops->gp_barrier();
+	}
 	rcu_perf_writer_state = RTWS_STOPPING;
 	writer_n_durations[me] = i_max;
 	torture_kthread_stopping("rcu_perf_writer");
@@ -452,6 +544,17 @@ rcu_perf_cleanup(void)
 	u64 *wdp;
 	u64 *wdpp;
 
+	/*
+	 * We would like to warn at the start, but everything is expedited
+	 * during the mid-boot phase, so we have to wait until the end.
+	 */
+	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
+		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
+	if (rcu_gp_is_normal() && gp_exp)
+		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
+	if (gp_exp && gp_async)
+		VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");
+
 	if (torture_cleanup_begin())
 		return;
 
@@ -554,7 +657,7 @@ rcu_perf_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_perf_ops *perf_ops[] = {
-		&rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+		&rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
 		RCUPERF_TASKS_OPS
 	};
 
@@ -624,16 +727,6 @@ rcu_perf_init(void)
 		firsterr = -ENOMEM;
 		goto unwind;
 	}
-	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) {
-		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
-		firsterr = -EINVAL;
-		goto unwind;
-	}
-	if (rcu_gp_is_normal() && gp_exp) {
-		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
-		firsterr = -EINVAL;
-		goto unwind;
-	}
 	for (i = 0; i < nrealwriters; i++) {
 		writer_durations[i] =
 			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index ae6e574..b8f7f8c 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,6 +52,8 @@
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
 
+#include "rcu.h"
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
 
@@ -562,31 +564,19 @@ static void srcu_torture_stats(void)
 	int __maybe_unused cpu;
 	int idx;
 
-#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU)
 #ifdef CONFIG_TREE_SRCU
 	idx = srcu_ctlp->srcu_idx & 0x1;
-#else /* #ifdef CONFIG_TREE_SRCU */
-	idx = srcu_ctlp->completed & 0x1;
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
 		 torture_type, TORTURE_FLAG, idx);
 	for_each_possible_cpu(cpu) {
 		unsigned long l0, l1;
 		unsigned long u0, u1;
 		long c0, c1;
-#ifdef CONFIG_TREE_SRCU
 		struct srcu_data *counts;
 
 		counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
 		u0 = counts->srcu_unlock_count[!idx];
 		u1 = counts->srcu_unlock_count[idx];
-#else /* #ifdef CONFIG_TREE_SRCU */
-		struct srcu_array *counts;
-
-		counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
-		u0 = counts->unlock_count[!idx];
-		u1 = counts->unlock_count[idx];
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		/*
 		 * Make sure that a lock is always counted if the corresponding
@@ -594,13 +584,8 @@ static void srcu_torture_stats(void)
 		 */
 		smp_rmb();
 
-#ifdef CONFIG_TREE_SRCU
 		l0 = counts->srcu_lock_count[!idx];
 		l1 = counts->srcu_lock_count[idx];
-#else /* #ifdef CONFIG_TREE_SRCU */
-		l0 = counts->lock_count[!idx];
-		l1 = counts->lock_count[idx];
-#endif /* #else #ifdef CONFIG_TREE_SRCU */
 
 		c0 = l0 - u0;
 		c1 = l1 - u1;
@@ -609,7 +594,7 @@ static void srcu_torture_stats(void)
 	pr_cont("\n");
 #elif defined(CONFIG_TINY_SRCU)
 	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
-	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n",
+	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
 		 torture_type, TORTURE_FLAG, idx,
 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
deleted file mode 100644
index 584d8a9..0000000
--- a/kernel/rcu/srcu.c
+++ /dev/null
@@ -1,662 +0,0 @@
-/*
- * Sleepable Read-Copy Update mechanism for mutual exclusion.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright (C) IBM Corporation, 2006
- * Copyright (C) Fujitsu, 2012
- *
- * Author: Paul McKenney <paulmck@us.ibm.com>
- *	   Lai Jiangshan <laijs@cn.fujitsu.com>
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *		Documentation/RCU/ *.txt
- *
- */
-
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/percpu.h>
-#include <linux/preempt.h>
-#include <linux/rcupdate_wait.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/srcu.h>
-
-#include "rcu.h"
-
-/*
- * Initialize an rcu_batch structure to empty.
- */
-static inline void rcu_batch_init(struct rcu_batch *b)
-{
-	b->head = NULL;
-	b->tail = &b->head;
-}
-
-/*
- * Enqueue a callback onto the tail of the specified rcu_batch structure.
- */
-static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
-{
-	*b->tail = head;
-	b->tail = &head->next;
-}
-
-/*
- * Is the specified rcu_batch structure empty?
- */
-static inline bool rcu_batch_empty(struct rcu_batch *b)
-{
-	return b->tail == &b->head;
-}
-
-/*
- * Remove the callback at the head of the specified rcu_batch structure
- * and return a pointer to it, or return NULL if the structure is empty.
- */
-static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
-{
-	struct rcu_head *head;
-
-	if (rcu_batch_empty(b))
-		return NULL;
-
-	head = b->head;
-	b->head = head->next;
-	if (b->tail == &head->next)
-		rcu_batch_init(b);
-
-	return head;
-}
-
-/*
- * Move all callbacks from the rcu_batch structure specified by "from" to
- * the structure specified by "to".
- */
-static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
-{
-	if (!rcu_batch_empty(from)) {
-		*to->tail = from->head;
-		to->tail = from->tail;
-		rcu_batch_init(from);
-	}
-}
-
-static int init_srcu_struct_fields(struct srcu_struct *sp)
-{
-	sp->completed = 0;
-	spin_lock_init(&sp->queue_lock);
-	sp->running = false;
-	rcu_batch_init(&sp->batch_queue);
-	rcu_batch_init(&sp->batch_check0);
-	rcu_batch_init(&sp->batch_check1);
-	rcu_batch_init(&sp->batch_done);
-	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_array);
-	return sp->per_cpu_ref ? 0 : -ENOMEM;
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-int __init_srcu_struct(struct srcu_struct *sp, const char *name,
-		       struct lock_class_key *key)
-{
-	/* Don't re-initialize a lock while it is held. */
-	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
-	lockdep_init_map(&sp->dep_map, name, key, 0);
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(__init_srcu_struct);
-
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/**
- * init_srcu_struct - initialize a sleep-RCU structure
- * @sp: structure to initialize.
- *
- * Must invoke this on a given srcu_struct before passing that srcu_struct
- * to any other function.  Each srcu_struct represents a separate domain
- * of SRCU protection.
- */
-int init_srcu_struct(struct srcu_struct *sp)
-{
-	return init_srcu_struct_fields(sp);
-}
-EXPORT_SYMBOL_GPL(init_srcu_struct);
-
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/*
- * Returns approximate total of the readers' ->lock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Returns approximate total of the readers' ->unlock_count[] values for the
- * rank of per-CPU counters specified by idx.
- */
-static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->unlock_count[idx]);
-	}
-	return sum;
-}
-
-/*
- * Return true if the number of pre-existing readers is determined to
- * be zero.
- */
-static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
-{
-	unsigned long unlocks;
-
-	unlocks = srcu_readers_unlock_idx(sp, idx);
-
-	/*
-	 * Make sure that a lock is always counted if the corresponding unlock
-	 * is counted. Needs to be a smp_mb() as the read side may contain a
-	 * read from a variable that is written to before the synchronize_srcu()
-	 * in the write side. In this case smp_mb()s A and B act like the store
-	 * buffering pattern.
-	 *
-	 * This smp_mb() also pairs with smp_mb() C to prevent accesses after the
-	 * synchronize_srcu() from being executed before the grace period ends.
-	 */
-	smp_mb(); /* A */
-
-	/*
-	 * If the locks are the same as the unlocks, then there must have
-	 * been no readers on this index at some time in between. This does not
-	 * mean that there are no more readers, as one could have read the
-	 * current index but not have incremented the lock counter yet.
-	 *
-	 * Possible bug: There is no guarantee that there haven't been ULONG_MAX
-	 * increments of ->lock_count[] since the unlocks were counted, meaning
-	 * that this could return true even if there are still active readers.
-	 * Since there are no memory barriers around srcu_flip(), the CPU is not
-	 * required to increment ->completed before running
-	 * srcu_readers_unlock_idx(), which means that there could be an
-	 * arbitrarily large number of critical sections that execute after
-	 * srcu_readers_unlock_idx() but use the old value of ->completed.
-	 */
-	return srcu_readers_lock_idx(sp, idx) == unlocks;
-}
-
-/**
- * srcu_readers_active - returns true if there are readers. and false
- *                       otherwise
- * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
- *
- * Note that this is not an atomic primitive, and can therefore suffer
- * severe errors when invoked on an active srcu_struct.  That said, it
- * can be useful as an error check at cleanup time.
- */
-static bool srcu_readers_active(struct srcu_struct *sp)
-{
-	int cpu;
-	unsigned long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
-
-		sum += READ_ONCE(cpuc->lock_count[0]);
-		sum += READ_ONCE(cpuc->lock_count[1]);
-		sum -= READ_ONCE(cpuc->unlock_count[0]);
-		sum -= READ_ONCE(cpuc->unlock_count[1]);
-	}
-	return sum;
-}
-
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this only after you are finished using a given srcu_struct
- * that was initialized via init_srcu_struct().  This code does some
- * probabalistic checking, spotting late uses of srcu_read_lock(),
- * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
- * If any such late uses are detected, the per-CPU memory associated with
- * the srcu_struct is simply leaked and WARN_ON() is invoked.  If the
- * caller frees the srcu_struct itself, a use-after-free crash will likely
- * ensue, but at least there will be a warning printed.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
-{
-	if (WARN_ON(srcu_readers_active(sp)))
-		return; /* Leakage unless caller handles error. */
-	free_percpu(sp->per_cpu_ref);
-	sp->per_cpu_ref = NULL;
-}
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
-
-/*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-	int idx;
-
-	idx = READ_ONCE(sp->completed) & 0x1;
-	__this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
-	smp_mb(); /* B */  /* Avoid leaking the critical section. */
-	return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
-/*
- * Removes the count for the old reader from the appropriate per-CPU
- * element of the srcu_struct.  Note that this may well be a different
- * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
- */
-void __srcu_read_unlock(struct srcu_struct *sp, int idx)
-{
-	smp_mb(); /* C */  /* Avoid leaking the critical section. */
-	this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
-}
-EXPORT_SYMBOL_GPL(__srcu_read_unlock);
-
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited().  We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections.  If there are still some readers after 10 microseconds,
- * we repeatedly block for 1-millisecond time periods.  This approach
- * has done well in testing, so there is no need for a config parameter.
- */
-#define SRCU_RETRY_CHECK_DELAY		5
-#define SYNCHRONIZE_SRCU_TRYCOUNT	2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT	12
-
-/*
- * @@@ Wait until all pre-existing readers complete.  Such readers
- * will have used the index specified by "idx".
- * the caller should ensures the ->completed is not changed while checking
- * and idx = (->completed & 1) ^ 1
- */
-static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
-{
-	for (;;) {
-		if (srcu_readers_active_idx_check(sp, idx))
-			return true;
-		if (--trycount <= 0)
-			return false;
-		udelay(SRCU_RETRY_CHECK_DELAY);
-	}
-}
-
-/*
- * Increment the ->completed counter so that future SRCU readers will
- * use the other rank of the ->(un)lock_count[] arrays.  This allows
- * us to wait for pre-existing readers in a starvation-free manner.
- */
-static void srcu_flip(struct srcu_struct *sp)
-{
-	WRITE_ONCE(sp->completed, sp->completed + 1);
-
-	/*
-	 * Ensure that if the updater misses an __srcu_read_unlock()
-	 * increment, that task's next __srcu_read_lock() will see the
-	 * above counter update.  Note that both this memory barrier
-	 * and the one in srcu_readers_active_idx_check() provide the
-	 * guarantee for __srcu_read_lock().
-	 */
-	smp_mb(); /* D */  /* Pairs with C. */
-}
-
-/*
- * Enqueue an SRCU callback on the specified srcu_struct structure,
- * initiating grace-period processing if it is not already running.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing SRCU read-side critical section.  On systems with
- * more than one CPU, this means that when "func()" is invoked, each CPU
- * is guaranteed to have executed a full memory barrier since the end of
- * its last corresponding SRCU read-side critical section whose beginning
- * preceded the call to call_rcu().  It also means that each CPU executing
- * an SRCU read-side critical section that continues beyond the start of
- * "func()" must have executed a memory barrier after the call_rcu()
- * but before the beginning of that SRCU read-side critical section.
- * Note that these guarantees include CPUs that are offline, idle, or
- * executing in user mode, as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting SRCU callback function "func()", then both CPU A and CPU
- * B are guaranteed to execute a full memory barrier during the time
- * interval between the call to call_rcu() and the invocation of "func()".
- * This guarantee applies even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
- *
- * Of course, these guarantees apply only for invocations of call_srcu(),
- * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
- * srcu_struct structure.
- */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
-	       rcu_callback_t func)
-{
-	unsigned long flags;
-
-	head->next = NULL;
-	head->func = func;
-	spin_lock_irqsave(&sp->queue_lock, flags);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	rcu_batch_queue(&sp->batch_queue, head);
-	if (!sp->running) {
-		sp->running = true;
-		queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
-	}
-	spin_unlock_irqrestore(&sp->queue_lock, flags);
-}
-EXPORT_SYMBOL_GPL(call_srcu);
-
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
-static void srcu_reschedule(struct srcu_struct *sp);
-
-/*
- * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
- */
-static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
-{
-	struct rcu_synchronize rcu;
-	struct rcu_head *head = &rcu.head;
-	bool done = false;
-
-	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
-			 lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
-
-	might_sleep();
-	init_completion(&rcu.completion);
-
-	head->next = NULL;
-	head->func = wakeme_after_rcu;
-	spin_lock_irq(&sp->queue_lock);
-	smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
-	if (!sp->running) {
-		/* steal the processing owner */
-		sp->running = true;
-		rcu_batch_queue(&sp->batch_check0, head);
-		spin_unlock_irq(&sp->queue_lock);
-
-		srcu_advance_batches(sp, trycount);
-		if (!rcu_batch_empty(&sp->batch_done)) {
-			BUG_ON(sp->batch_done.head != head);
-			rcu_batch_dequeue(&sp->batch_done);
-			done = true;
-		}
-		/* give the processing owner to work_struct */
-		srcu_reschedule(sp);
-	} else {
-		rcu_batch_queue(&sp->batch_queue, head);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (!done) {
-		wait_for_completion(&rcu.completion);
-		smp_mb(); /* Caller's later accesses after GP. */
-	}
-
-}
-
-/**
- * synchronize_srcu - wait for prior SRCU read-side critical-section completion
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for the count to drain to zero of both indexes. To avoid the
- * possible starvation of synchronize_srcu(), it waits for the count of
- * the index=((->completed & 1) ^ 1) to drain to zero at first,
- * and then flip the completed and wait for the count of the other index.
- *
- * Can block; must be called from process context.
- *
- * Note that it is illegal to call synchronize_srcu() from the corresponding
- * SRCU read-side critical section; doing so will result in deadlock.
- * However, it is perfectly legal to call synchronize_srcu() on one
- * srcu_struct from some other srcu_struct's read-side critical section,
- * as long as the resulting graph of srcu_structs is acyclic.
- *
- * There are memory-ordering constraints implied by synchronize_srcu().
- * On systems with more than one CPU, when synchronize_srcu() returns,
- * each CPU is guaranteed to have executed a full memory barrier since
- * the end of its last corresponding SRCU-sched read-side critical section
- * whose beginning preceded the call to synchronize_srcu().  In addition,
- * each CPU having an SRCU read-side critical section that extends beyond
- * the return from synchronize_srcu() is guaranteed to have executed a
- * full memory barrier after the beginning of synchronize_srcu() and before
- * the beginning of that SRCU read-side critical section.  Note that these
- * guarantees include CPUs that are offline, idle, or executing in user mode,
- * as well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_srcu(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_srcu().  This guarantee applies even if CPU A and CPU B
- * are the same CPU, but again only if the system has more than one CPU.
- *
- * Of course, these memory-ordering guarantees apply only when
- * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
- * passed the same srcu_struct structure.
- */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
-			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
-			   : SYNCHRONIZE_SRCU_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu);
-
-/**
- * synchronize_srcu_expedited - Brute-force SRCU grace period
- * @sp: srcu_struct with which to synchronize.
- *
- * Wait for an SRCU grace period to elapse, but be more aggressive about
- * spinning rather than blocking when waiting.
- *
- * Note that synchronize_srcu_expedited() has the same deadlock and
- * memory-ordering properties as does synchronize_srcu().
- */
-void synchronize_srcu_expedited(struct srcu_struct *sp)
-{
-	__synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
-}
-EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
-
-/**
- * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
- * @sp: srcu_struct on which to wait for in-flight callbacks.
- */
-void srcu_barrier(struct srcu_struct *sp)
-{
-	synchronize_srcu(sp);
-}
-EXPORT_SYMBOL_GPL(srcu_barrier);
-
-/**
- * srcu_batches_completed - return batches completed.
- * @sp: srcu_struct on which to report batch completion.
- *
- * Report the number of batches, correlated with, but not necessarily
- * precisely the same as, the number of grace periods that have elapsed.
- */
-unsigned long srcu_batches_completed(struct srcu_struct *sp)
-{
-	return sp->completed;
-}
-EXPORT_SYMBOL_GPL(srcu_batches_completed);
-
-#define SRCU_CALLBACK_BATCH	10
-#define SRCU_INTERVAL		1
-
-/*
- * Move any new SRCU callbacks to the first stage of the SRCU grace
- * period pipeline.
- */
-static void srcu_collect_new(struct srcu_struct *sp)
-{
-	if (!rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
-		spin_unlock_irq(&sp->queue_lock);
-	}
-}
-
-/*
- * Core SRCU state machine.  Advance callbacks from ->batch_check0 to
- * ->batch_check1 and then to ->batch_done as readers drain.
- */
-static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
-{
-	int idx = 1 ^ (sp->completed & 1);
-
-	/*
-	 * Because readers might be delayed for an extended period after
-	 * fetching ->completed for their index, at any point in time there
-	 * might well be readers using both idx=0 and idx=1.  We therefore
-	 * need to wait for readers to clear from both index values before
-	 * invoking a callback.
-	 */
-
-	if (rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_check1))
-		return; /* no callbacks need to be advanced */
-
-	if (!try_check_zero(sp, idx, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have already done with their
-	 * first zero check and flip back when they were enqueued on
-	 * ->batch_check0 in a previous invocation of srcu_advance_batches().
-	 * (Presumably try_check_zero() returned false during that
-	 * invocation, leaving the callbacks stranded on ->batch_check1.)
-	 * They are therefore ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-
-	if (rcu_batch_empty(&sp->batch_check0))
-		return; /* no callbacks need to be advanced */
-	srcu_flip(sp);
-
-	/*
-	 * The callbacks in ->batch_check0 just finished their
-	 * first check zero and flip, so move them to ->batch_check1
-	 * for future checking on the other idx.
-	 */
-	rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
-
-	/*
-	 * SRCU read-side critical sections are normally short, so check
-	 * at least twice in quick succession after a flip.
-	 */
-	trycount = trycount < 2 ? 2 : trycount;
-	if (!try_check_zero(sp, idx^1, trycount))
-		return; /* failed to advance, will try after SRCU_INTERVAL */
-
-	/*
-	 * The callbacks in ->batch_check1 have now waited for all
-	 * pre-existing readers using both idx values.  They are therefore
-	 * ready to invoke, so move them to ->batch_done.
-	 */
-	rcu_batch_move(&sp->batch_done, &sp->batch_check1);
-}
-
-/*
- * Invoke a limited number of SRCU callbacks that have passed through
- * their grace period.  If there are more to do, SRCU will reschedule
- * the workqueue.  Note that needed memory barriers have been executed
- * in this task's context by srcu_readers_active_idx_check().
- */
-static void srcu_invoke_callbacks(struct srcu_struct *sp)
-{
-	int i;
-	struct rcu_head *head;
-
-	for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
-		head = rcu_batch_dequeue(&sp->batch_done);
-		if (!head)
-			break;
-		local_bh_disable();
-		head->func(head);
-		local_bh_enable();
-	}
-}
-
-/*
- * Finished one round of SRCU grace period.  Start another if there are
- * more SRCU callbacks queued, otherwise put SRCU into not-running state.
- */
-static void srcu_reschedule(struct srcu_struct *sp)
-{
-	bool pending = true;
-
-	if (rcu_batch_empty(&sp->batch_done) &&
-	    rcu_batch_empty(&sp->batch_check1) &&
-	    rcu_batch_empty(&sp->batch_check0) &&
-	    rcu_batch_empty(&sp->batch_queue)) {
-		spin_lock_irq(&sp->queue_lock);
-		if (rcu_batch_empty(&sp->batch_done) &&
-		    rcu_batch_empty(&sp->batch_check1) &&
-		    rcu_batch_empty(&sp->batch_check0) &&
-		    rcu_batch_empty(&sp->batch_queue)) {
-			sp->running = false;
-			pending = false;
-		}
-		spin_unlock_irq(&sp->queue_lock);
-	}
-
-	if (pending)
-		queue_delayed_work(system_power_efficient_wq,
-				   &sp->work, SRCU_INTERVAL);
-}
-
-/*
- * This is the work-queue function that handles SRCU grace periods.
- */
-void process_srcu(struct work_struct *work)
-{
-	struct srcu_struct *sp;
-
-	sp = container_of(work, struct srcu_struct, work.work);
-
-	srcu_collect_new(sp);
-	srcu_advance_batches(sp, 1);
-	srcu_invoke_callbacks(sp);
-	srcu_reschedule(sp);
-}
-EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 36e1f82..1a1c104 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -38,8 +38,8 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_lock_nesting[0] = 0;
 	sp->srcu_lock_nesting[1] = 0;
 	init_swait_queue_head(&sp->srcu_wq);
-	sp->srcu_gp_seq = 0;
-	rcu_segcblist_init(&sp->srcu_cblist);
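+	/* Empty callback list: NULL head, tail pointing at the head pointer. */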
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
 	sp->srcu_gp_running = false;
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
@@ -88,31 +88,16 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 {
 	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
 	flush_work(&sp->srcu_work);
-	WARN_ON(rcu_seq_state(sp->srcu_gp_seq));
 	WARN_ON(sp->srcu_gp_running);
 	WARN_ON(sp->srcu_gp_waiting);
-	WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist));
+	WARN_ON(sp->srcu_cb_head);
+	WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
- * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
- * Returns an index that must be passed to the matching srcu_read_unlock().
- */
-int __srcu_read_lock(struct srcu_struct *sp)
-{
-	int idx;
-
-	idx = READ_ONCE(sp->srcu_idx);
-	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
-	return idx;
-}
-EXPORT_SYMBOL_GPL(__srcu_read_lock);
-
-/*
  * Removes the count for the old reader from the appropriate element of
- * the srcu_struct.  Must be called from process context.
+ * the srcu_struct.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
@@ -132,52 +117,44 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 void srcu_drive_gp(struct work_struct *wp)
 {
 	int idx;
-	struct rcu_cblist ready_cbs;
-	struct srcu_struct *sp;
+	struct rcu_head *lh;
 	struct rcu_head *rhp;
+	struct srcu_struct *sp;
 
 	sp = container_of(wp, struct srcu_struct, srcu_work);
-	if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist))
+	if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head))
 		return; /* Already running or nothing to do. */
 
-	/* Tag recently arrived callbacks and wait for readers. */
+	/* Remove recently arrived callbacks and wait for readers. */
 	WRITE_ONCE(sp->srcu_gp_running, true);
-	rcu_segcblist_accelerate(&sp->srcu_cblist,
-				 rcu_seq_snap(&sp->srcu_gp_seq));
-	rcu_seq_start(&sp->srcu_gp_seq);
+	local_irq_disable();
+	lh = sp->srcu_cb_head;
+	sp->srcu_cb_head = NULL;
+	sp->srcu_cb_tail = &sp->srcu_cb_head;
+	local_irq_enable();
 	idx = sp->srcu_idx;
 	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
 	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
 	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
 	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
-	rcu_seq_end(&sp->srcu_gp_seq);
 
-	/* Update callback list based on GP, and invoke ready callbacks. */
-	rcu_segcblist_advance(&sp->srcu_cblist,
-			      rcu_seq_current(&sp->srcu_gp_seq));
-	if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) {
-		rcu_cblist_init(&ready_cbs);
-		local_irq_disable();
-		rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
-		local_irq_enable();
-		rhp = rcu_cblist_dequeue(&ready_cbs);
-		for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
-			local_bh_disable();
-			rhp->func(rhp);
-			local_bh_enable();
-		}
-		local_irq_disable();
-		rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
-		local_irq_enable();
+	/* Invoke the callbacks we removed above. */
+	while (lh) {
+		rhp = lh;
+		lh = lh->next;
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
 	}
-	WRITE_ONCE(sp->srcu_gp_running, false);
 
 	/*
-	 * If more callbacks, reschedule ourselves.  This can race with
-	 * a call_srcu() at interrupt level, but the ->srcu_gp_running
-	 * checks will straighten that out.
+	 * Enable rescheduling, and if there are more callbacks,
+	 * reschedule ourselves.  This can race with a call_srcu()
+	 * at interrupt level, but the ->srcu_gp_running checks will
+	 * straighten that out.
 	 */
-	if (!rcu_segcblist_empty(&sp->srcu_cblist))
+	WRITE_ONCE(sp->srcu_gp_running, false);
+	if (READ_ONCE(sp->srcu_cb_head))
 		schedule_work(&sp->srcu_work);
 }
 EXPORT_SYMBOL_GPL(srcu_drive_gp);
@@ -186,14 +163,16 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp);
  * Enqueue an SRCU callback on the specified srcu_struct structure,
  * initiating grace-period processing if it is not already running.
  */
-void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	       rcu_callback_t func)
 {
 	unsigned long flags;
 
-	head->func = func;
+	rhp->func = func;
+	rhp->next = NULL;
 	local_irq_save(flags);
-	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false);
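+	/* Append rhp to the tail of the singly linked callback list. */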
+	*sp->srcu_cb_tail = rhp;
+	sp->srcu_cb_tail = &rhp->next;
 	local_irq_restore(flags);
 	if (!READ_ONCE(sp->srcu_gp_running))
 		schedule_work(&sp->srcu_work);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 3ae8474..d0ca524 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -40,9 +40,15 @@
 #include "rcu.h"
 #include "rcu_segcblist.h"
 
-ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */
+/* Holdoff in nanoseconds for auto-expediting. */
+#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
+static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
 module_param(exp_holdoff, ulong, 0444);
 
+/* Overflow-check frequency.  N bits roughly says every 2**N grace periods. */
+static ulong counter_wrap_check = (ULONG_MAX >> 2);
+module_param(counter_wrap_check, ulong, 0444);
+
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 
@@ -70,7 +76,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 
 	/* Each pass through this loop initializes one srcu_node structure. */
 	rcu_for_each_node_breadth_first(sp, snp) {
-		spin_lock_init(&snp->lock);
+		raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
 		WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
 			     ARRAY_SIZE(snp->srcu_data_have_cbs));
 		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -104,7 +110,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
 	snp_first = sp->level[level];
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		spin_lock_init(&sdp->lock);
+		raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
@@ -163,7 +169,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
 	/* Don't re-initialize a lock while it is held. */
 	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
 	lockdep_init_map(&sp->dep_map, name, key, 0);
-	spin_lock_init(&sp->gp_lock);
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -180,7 +186,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
  */
 int init_srcu_struct(struct srcu_struct *sp)
 {
-	spin_lock_init(&sp->gp_lock);
+	raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
 	return init_srcu_struct_fields(sp, false);
 }
 EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -191,7 +197,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
  * First-use initialization of statically allocated srcu_struct
  * structure.  Wiring up the combining tree is more than can be
  * done with compile-time initialization, so this check is added
- * to each update-side SRCU primitive.  Use ->gp_lock, which -is-
+ * to each update-side SRCU primitive.  Use sp->lock, which -is-
  * compile-time initialized, to resolve races involving multiple
  * CPUs trying to garner first-use privileges.
  */
@@ -203,13 +209,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
-		spin_unlock_irqrestore(&sp->gp_lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 		return;
 	}
 	init_srcu_struct_fields(sp, true);
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -275,15 +281,20 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
 	 * not mean that there are no more readers, as one could have read
 	 * the current index but not have incremented the lock counter yet.
 	 *
-	 * Possible bug: There is no guarantee that there haven't been
-	 * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were
-	 * counted, meaning that this could return true even if there are
-	 * still active readers.  Since there are no memory barriers around
-	 * srcu_flip(), the CPU is not required to increment ->srcu_idx
-	 * before running srcu_readers_unlock_idx(), which means that there
-	 * could be an arbitrarily large number of critical sections that
-	 * execute after srcu_readers_unlock_idx() but use the old value
-	 * of ->srcu_idx.
+	 * So suppose that the updater is preempted here for so long
+	 * that more than ULONG_MAX non-nested readers come and go in
+	 * the meantime.  It turns out that this cannot result in overflow
+	 * because if a reader modifies its unlock count after we read it
+	 * above, then that reader's next load of ->srcu_idx is guaranteed
+	 * to get the new value, which will cause it to operate on the
+	 * other bank of counters, where it cannot contribute to the
+	 * overflow of these counters.  This means that there is a maximum
+	 * of 2*NR_CPUS increments, which cannot overflow given current
+	 * systems, especially not on 64-bit systems.
+	 *
+	 * OK, how about nesting?  This does impose a limit on nesting
+	 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
+	 * especially on 64-bit systems.
 	 */
 	return srcu_readers_lock_idx(sp, idx) == unlocks;
 }
@@ -357,7 +368,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
- * srcu_struct.  Must be called from process context.
+ * srcu_struct.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
 int __srcu_read_lock(struct srcu_struct *sp)
@@ -365,7 +376,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
 	int idx;
 
 	idx = READ_ONCE(sp->srcu_idx) & 0x1;
-	__this_cpu_inc(sp->sda->srcu_lock_count[idx]);
+	this_cpu_inc(sp->sda->srcu_lock_count[idx]);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
 	return idx;
 }
@@ -375,7 +386,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
- * Must be called from process context.
  */
 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
@@ -401,8 +411,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
 	struct srcu_data *sdp = this_cpu_ptr(sp->sda);
 	int state;
 
-	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock),
-			 "Invoked srcu_gp_start() without ->gp_lock!");
+	lockdep_assert_held(&sp->lock);
 	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
@@ -490,17 +499,20 @@ static void srcu_gp_end(struct srcu_struct *sp)
 {
 	unsigned long cbdelay;
 	bool cbs;
+	int cpu;
+	unsigned long flags;
 	unsigned long gpseq;
 	int idx;
 	int idxnext;
 	unsigned long mask;
+	struct srcu_data *sdp;
 	struct srcu_node *snp;
 
 	/* Prevent more than one additional grace period. */
 	mutex_lock(&sp->srcu_cb_mutex);
 
 	/* End the current grace period. */
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	idx = rcu_seq_state(sp->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
 	cbdelay = srcu_get_delay(sp);
@@ -509,7 +521,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
 		sp->srcu_gp_seq_needed_exp = gpseq;
-	spin_unlock_irq(&sp->gp_lock);
+	raw_spin_unlock_irq_rcu_node(sp);
 	mutex_unlock(&sp->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
 
@@ -517,7 +529,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
 	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
 	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
 	rcu_for_each_node_breadth_first(sp, snp) {
-		spin_lock_irq(&snp->lock);
+		raw_spin_lock_irq_rcu_node(snp);
 		cbs = false;
 		if (snp >= sp->level[rcu_num_lvls - 1])
 			cbs = snp->srcu_have_cbs[idx] == gpseq;
@@ -527,28 +539,37 @@ static void srcu_gp_end(struct srcu_struct *sp)
 			snp->srcu_gp_seq_needed_exp = gpseq;
 		mask = snp->srcu_data_have_cbs[idx];
 		snp->srcu_data_have_cbs[idx] = 0;
-		spin_unlock_irq(&snp->lock);
-		if (cbs) {
-			smp_mb(); /* GP end before CB invocation. */
+		raw_spin_unlock_irq_rcu_node(snp);
+		if (cbs)
 			srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
-		}
+
+		/* Occasionally prevent srcu_data counter wrap. */
+		if (!(gpseq & counter_wrap_check))
+			for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
+				sdp = per_cpu_ptr(sp->sda, cpu);
+				raw_spin_lock_irqsave_rcu_node(sdp, flags);
+				if (ULONG_CMP_GE(gpseq,
+						 sdp->srcu_gp_seq_needed + 100))
+					sdp->srcu_gp_seq_needed = gpseq;
+				raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
+			}
 	}
 
 	/* Callback initiation done, allow grace periods after next. */
 	mutex_unlock(&sp->srcu_cb_mutex);
 
 	/* Start a new grace period if needed. */
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	gpseq = rcu_seq_current(&sp->srcu_gp_seq);
 	if (!rcu_seq_state(gpseq) &&
 	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
 		srcu_gp_start(sp);
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 		/* Throttle expedited grace periods: Should be rare! */
 		srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
 				    ? 0 : SRCU_INTERVAL);
 	} else {
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 	}
 }
 
@@ -568,18 +589,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
 		    ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
 			return;
-		spin_lock_irqsave(&snp->lock, flags);
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
-			spin_unlock_irqrestore(&snp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 			return;
 		}
 		WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-		spin_unlock_irqrestore(&snp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
 		sp->srcu_gp_seq_needed_exp = s;
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -601,14 +622,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 	for (; snp != NULL; snp = snp->srcu_parent) {
 		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
 			return; /* GP already done and CBs recorded. */
-		spin_lock_irqsave(&snp->lock, flags);
+		raw_spin_lock_irqsave_rcu_node(snp, flags);
 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
 			snp_seq = snp->srcu_have_cbs[idx];
 			if (snp == sdp->mynode && snp_seq == s)
 				snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-			spin_unlock_irqrestore(&snp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 			if (snp == sdp->mynode && snp_seq != s) {
-				smp_mb(); /* CBs after GP! */
 				srcu_schedule_cbs_sdp(sdp, do_norm
 							   ? SRCU_INTERVAL
 							   : 0);
@@ -623,11 +643,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 			snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
 		if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
 			snp->srcu_gp_seq_needed_exp = s;
-		spin_unlock_irqrestore(&snp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(snp, flags);
 	}
 
 	/* Top of tree, must ensure the grace period will be started. */
-	spin_lock_irqsave(&sp->gp_lock, flags);
+	raw_spin_lock_irqsave_rcu_node(sp, flags);
 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
 		/*
 		 * Record need for grace period s.  Pair with load
@@ -646,7 +666,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 		queue_delayed_work(system_power_efficient_wq, &sp->work,
 				   srcu_get_delay(sp));
 	}
-	spin_unlock_irqrestore(&sp->gp_lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sp, flags);
 }
 
 /*
@@ -672,6 +692,16 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
  */
 static void srcu_flip(struct srcu_struct *sp)
 {
+	/*
+	 * Ensure that if this updater saw a given reader's increment
+	 * from __srcu_read_lock(), that reader was using an old value
+	 * of ->srcu_idx.  Also ensure that if a given reader sees the
+	 * new value of ->srcu_idx, this updater's earlier scans cannot
+	 * have seen that reader's increments (which is OK, because this
+	 * grace period need not wait on that reader).
+	 */
+	smp_mb(); /* E */  /* Pairs with B and C. */
+
 	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
 
 	/*
@@ -746,6 +776,13 @@ static bool srcu_might_be_idle(struct srcu_struct *sp)
 }
 
 /*
+ * SRCU callback function to leak a callback.
+ */
+static void srcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
+/*
  * Enqueue an SRCU callback on the srcu_data structure associated with
  * the current CPU and the specified srcu_struct structure, initiating
  * grace-period processing if it is not already running.
@@ -783,10 +820,16 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	struct srcu_data *sdp;
 
 	check_init_srcu_struct(sp);
+	if (debug_rcu_head_queue(rhp)) {
+		/* Probable double call_srcu(), so leak the callback. */
+		WRITE_ONCE(rhp->func, srcu_leak_callback);
+		WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
+		return;
+	}
 	rhp->func = func;
 	local_irq_save(flags);
 	sdp = this_cpu_ptr(sp->sda);
-	spin_lock(&sdp->lock);
+	raw_spin_lock_rcu_node(sdp);
 	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
@@ -800,13 +843,30 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 		sdp->srcu_gp_seq_needed_exp = s;
 		needexp = true;
 	}
-	spin_unlock_irqrestore(&sdp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
 	if (needgp)
 		srcu_funnel_gp_start(sp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(sp, sdp->mynode, s);
 }
 
+/**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
+ * @sp: srcu_struct on which to queue the callback
+ * @rhp: structure to be used for queueing the SRCU callback.
+ * @func: function to be invoked after the SRCU grace period
+ *
+ * The callback function will be invoked some time after a full SRCU
+ * grace period elapses, in other words after all pre-existing SRCU
+ * read-side critical sections have completed.  However, the callback
+ * function might well execute concurrently with other SRCU read-side
+ * critical sections that started after call_srcu() was invoked.  SRCU
+ * read-side critical sections are delimited by srcu_read_lock() and
+ * srcu_read_unlock(), and may be nested.
+ *
+ * The callback will be invoked from process context, but must nevertheless
+ * be fast and must not block.
+ */
 void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	       rcu_callback_t func)
 {
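For illustration only (not part of this patch), a minimal sketch of the usage pattern the kerneldoc above describes; struct foo, my_srcu, and the function names are hypothetical:

#include <linux/slab.h>
#include <linux/srcu.h>

/* Hypothetical object protected by a hypothetical SRCU domain. */
struct foo {
	int data;
	struct rcu_head rh;		/* Storage for the queued callback. */
};

DEFINE_SRCU(my_srcu);			/* Assumed SRCU domain, for the sketch only. */

static void foo_reclaim(struct rcu_head *rhp)
{
	/* Runs in process context once all pre-existing SRCU readers are done. */
	kfree(container_of(rhp, struct foo, rh));
}

static void foo_retire(struct foo *fp)
{
	/* Readers inside srcu_read_lock(&my_srcu)/srcu_read_unlock() stay safe. */
	call_srcu(&my_srcu, &fp->rh, foo_reclaim);
}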
@@ -954,13 +1014,16 @@ void srcu_barrier(struct srcu_struct *sp)
 	 */
 	for_each_possible_cpu(cpu) {
 		sdp = per_cpu_ptr(sp->sda, cpu);
-		spin_lock_irq(&sdp->lock);
+		raw_spin_lock_irq_rcu_node(sdp);
 		atomic_inc(&sp->srcu_barrier_cpu_cnt);
 		sdp->srcu_barrier_head.func = srcu_barrier_cb;
+		debug_rcu_head_queue(&sdp->srcu_barrier_head);
 		if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
-					   &sdp->srcu_barrier_head, 0))
+					   &sdp->srcu_barrier_head, 0)) {
+			debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
 			atomic_dec(&sp->srcu_barrier_cpu_cnt);
-		spin_unlock_irq(&sdp->lock);
+		}
+		raw_spin_unlock_irq_rcu_node(sdp);
 	}
 
 	/* Remove the initial count, at which point reaching zero can happen. */
@@ -1009,17 +1072,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
 	 */
 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
-		spin_lock_irq(&sp->gp_lock);
+		raw_spin_lock_irq_rcu_node(sp);
 		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
-			spin_unlock_irq(&sp->gp_lock);
+			raw_spin_unlock_irq_rcu_node(sp);
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return;
 		}
 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(sp);
-		spin_unlock_irq(&sp->gp_lock);
+		raw_spin_unlock_irq_rcu_node(sp);
 		if (idx != SRCU_STATE_IDLE) {
 			mutex_unlock(&sp->srcu_gp_mutex);
 			return; /* Someone else started the grace period. */
@@ -1068,22 +1131,22 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	sdp = container_of(work, struct srcu_data, work.work);
 	sp = sdp->sp;
 	rcu_cblist_init(&ready_cbs);
-	spin_lock_irq(&sdp->lock);
-	smp_mb(); /* Old grace periods before callback invocation! */
+	raw_spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
 			      rcu_seq_current(&sp->srcu_gp_seq));
 	if (sdp->srcu_cblist_invoking ||
 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
-		spin_unlock_irq(&sdp->lock);
+		raw_spin_unlock_irq_rcu_node(sdp);
 		return;  /* Someone else on the job or nothing to do. */
 	}
 
 	/* We are on the job!  Extract and invoke ready callbacks. */
 	sdp->srcu_cblist_invoking = true;
 	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
-	spin_unlock_irq(&sdp->lock);
+	raw_spin_unlock_irq_rcu_node(sdp);
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
+		debug_rcu_head_unqueue(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
@@ -1093,13 +1156,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	 * Update counts, accelerate new callbacks, and if needed,
 	 * schedule another round of callback invocation.
 	 */
-	spin_lock_irq(&sdp->lock);
+	raw_spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&sp->srcu_gp_seq));
 	sdp->srcu_cblist_invoking = false;
 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
-	spin_unlock_irq(&sdp->lock);
+	raw_spin_unlock_irq_rcu_node(sdp);
 	if (more)
 		srcu_schedule_cbs_sdp(sdp, 0);
 }
@@ -1112,7 +1175,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 {
 	bool pushgp = true;
 
-	spin_lock_irq(&sp->gp_lock);
+	raw_spin_lock_irq_rcu_node(sp);
 	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
 		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
 			/* All requests fulfilled, time to go idle. */
@@ -1122,7 +1185,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
 		/* Outstanding request and no GP.  Start one. */
 		srcu_gp_start(sp);
 	}
-	spin_unlock_irq(&sp->gp_lock);
+	raw_spin_unlock_irq_rcu_node(sp);
 
 	if (pushgp)
 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
@@ -1153,3 +1216,12 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
 	*gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
+
+static int __init srcu_bootup_announce(void)
+{
+	pr_info("Hierarchical SRCU implementation.\n");
+	if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
+		pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+	return 0;
+}
+early_initcall(srcu_bootup_announce);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index e538573..f848896 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -35,15 +35,26 @@
 #include <linux/time.h>
 #include <linux/cpu.h>
 #include <linux/prefetch.h>
-#include <linux/trace_events.h>
 
 #include "rcu.h"
 
-/* Forward declarations for tiny_plugin.h. */
-struct rcu_ctrlblk;
-static void __call_rcu(struct rcu_head *head,
-		       rcu_callback_t func,
-		       struct rcu_ctrlblk *rcp);
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
+	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
+	struct rcu_head **curtail;	/* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+	.donetail	= &rcu_sched_ctrlblk.rcucblist,
+	.curtail	= &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.donetail	= &rcu_bh_ctrlblk.rcucblist,
+	.curtail	= &rcu_bh_ctrlblk.rcucblist,
+};
 
 #include "tiny_plugin.h"
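As background for the ->donetail/->curtail fields above: both are classic tail pointers into a singly linked callback list, each pointing at the ->next field of the last "done" and last queued element respectively. A freestanding sketch of the idea (plain C, hypothetical names, not from this patch):

#include <stddef.h>

struct node {
	struct node *next;
};

struct tailq {
	struct node *head;	/* First pending node, NULL when empty. */
	struct node **tail;	/* Points at the ->next field of the last node. */
};

static void tailq_init(struct tailq *q)
{
	q->head = NULL;
	q->tail = &q->head;	/* Mirrors .curtail = &...rcucblist above. */
}

static void tailq_push(struct tailq *q, struct node *n)
{
	n->next = NULL;
	*q->tail = n;		/* Link after the current last element. */
	q->tail = &n->next;	/* The new element is now the last one. */
}

Enqueue stays O(1), and, as in __rcu_process_callbacks() below, the list can be split at a tail pointer without any back-pointers.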
 
@@ -59,19 +70,6 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL(rcu_barrier_sched);
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
-
-/*
- * Test whether RCU thinks that the current CPU is idle.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return true;
-}
-EXPORT_SYMBOL(__rcu_is_watching);
-
-#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
-
 /*
  * Helper function for rcu_sched_qs() and rcu_bh_qs().
  * Also irqs are disabled to avoid confusion due to interrupt handlers
@@ -79,7 +77,6 @@ EXPORT_SYMBOL(__rcu_is_watching);
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
-	RCU_TRACE(reset_cpu_stall_ticks(rcp);)
 	if (rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
 		return 1;
@@ -125,7 +122,6 @@ void rcu_bh_qs(void)
  */
 void rcu_check_callbacks(int user)
 {
-	RCU_TRACE(check_cpu_stalls();)
 	if (user)
 		rcu_sched_qs();
 	else if (!in_softirq())
@@ -140,10 +136,8 @@ void rcu_check_callbacks(int user)
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
-	const char *rn = NULL;
 	struct rcu_head *next, *list;
 	unsigned long flags;
-	RCU_TRACE(int cb_count = 0;)
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
@@ -152,7 +146,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 		local_irq_restore(flags);
 		return;
 	}
-	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);)
 	list = rcp->rcucblist;
 	rcp->rcucblist = *rcp->donetail;
 	*rcp->donetail = NULL;
@@ -162,22 +155,15 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
-	RCU_TRACE(rn = rcp->name;)
 	while (list) {
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
 		local_bh_disable();
-		__rcu_reclaim(rn, list);
+		__rcu_reclaim("", list);
 		local_bh_enable();
 		list = next;
-		RCU_TRACE(cb_count++;)
 	}
-	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);)
-	RCU_TRACE(trace_rcu_batch_end(rcp->name,
-				      cb_count, 0, need_resched(),
-				      is_idle_task(current),
-				      false));
 }
 
 static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
@@ -221,7 +207,6 @@ static void __call_rcu(struct rcu_head *head,
 	local_irq_save(flags);
 	*rcp->curtail = head;
 	rcp->curtail = &head->next;
-	RCU_TRACE(rcp->qlen++;)
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
@@ -254,8 +239,5 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);)
-	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);)
-
 	rcu_early_boot_tests();
 }
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 371034e..f0a01b2 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -22,36 +22,6 @@
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-#include <linux/kthread.h>
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-	struct rcu_head *rcucblist;	/* List of pending callbacks (CBs). */
-	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
-	struct rcu_head **curtail;	/* ->next pointer of last CB. */
-	RCU_TRACE(long qlen);		/* Number of pending CBs. */
-	RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
-	RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
-	RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
-	RCU_TRACE(const char *name);	/* Name of RCU type. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-	.donetail	= &rcu_sched_ctrlblk.rcucblist,
-	.curtail	= &rcu_sched_ctrlblk.rcucblist,
-	RCU_TRACE(.name = "rcu_sched")
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.donetail	= &rcu_bh_ctrlblk.rcucblist,
-	.curtail	= &rcu_bh_ctrlblk.rcucblist,
-	RCU_TRACE(.name = "rcu_bh")
-};
-
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
 #include <linux/kernel_stat.h>
 
@@ -75,96 +45,3 @@ void __init rcu_scheduler_starting(void)
 }
 
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
-
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	rcp->qlen -= n;
-	local_irq_restore(flags);
-}
-
-/*
- * Dump statistics for TINY_RCU, such as they are.
- */
-static int show_tiny_stats(struct seq_file *m, void *unused)
-{
-	seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
-	seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
-	return 0;
-}
-
-static int show_tiny_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_tiny_stats, NULL);
-}
-
-static const struct file_operations show_tiny_stats_fops = {
-	.owner = THIS_MODULE,
-	.open = show_tiny_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutiny_trace_init(void)
-{
-	struct dentry *retval;
-
-	rcudir = debugfs_create_dir("rcu", NULL);
-	if (!rcudir)
-		goto free_out;
-	retval = debugfs_create_file("rcudata", 0444, rcudir,
-				     NULL, &show_tiny_stats_fops);
-	if (!retval)
-		goto free_out;
-	return 0;
-free_out:
-	debugfs_remove_recursive(rcudir);
-	return 1;
-}
-device_initcall(rcutiny_trace_init);
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	unsigned long j;
-	unsigned long js;
-
-	if (rcu_cpu_stall_suppress)
-		return;
-	rcp->ticks_this_gp++;
-	j = jiffies;
-	js = READ_ONCE(rcp->jiffies_stall);
-	if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
-		pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
-		       rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
-		       jiffies - rcp->gp_start, rcp->qlen);
-		dump_stack();
-		WRITE_ONCE(rcp->jiffies_stall,
-			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	} else if (ULONG_CMP_GE(j, js)) {
-		WRITE_ONCE(rcp->jiffies_stall,
-			   jiffies + rcu_jiffies_till_stall_check());
-	}
-}
-
-static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
-{
-	rcp->ticks_this_gp = 0;
-	rcp->gp_start = jiffies;
-	WRITE_ONCE(rcp->jiffies_stall,
-		   jiffies + rcu_jiffies_till_stall_check());
-}
-
-static void check_cpu_stalls(void)
-{
-	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);)
-	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);)
-}
-
-#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e354e47..51d4c3a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -168,35 +168,17 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp,
 static void sync_sched_exp_online_cleanup(int cpu);
 
 /* rcuc/rcub kthread realtime priority */
-#ifdef CONFIG_RCU_KTHREAD_PRIO
-static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
-#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
-#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */
 module_param(kthread_prio, int, 0644);
 
 /* Delay in jiffies for grace-period initialization delays, debug only. */
 
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
-static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY;
-module_param(gp_preinit_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
-static const int gp_preinit_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
-
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
-static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
-module_param(gp_init_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
-static const int gp_init_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
-
-#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
-static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
-module_param(gp_cleanup_delay, int, 0644);
-#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
-static const int gp_cleanup_delay;
-#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
+static int gp_preinit_delay;
+module_param(gp_preinit_delay, int, 0444);
+static int gp_init_delay;
+module_param(gp_init_delay, int, 0444);
+static int gp_cleanup_delay;
+module_param(gp_cleanup_delay, int, 0444);
 
 /*
  * Number of grace periods between delays, normalized by the duration of
@@ -250,6 +232,7 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  */
 void rcu_sched_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
 	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
 		return;
 	trace_rcu_grace_period(TPS("rcu_sched"),
@@ -265,6 +248,7 @@ void rcu_sched_qs(void)
 
 void rcu_bh_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
 	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_bh"),
 				       __this_cpu_read(rcu_bh_data.gpnum),
@@ -286,10 +270,6 @@ void rcu_bh_qs(void)
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-	.dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 };
 
 /*
@@ -478,7 +458,7 @@ void rcu_note_context_switch(bool preempt)
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
-	rcu_preempt_note_context_switch();
+	rcu_preempt_note_context_switch(preempt);
 	/* Load rcu_urgent_qs before other flags. */
 	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
 		goto out;
@@ -534,9 +514,12 @@ void rcu_all_qs(void)
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
 
-static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
-static long qhimark = 10000;	/* If this many pending, ignore blimit. */
-static long qlowmark = 100;	/* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch. */
+static long blimit = DEFAULT_RCU_BLIMIT;
+#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+static long qhimark = DEFAULT_RCU_QHIMARK;
+#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+static long qlowmark = DEFAULT_RCU_QLOMARK;
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
@@ -559,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644);
 
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj);
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
 
@@ -757,6 +737,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
 	int *fp = &rnp->need_future_gp[idx];
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_future_needs_gp() invoked with irqs enabled!!!");
 	return READ_ONCE(*fp);
 }
 
@@ -768,6 +749,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
 static bool
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "cpu_needs_another_gp() invoked with irqs enabled!!!");
 	if (rcu_gp_in_progress(rsp))
 		return false;  /* No, a grace period is already in progress. */
 	if (rcu_future_needs_gp(rsp))
@@ -794,6 +776,7 @@ static void rcu_eqs_enter_common(bool user)
 	struct rcu_data *rdp;
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_enter_common() invoked with irqs enabled!!!");
 	trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 	    !user && !is_idle_task(current)) {
@@ -864,7 +847,6 @@ void rcu_idle_enter(void)
 
 	local_irq_save(flags);
 	rcu_eqs_enter(false);
-	rcu_sysidle_enter(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -914,7 +896,6 @@ void rcu_irq_exit(void)
 		trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
 		rdtp->dynticks_nesting--;
 	}
-	rcu_sysidle_enter(1);
 }
 
 /*
@@ -967,6 +948,7 @@ static void rcu_eqs_exit(bool user)
 	struct rcu_dynticks *rdtp;
 	long long oldval;
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_exit() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
@@ -995,7 +977,6 @@ void rcu_idle_exit(void)
 
 	local_irq_save(flags);
 	rcu_eqs_exit(false);
-	rcu_sysidle_exit(0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -1047,7 +1028,6 @@ void rcu_irq_enter(void)
 		trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
 	else
 		rcu_eqs_exit_common(oldval, true);
-	rcu_sysidle_exit(1);
 }
 
 /*
@@ -1130,22 +1110,11 @@ void rcu_nmi_exit(void)
 }
 
 /**
- * __rcu_is_watching - are RCU read-side critical sections safe?
- *
- * Return true if RCU is watching the running CPU, which means that
- * this CPU can safely enter RCU read-side critical sections.  Unlike
- * rcu_is_watching(), the caller of __rcu_is_watching() must have at
- * least disabled preemption.
- */
-bool notrace __rcu_is_watching(void)
-{
-	return !rcu_dynticks_curr_cpu_in_eqs();
-}
-
-/**
  * rcu_is_watching - see if RCU thinks that the current CPU is idle
  *
- * If the current CPU is in its idle loop and is neither in an interrupt
+ * Return true if RCU is watching the running CPU, which means that this
+ * CPU can safely enter RCU read-side critical sections.  In other words,
+ * if the current CPU is in its idle loop and is neither in an interrupt
  * nor an NMI handler, RCU is not watching and this function returns false.
  */
 bool notrace rcu_is_watching(void)
@@ -1153,7 +1122,7 @@ bool notrace rcu_is_watching(void)
 	bool ret;
 
 	preempt_disable_notrace();
-	ret = __rcu_is_watching();
+	ret = !rcu_dynticks_curr_cpu_in_eqs();
 	preempt_enable_notrace();
 	return ret;
 }
@@ -1237,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
  * credit them with an implicit quiescent state.  Return 1 if this CPU
  * is in dynticks idle mode, which is an extended quiescent state.
  */
-static int dyntick_save_progress_counter(struct rcu_data *rdp,
-					 bool *isidle, unsigned long *maxj)
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
 	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
-	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
 		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
@@ -1258,8 +1225,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
  * idle state since the last call to dyntick_save_progress_counter()
  * for this same CPU, or by virtue of having been offline.
  */
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
-				    bool *isidle, unsigned long *maxj)
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
 	unsigned long jtsq;
 	bool *rnhqp;
@@ -1674,6 +1640,8 @@ void rcu_cpu_stall_reset(void)
 static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 				       struct rcu_node *rnp)
 {
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * If RCU is idle, we just wait for the next grace period.
 	 * But we can only be sure that RCU is idle if we are looking
@@ -1719,6 +1687,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	bool ret = false;
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * Pick up grace-period number for new callbacks.  If this
 	 * grace period is already marked as needed, return to the caller.
@@ -1845,6 +1815,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 {
 	bool ret = false;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
 	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
@@ -1883,6 +1855,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			    struct rcu_data *rdp)
 {
+	lockdep_assert_held(&rnp->lock);
+
 	/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
 	if (!rcu_segcblist_pend_cbs(&rdp->cblist))
 		return false;
@@ -1909,6 +1883,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 	bool ret;
 	bool need_gp;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/* Handle the ends of any preceding grace periods first. */
 	if (rdp->completed == rnp->completed &&
 	    !unlikely(READ_ONCE(rdp->gpwrap))) {
@@ -2115,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
  */
 static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
 {
-	bool isidle = false;
-	unsigned long maxj;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	WRITE_ONCE(rsp->gp_activity, jiffies);
 	rsp->n_force_qs++;
 	if (first_time) {
 		/* Collect dyntick-idle snapshots. */
-		if (is_sysidle_rcu_state(rsp)) {
-			isidle = true;
-			maxj = jiffies - ULONG_MAX / 4;
-		}
-		force_qs_rnp(rsp, dyntick_save_progress_counter,
-			     &isidle, &maxj);
-		rcu_sysidle_report_gp(rsp, isidle, maxj);
+		force_qs_rnp(rsp, dyntick_save_progress_counter);
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		isidle = true;
-		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
+		force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
 	}
 	/* Clear flag to prevent immediate re-entry. */
 	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -2341,6 +2308,7 @@ static bool
 rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 		      struct rcu_data *rdp)
 {
+	lockdep_assert_held(&rnp->lock);
 	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
@@ -2402,6 +2370,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
+	lockdep_assert_held(&rcu_get_root(rsp)->lock);
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
@@ -2426,6 +2395,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 	unsigned long oldmask = 0;
 	struct rcu_node *rnp_c;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/* Walk up the rcu_node hierarchy. */
 	for (;;) {
 		if (!(rnp->qsmask & mask) || rnp->gpnum != gps) {
@@ -2486,6 +2457,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
+	lockdep_assert_held(&rnp->lock);
 	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
 	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2599,6 +2571,8 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
+	lockdep_assert_held(&rsp->orphan_lock);
+
 	/* No-CBs CPUs do not have orphanable callbacks. */
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
 		return;
@@ -2639,6 +2613,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
+	lockdep_assert_held(&rsp->orphan_lock);
+
 	/* No-CBs CPUs are handled specially. */
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
 	    rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2705,6 +2681,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 	long mask;
 	struct rcu_node *rnp = rnp_leaf;
 
+	lockdep_assert_held(&rnp->lock);
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
 	    rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
 		return;
@@ -2895,10 +2872,7 @@ void rcu_check_callbacks(int user)
  *
  * The caller must have suppressed start of new grace periods.
  */
-static void force_qs_rnp(struct rcu_state *rsp,
-			 int (*f)(struct rcu_data *rsp, bool *isidle,
-				  unsigned long *maxj),
-			 bool *isidle, unsigned long *maxj)
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 {
 	int cpu;
 	unsigned long flags;
@@ -2937,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		for_each_leaf_node_possible_cpu(rnp, cpu) {
 			unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
 			if ((rnp->qsmask & bit) != 0) {
-				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+				if (f(per_cpu_ptr(rsp->rda, cpu)))
 					mask |= bit;
 			}
 		}
@@ -3143,9 +3117,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
 
 	if (debug_rcu_head_queue(head)) {
-		/* Probable double call_rcu(), so leak the callback. */
+		/*
+		 * Probable double call_rcu(), so leak the callback.
+		 * Use rcu:rcu_callback trace event to find the previous
+		 * time callback was passed to __call_rcu().
+		 */
+		WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n",
+			  head, head->func);
 		WRITE_ONCE(head->func, rcu_leak_callback);
-		WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
 		return;
 	}
 	head->func = func;
@@ -3194,8 +3173,24 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 	local_irq_restore(flags);
 }
 
-/*
- * Queue an RCU-sched callback for invocation after a grace period.
+/**
+ * call_rcu_sched() - Queue an RCU callback for invocation after a sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_sched() assumes
+ * that the read-side critical sections end on enabling of preemption
+ * or on voluntary preemption.
+ * RCU read-side critical sections are delimited by:
+ *  - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ *  - anything that disables preemption.
+ *
+ * These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
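To make the comment above concrete, here is a minimal sketch of an updater pairing call_rcu_sched() with preemption-disabled readers; cur_cfg, struct cfg, and the helpers are hypothetical names, not part of this patch:

#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg {
	int value;
	struct rcu_head rh;
};

static struct cfg __rcu *cur_cfg;	/* Assumed to be initialized elsewhere. */

static int cfg_read(void)
{
	int v;

	preempt_disable();		/* Read-side critical section for _sched. */
	v = rcu_dereference_sched(cur_cfg)->value;
	preempt_enable();
	return v;
}

static void cfg_free(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct cfg, rh));
}

static void cfg_replace(struct cfg *newc)
{
	struct cfg *oldc = rcu_dereference_protected(cur_cfg, 1);

	rcu_assign_pointer(cur_cfg, newc);
	call_rcu_sched(&oldc->rh, cfg_free);	/* Deferred past all such readers. */
}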
@@ -3203,8 +3198,26 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/*
- * Queue an RCU callback for invocation after a quicker grace period.
+/**
+ * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_bh() assumes
+ * that the read-side critical sections end on completion of a softirq
+ * handler. This means that read-side critical sections in process
+ * context must not be interrupted by softirqs. This interface is to be
+ * used when most of the read-side critical sections are in softirq context.
+ * RCU read-side critical sections are delimited by:
+ *  - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR
+ *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *
+ * These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 {
@@ -3280,12 +3293,6 @@ static inline int rcu_blocking_is_gp(void)
  * to have executed a full memory barrier during the execution of
  * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
  * again only if the system has more than one CPU).
- *
- * This primitive provides the guarantees made by the (now removed)
- * synchronize_kernel() API.  In contrast, synchronize_rcu() only
- * guarantees that rcu_read_lock() sections will have completed.
- * In "classic RCU", these two guarantees happen to be one and
- * the same, but can differ in realtime RCU implementations.
  */
 void synchronize_sched(void)
 {
@@ -3578,8 +3585,14 @@ static void rcu_barrier_func(void *type)
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
 	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
-	atomic_inc(&rsp->barrier_cpu_count);
-	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
+	rdp->barrier_head.func = rcu_barrier_callback;
+	debug_rcu_head_queue(&rdp->barrier_head);
+	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
+		atomic_inc(&rsp->barrier_cpu_count);
+	} else {
+		debug_rcu_head_unqueue(&rdp->barrier_head);
+		_rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+	}
 }
 
 /*
@@ -3698,6 +3711,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 	long mask;
 	struct rcu_node *rnp = rnp_leaf;
 
+	lockdep_assert_held(&rnp->lock);
 	for (;;) {
 		mask = rnp->grpmask;
 		rnp = rnp->parent;
@@ -3753,7 +3767,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	    !init_nocb_callback_list(rdp))
 		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ba38262..9af0f31 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -45,14 +45,6 @@ struct rcu_dynticks {
 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	long long dynticks_idle_nesting;
-				    /* irq/process nesting level from idle. */
-	atomic_t dynticks_idle;	    /* Even value for idle, else odd. */
-				    /*  "Idle" excludes userspace execution. */
-	unsigned long dynticks_idle_jiffies;
-				    /* End of last non-NMI non-idle period. */
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	bool all_lazy;		    /* Are all CPU's CBs lazy? */
 	unsigned long nonlazy_posted;
@@ -160,19 +152,6 @@ struct rcu_node {
 				/* Number of tasks boosted for expedited GP. */
 	unsigned long n_normal_boosts;
 				/* Number of tasks boosted for normal GP. */
-	unsigned long n_balk_blkd_tasks;
-				/* Refused to boost: no blocked tasks. */
-	unsigned long n_balk_exp_gp_tasks;
-				/* Refused to boost: nothing blocking GP. */
-	unsigned long n_balk_boost_tasks;
-				/* Refused to boost: already boosting. */
-	unsigned long n_balk_notblocked;
-				/* Refused to boost: RCU RS CS still running. */
-	unsigned long n_balk_notyet;
-				/* Refused to boost: not yet time. */
-	unsigned long n_balk_nos;
-				/* Refused to boost: not sure why, though. */
-				/*  This can happen due to race conditions. */
 #ifdef CONFIG_RCU_NOCB_CPU
 	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
@@ -312,9 +291,9 @@ struct rcu_data {
 };
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
-#define RCU_NOGP_WAKE_NOT	0
-#define RCU_NOGP_WAKE		1
-#define RCU_NOGP_WAKE_FORCE	2
+#define RCU_NOCB_WAKE_NOT	0
+#define RCU_NOCB_WAKE		1
+#define RCU_NOCB_WAKE_FORCE	2
 
 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
 					/* For jiffies_till_first_fqs and */
@@ -477,7 +456,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
-static void rcu_preempt_note_context_switch(void);
+static void rcu_preempt_note_context_switch(bool preempt);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -529,15 +508,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
-static void rcu_sysidle_enter(int irq);
-static void rcu_sysidle_exit(int irq);
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj);
-static bool is_sysidle_rcu_state(struct rcu_state *rsp);
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj);
 static void rcu_bind_gp_kthread(void);
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
 static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
@@ -551,75 +522,3 @@ void srcu_offline_cpu(unsigned int cpu) { }
 #endif /* #else #ifdef CONFIG_SRCU */
 
 #endif /* #ifndef RCU_TREE_NONCORE */
-
-#ifdef CONFIG_RCU_TRACE
-/* Read out queue lengths for tracing. */
-static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
-{
-#ifdef CONFIG_RCU_NOCB_CPU
-	*ql = atomic_long_read(&rdp->nocb_q_count);
-	*qll = atomic_long_read(&rdp->nocb_q_count_lazy);
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-	*ql = 0;
-	*qll = 0;
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
-}
-#endif /* #ifdef CONFIG_RCU_TRACE */
-
-/*
- * Wrappers for the rcu_node::lock acquire and release.
- *
- * Because the rcu_nodes form a tree, the tree traversal locking will observe
- * different lock values, this in turn means that an UNLOCK of one level
- * followed by a LOCK of another level does not imply a full memory barrier;
- * and most importantly transitivity is lost.
- *
- * In order to restore full ordering between tree levels, augment the regular
- * lock acquire functions with smp_mb__after_unlock_lock().
- *
- * As ->lock of struct rcu_node is a __private field, therefore one should use
- * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
- */
-static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
-	smp_mb__after_unlock_lock();
-}
-
-static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
-}
-
-static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
-	smp_mb__after_unlock_lock();
-}
-
-static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
-{
-	raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
-}
-
-#define raw_spin_lock_irqsave_rcu_node(rnp, flags)			\
-do {									\
-	typecheck(unsigned long, flags);				\
-	raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags);	\
-	smp_mb__after_unlock_lock();					\
-} while (0)
-
-#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags)			\
-do {									\
-	typecheck(unsigned long, flags);				\
-	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags);	\
-} while (0)
-
-static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
-{
-	bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
-
-	if (locked)
-		smp_mb__after_unlock_lock();
-	return locked;
-}
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index e513b4a..dd21ca4 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -147,7 +147,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
  *
  * Caller must hold the rcu_state's exp_mutex.
  */
-static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
 	return rnp->exp_tasks == NULL &&
 	       READ_ONCE(rnp->expmask) == 0;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c9a4865..908b309 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -70,7 +70,7 @@ static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
 static void __init rcu_bootup_announce_oddness(void)
 {
 	if (IS_ENABLED(CONFIG_RCU_TRACE))
-		pr_info("\tRCU debugfs-based tracing is enabled.\n");
+		pr_info("\tRCU event tracing is enabled.\n");
 	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
 	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
 		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
@@ -90,8 +90,32 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 	if (nr_cpu_ids != NR_CPUS)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-	if (IS_ENABLED(CONFIG_RCU_BOOST))
-		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
+#ifdef CONFIG_RCU_BOOST
+	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY);
+#endif
+	if (blimit != DEFAULT_RCU_BLIMIT)
+		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
+	if (qhimark != DEFAULT_RCU_QHIMARK)
+		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
+	if (qlowmark != DEFAULT_RCU_QLOMARK)
+		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
+	if (jiffies_till_first_fqs != ULONG_MAX)
+		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
+	if (jiffies_till_next_fqs != ULONG_MAX)
+		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
+	if (rcu_kick_kthreads)
+		pr_info("\tKick kthreads if too-long grace period.\n");
+	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
+		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
+	if (gp_preinit_delay)
+		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
+	if (gp_init_delay)
+		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
+	if (gp_cleanup_delay)
+		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
+		pr_info("\tRCU debug extended QS entry/exit.\n");
+	rcupdate_announce_bootup_oddness();
 }
 
 #ifdef CONFIG_PREEMPT_RCU
@@ -155,6 +179,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
 	struct task_struct *t = current;
 
+	lockdep_assert_held(&rnp->lock);
+
 	/*
 	 * Decide where to queue the newly blocked task.  In theory,
 	 * this could be an if-statement.  In practice, when I tried
@@ -263,6 +289,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  */
 static void rcu_preempt_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data_p->gpnum),
@@ -286,12 +313,14 @@ static void rcu_preempt_qs(void)
  *
  * Caller must disable interrupts.
  */
-static void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(bool preempt)
 {
 	struct task_struct *t = current;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n");
+	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 	if (t->rcu_read_lock_nesting > 0 &&
 	    !t->rcu_read_unlock_special.b.blocked) {
 
@@ -607,6 +636,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
 	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
 	if (rcu_preempt_has_tasks(rnp))
 		rnp->gp_tasks = rnp->blkd_tasks.next;
@@ -643,8 +673,37 @@ static void rcu_preempt_do_callbacks(void)
 
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-/*
- * Queue a preemptible-RCU callback for invocation after a grace period.
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical sections.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
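A minimal sketch of the pattern this kerneldoc describes (hypothetical struct and function names, not part of the patch):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item {
	int key;
	struct rcu_head rh;	/* Embedded storage for the deferred callback. */
};

static void item_free_rcu(struct rcu_head *rhp)
{
	/* Invoked only after every reader that could still see the item is done. */
	kfree(container_of(rhp, struct item, rh));
}

static void item_retire(struct item *it)
{
	/* Unlink 'it' from whatever RCU-protected structure holds it first. */
	call_rcu(&it->rh, item_free_rcu);
}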
@@ -663,8 +722,13 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
  *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
+ * See the description of synchronize_sched() for more detailed
+ * information on memory-ordering guarantees.  However, please note
+ * that -only- the memory-ordering guarantees apply.  For example,
+ * synchronize_rcu() is -not- guaranteed to wait on things like code
+ * protected by preempt_disable(); instead, synchronize_rcu() is -only-
+ * guaranteed to wait on RCU read-side critical sections, that is, sections
+ * of code protected by rcu_read_lock().
  */
 void synchronize_rcu(void)
 {
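For contrast with call_rcu(), a blocking-updater sketch of the guarantee described above: the updater publishes a new pointer, waits for all rcu_read_lock() readers, then frees the old object. Names are hypothetical and merely illustrative:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct blob {
	int payload;
};

static struct blob __rcu *cur_blob;	/* Assumed to be initialized elsewhere. */

static int blob_read(void)
{
	int v;

	rcu_read_lock();
	v = rcu_dereference(cur_blob)->payload;
	rcu_read_unlock();
	return v;
}

static void blob_replace(struct blob *newb)
{
	struct blob *oldb = rcu_dereference_protected(cur_blob, 1);

	rcu_assign_pointer(cur_blob, newb);
	synchronize_rcu();	/* Every rcu_read_lock() reader that saw oldb is done. */
	kfree(oldb);
}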
@@ -738,7 +802,7 @@ static void __init rcu_bootup_announce(void)
  * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(bool preempt)
 {
 }
 
@@ -835,33 +899,6 @@ void exit_rcu(void)
 
 #include "../locking/rtmutex_common.h"
 
-#ifdef CONFIG_RCU_TRACE
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-	if (!rcu_preempt_has_tasks(rnp))
-		rnp->n_balk_blkd_tasks++;
-	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
-		rnp->n_balk_exp_gp_tasks++;
-	else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
-		rnp->n_balk_boost_tasks++;
-	else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
-		rnp->n_balk_notblocked++;
-	else if (rnp->gp_tasks != NULL &&
-		 ULONG_CMP_LT(jiffies, rnp->boost_time))
-		rnp->n_balk_notyet++;
-	else
-		rnp->n_balk_nos++;
-}
-
-#else /* #ifdef CONFIG_RCU_TRACE */
-
-static void rcu_initiate_boost_trace(struct rcu_node *rnp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_RCU_TRACE */
-
 static void rcu_wake_cond(struct task_struct *t, int status)
 {
 	/*
@@ -992,8 +1029,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 {
 	struct task_struct *t;
 
+	lockdep_assert_held(&rnp->lock);
 	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
-		rnp->n_balk_exp_gp_tasks++;
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -1009,7 +1046,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 		if (t)
 			rcu_wake_cond(t, rnp->boost_kthread_status);
 	} else {
-		rcu_initiate_boost_trace(rnp);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
@@ -1260,8 +1296,7 @@ static void rcu_prepare_kthreads(int cpu)
 int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
 	*nextevt = KTIME_MAX;
-	return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
-	       ? 0 : rcu_cpu_has_callbacks(NULL);
+	return rcu_cpu_has_callbacks(NULL);
 }
 
 /*
@@ -1372,10 +1407,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 	unsigned long dj;
 
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
-		*nextevt = KTIME_MAX;
-		return 0;
-	}
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_needs_cpu() invoked with irqs enabled!!!");
 
 	/* Snapshot to detect later posting of non-lazy callback. */
 	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
@@ -1424,8 +1456,8 @@ static void rcu_prepare_for_idle(void)
 	struct rcu_state *rsp;
 	int tne;
 
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-	    rcu_is_nocb_cpu(smp_processor_id()))
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_prepare_for_idle() invoked with irqs enabled!!!");
+	if (rcu_is_nocb_cpu(smp_processor_id()))
 		return;
 
 	/* Handle nohz enablement switches conservatively. */
@@ -1479,8 +1511,8 @@ static void rcu_prepare_for_idle(void)
  */
 static void rcu_cleanup_after_idle(void)
 {
-	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
-	    rcu_is_nocb_cpu(smp_processor_id()))
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_cleanup_after_idle() invoked with irqs enabled!!!");
+	if (rcu_is_nocb_cpu(smp_processor_id()))
 		return;
 	if (rcu_try_advance_all_cbs())
 		invoke_rcu_core();
@@ -1747,7 +1779,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 /* Is the specified CPU a no-CBs CPU? */
 bool rcu_is_nocb_cpu(int cpu)
 {
@@ -1755,7 +1786,6 @@ bool rcu_is_nocb_cpu(int cpu)
 		return cpumask_test_cpu(cpu, rcu_nocb_mask);
 	return false;
 }
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 
 /*
  * Kick the leader kthread for this NOCB group.
@@ -1769,6 +1799,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
 		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
+		smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
 		swake_up(&rdp_leader->nocb_wq);
 	}
 }
@@ -1860,7 +1891,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
-			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE);
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE);
 			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
 			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -1874,7 +1905,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE);
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE);
 			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
 			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -2023,6 +2054,7 @@ static void nocb_leader_wait(struct rcu_data *my_rdp)
 	 * nocb_gp_head, where they await a grace period.
 	 */
 	gotcbs = false;
+	smp_mb(); /* wakeup before ->nocb_head reads. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
 		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
 		if (!rdp->nocb_gp_head)
@@ -2201,8 +2233,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
 	if (!rcu_nocb_need_deferred_wakeup(rdp))
 		return;
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
-	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
-	wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
+	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+	wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE);
 	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
 }
 
@@ -2212,10 +2244,6 @@ void __init rcu_init_nohz(void)
 	bool need_rcu_nocb_mask = true;
 	struct rcu_state *rsp;
 
-#ifdef CONFIG_RCU_NOCB_CPU_NONE
-	need_rcu_nocb_mask = false;
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
-
 #if defined(CONFIG_NO_HZ_FULL)
 	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
 		need_rcu_nocb_mask = true;
@@ -2231,14 +2259,6 @@ void __init rcu_init_nohz(void)
 	if (!have_rcu_nocb_mask)
 		return;
 
-#ifdef CONFIG_RCU_NOCB_CPU_ZERO
-	pr_info("\tOffload RCU callbacks from CPU 0\n");
-	cpumask_set_cpu(0, rcu_nocb_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
-#ifdef CONFIG_RCU_NOCB_CPU_ALL
-	pr_info("\tOffload RCU callbacks from all CPUs\n");
-	cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
 #if defined(CONFIG_NO_HZ_FULL)
 	if (tick_nohz_full_running)
 		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
@@ -2491,421 +2511,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
 }
 
-
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-
-static int full_sysidle_state;		/* Current system-idle state. */
-#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
-#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
-#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
-#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, ready for sysidle. */
-#define RCU_SYSIDLE_FULL_NOTED	4	/* Actually entered sysidle state. */
-
-/*
- * Invoked to note exit from irq or task transition to idle.  Note that
- * usermode execution does -not- count as idle here!  After all, we want
- * to detect full-system idle states, not RCU quiescent states and grace
- * periods.  The caller must have disabled interrupts.
- */
-static void rcu_sysidle_enter(int irq)
-{
-	unsigned long j;
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for fully idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting--;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-		if (rdtp->dynticks_idle_nesting != 0)
-			return;  /* Still not fully idle. */
-	} else {
-		if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
-		    DYNTICK_TASK_NEST_VALUE) {
-			rdtp->dynticks_idle_nesting = 0;
-		} else {
-			rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
-			return;  /* Still not fully idle. */
-		}
-	}
-
-	/* Record start of fully idle period. */
-	j = jiffies;
-	WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
-}
-
-/*
- * Unconditionally force exit from full system-idle state.  This is
- * invoked when a normal CPU exits idle, but must be called separately
- * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
- * is that the timekeeping CPU is permitted to take scheduling-clock
- * interrupts while the system is in system-idle state, and of course
- * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
- * interrupt from any other type of interrupt.
- */
-void rcu_sysidle_force_exit(void)
-{
-	int oldstate = READ_ONCE(full_sysidle_state);
-	int newoldstate;
-
-	/*
-	 * Each pass through the following loop attempts to exit full
-	 * system-idle state.  If contention proves to be a problem,
-	 * a trylock-based contention tree could be used here.
-	 */
-	while (oldstate > RCU_SYSIDLE_SHORT) {
-		newoldstate = cmpxchg(&full_sysidle_state,
-				      oldstate, RCU_SYSIDLE_NOT);
-		if (oldstate == newoldstate &&
-		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
-			rcu_kick_nohz_cpu(tick_do_timer_cpu);
-			return; /* We cleared it, done! */
-		}
-		oldstate = newoldstate;
-	}
-	smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
-}
-
-/*
- * Invoked to note entry to irq or task transition from idle.  Note that
- * usermode execution does -not- count as idle here!  The caller must
- * have disabled interrupts.
- */
-static void rcu_sysidle_exit(int irq)
-{
-	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/* Adjust nesting, check for already non-idle. */
-	if (irq) {
-		rdtp->dynticks_idle_nesting++;
-		WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-		if (rdtp->dynticks_idle_nesting != 1)
-			return; /* Already non-idle. */
-	} else {
-		/*
-		 * Allow for irq misnesting.  Yes, it really is possible
-		 * to enter an irq handler then never leave it, and maybe
-		 * also vice versa.  Handle both possibilities.
-		 */
-		if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
-			rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
-			WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
-			return; /* Already non-idle. */
-		} else {
-			rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
-		}
-	}
-
-	/* Record end of idle period. */
-	smp_mb__before_atomic();
-	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic();
-	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
-
-	/*
-	 * If we are the timekeeping CPU, we are permitted to be non-idle
-	 * during a system-idle state.  This must be the case, because
-	 * the timekeeping CPU has to take scheduling-clock interrupts
-	 * during the time that the system is transitioning to full
-	 * system-idle state.  This means that the timekeeping CPU must
-	 * invoke rcu_sysidle_force_exit() directly if it does anything
-	 * more than take a scheduling-clock interrupt.
-	 */
-	if (smp_processor_id() == tick_do_timer_cpu)
-		return;
-
-	/* Update system-idle state: We are clearly no longer fully idle! */
-	rcu_sysidle_force_exit();
-}
-
-/*
- * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts,
- * and must be running on tick_do_timer_cpu.
- */
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-	int cur;
-	unsigned long j;
-	struct rcu_dynticks *rdtp = rdp->dynticks;
-
-	/* If there are no nohz_full= CPUs, don't check system-wide idleness. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	/*
-	 * If some other CPU has already reported non-idle, if this is
-	 * not the flavor of RCU that tracks sysidle state, or if this
-	 * is an offline or the timekeeping CPU, nothing to do.
-	 */
-	if (!*isidle || rdp->rsp != rcu_state_p ||
-	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
-		return;
-	/* Verify affinity of current kthread. */
-	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
-
-	/* Pick up current idle and NMI-nesting counter and check. */
-	cur = atomic_read(&rdtp->dynticks_idle);
-	if (cur & 0x1) {
-		*isidle = false; /* We are not idle! */
-		return;
-	}
-	smp_mb(); /* Read counters before timestamps. */
-
-	/* Pick up timestamps. */
-	j = READ_ONCE(rdtp->dynticks_idle_jiffies);
-	/* If this CPU entered idle more recently, update maxj timestamp. */
-	if (ULONG_CMP_LT(*maxj, j))
-		*maxj = j;
-}
-
-/*
- * Is this the flavor of RCU that is handling full-system idle?
- */
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return rsp == rcu_state_p;
-}
-
-/*
- * Return a delay in jiffies based on the number of CPUs, rcu_node
- * leaf fanout, and jiffies tick rate.  The idea is to allow larger
- * systems more time to transition to full-idle state in order to
- * avoid the cache thrashing that otherwise occur on the state variable.
- * Really small systems (less than a couple of tens of CPUs) should
- * instead use a single global atomically incremented counter, and later
- * versions of this will automatically reconfigure themselves accordingly.
- */
-static unsigned long rcu_sysidle_delay(void)
-{
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return 0;
-	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
-}
-
-/*
- * Advance the full-system-idle state.  This is invoked when all of
- * the non-timekeeping CPUs are idle.
- */
-static void rcu_sysidle(unsigned long j)
-{
-	/* Check the current state. */
-	switch (READ_ONCE(full_sysidle_state)) {
-	case RCU_SYSIDLE_NOT:
-
-		/* First time all are idle, so note a short idle period. */
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
-		break;
-
-	case RCU_SYSIDLE_SHORT:
-
-		/*
-		 * Idle for a bit, time to advance to next state?
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
-		break;
-
-	case RCU_SYSIDLE_LONG:
-
-		/*
-		 * Do an additional check pass before advancing to full.
-		 * cmpxchg failure means race with non-idle, let them win.
-		 */
-		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
-			(void)cmpxchg(&full_sysidle_state,
-				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
-		break;
-
-	default:
-		break;
-	}
-}
-
-/*
- * Found a non-idle non-timekeeping CPU, so kick the system-idle state
- * back to the beginning.
- */
-static void rcu_sysidle_cancel(void)
-{
-	smp_mb();
-	if (full_sysidle_state > RCU_SYSIDLE_SHORT)
-		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
-}
-
-/*
- * Update the sysidle state based on the results of a force-quiescent-state
- * scan of the CPUs' dyntick-idle state.
- */
-static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
-			       unsigned long maxj, bool gpkt)
-{
-	if (rsp != rcu_state_p)
-		return;  /* Wrong flavor, ignore. */
-	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
-		return;  /* Running state machine from timekeeping CPU. */
-	if (isidle)
-		rcu_sysidle(maxj);    /* More idle! */
-	else
-		rcu_sysidle_cancel(); /* Idle is over. */
-}
-
-/*
- * Wrapper for rcu_sysidle_report() when called from the grace-period
- * kthread's context.
- */
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-	/* If there are no nohz_full= CPUs, no need to track this. */
-	if (!tick_nohz_full_enabled())
-		return;
-
-	rcu_sysidle_report(rsp, isidle, maxj, true);
-}
-
-/* Callback and function for forcing an RCU grace period. */
-struct rcu_sysidle_head {
-	struct rcu_head rh;
-	int inuse;
-};
-
-static void rcu_sysidle_cb(struct rcu_head *rhp)
-{
-	struct rcu_sysidle_head *rshp;
-
-	/*
-	 * The following memory barrier is needed to replace the
-	 * memory barriers that would normally be in the memory
-	 * allocator.
-	 */
-	smp_mb();  /* grace period precedes setting inuse. */
-
-	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
-	WRITE_ONCE(rshp->inuse, 0);
-}
-
-/*
- * Check to see if the system is fully idle, other than the timekeeping CPU.
- * The caller must have disabled interrupts.  This is not intended to be
- * called unless tick_nohz_full_enabled().
- */
-bool rcu_sys_is_idle(void)
-{
-	static struct rcu_sysidle_head rsh;
-	int rss = READ_ONCE(full_sysidle_state);
-
-	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
-		return false;
-
-	/* Handle small-system case by doing a full scan of CPUs. */
-	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
-		int oldrss = rss - 1;
-
-		/*
-		 * One pass to advance to each state up to _FULL.
-		 * Give up if any pass fails to advance the state.
-		 */
-		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
-			int cpu;
-			bool isidle = true;
-			unsigned long maxj = jiffies - ULONG_MAX / 4;
-			struct rcu_data *rdp;
-
-			/* Scan all the CPUs looking for nonidle CPUs. */
-			for_each_possible_cpu(cpu) {
-				rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
-				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
-				if (!isidle)
-					break;
-			}
-			rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
-			oldrss = rss;
-			rss = READ_ONCE(full_sysidle_state);
-		}
-	}
-
-	/* If this is the first observation of an idle period, record it. */
-	if (rss == RCU_SYSIDLE_FULL) {
-		rss = cmpxchg(&full_sysidle_state,
-			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
-		return rss == RCU_SYSIDLE_FULL;
-	}
-
-	smp_mb(); /* ensure rss load happens before later caller actions. */
-
-	/* If already fully idle, tell the caller (in case of races). */
-	if (rss == RCU_SYSIDLE_FULL_NOTED)
-		return true;
-
-	/*
-	 * If we aren't there yet, and a grace period is not in flight,
-	 * initiate a grace period.  Either way, tell the caller that
-	 * we are not there yet.  We use an xchg() rather than an assignment
-	 * to make up for the memory barriers that would otherwise be
-	 * provided by the memory allocator.
-	 */
-	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
-	    !rcu_gp_in_progress(rcu_state_p) &&
-	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
-		call_rcu(&rsh.rh, rcu_sysidle_cb);
-	return false;
-}
-
-/*
- * Initialize dynticks sysidle state for CPUs coming online.
- */
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-	rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
-}
-
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
-static void rcu_sysidle_enter(int irq)
-{
-}
-
-static void rcu_sysidle_exit(int irq)
-{
-}
-
-static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
-				  unsigned long *maxj)
-{
-}
-
-static bool is_sysidle_rcu_state(struct rcu_state *rsp)
-{
-	return false;
-}
-
-static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
-				  unsigned long maxj)
-{
-}
-
-static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-
 /*
  * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
  * grace-period kthread will do force_quiescent_state() processing?
@@ -2936,13 +2541,7 @@ static void rcu_bind_gp_kthread(void)
 
 	if (!tick_nohz_full_enabled())
 		return;
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	cpu = tick_do_timer_cpu;
-	if (cpu >= 0 && cpu < nr_cpu_ids)
-		set_cpus_allowed_ptr(current, cpumask_of(cpu));
-#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 	housekeeping_affine(current);
-#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
 
 /* Record the current task on dyntick-idle entry. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
deleted file mode 100644
index 6cea17a..0000000
--- a/kernel/rcu/tree_trace.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Read-Copy Update tracing for hierarchical implementation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- * Copyright IBM Corporation, 2008
- * Author: Paul E. McKenney
- *
- * Papers:  http://www.rdrop.com/users/paulmck/RCU
- *
- * For detailed explanation of Read-Copy Update mechanism see -
- *		Documentation/RCU
- *
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/smp.h>
-#include <linux/rcupdate.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/prefetch.h>
-
-#define RCU_TREE_NONCORE
-#include "tree.h"
-#include "rcu.h"
-
-static int r_open(struct inode *inode, struct file *file,
-					const struct seq_operations *op)
-{
-	int ret = seq_open(file, op);
-	if (!ret) {
-		struct seq_file *m = (struct seq_file *)file->private_data;
-		m->private = inode->i_private;
-	}
-	return ret;
-}
-
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	*pos = cpumask_next(*pos - 1, cpu_possible_mask);
-	if ((*pos) < nr_cpu_ids)
-		return per_cpu_ptr(rsp->rda, *pos);
-	return NULL;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return r_start(m, pos);
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-}
-
-static int show_rcubarrier(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	seq_printf(m, "bcc: %d bseq: %lu\n",
-		   atomic_read(&rsp->barrier_cpu_count),
-		   rsp->barrier_sequence);
-	return 0;
-}
-
-static int rcubarrier_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcubarrier, inode->i_private);
-}
-
-static const struct file_operations rcubarrier_fops = {
-	.owner = THIS_MODULE,
-	.open = rcubarrier_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static char convert_kthread_status(unsigned int kthread_status)
-{
-	if (kthread_status > RCU_KTHREAD_MAX)
-		return '?';
-	return "SRWOY"[kthread_status];
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
-{
-	long ql, qll;
-
-	if (!rdp->beenonline)
-		return;
-	seq_printf(m, "%3d%cc=%ld g=%ld cnq=%d/%d:%d",
-		   rdp->cpu,
-		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
-		   ulong2long(rdp->completed), ulong2long(rdp->gpnum),
-		   rdp->cpu_no_qs.b.norm,
-		   rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu),
-		   rdp->core_needs_qs);
-	seq_printf(m, " dt=%d/%llx/%d df=%lu",
-		   rcu_dynticks_snap(rdp->dynticks),
-		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi_nesting,
-		   rdp->dynticks_fqs);
-	seq_printf(m, " of=%lu", rdp->offline_fqs);
-	rcu_nocb_q_lengths(rdp, &ql, &qll);
-	qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist);
-	ql += rcu_segcblist_n_cbs(&rdp->cblist);
-	seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
-		   qll, ql,
-		   ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)],
-		   ".R"[!rcu_segcblist_segempty(&rdp->cblist,
-						RCU_NEXT_READY_TAIL)],
-		   ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)],
-		   ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]);
-#ifdef CONFIG_RCU_BOOST
-	seq_printf(m, " kt=%d/%c ktl=%x",
-		   per_cpu(rcu_cpu_has_work, rdp->cpu),
-		   convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
-					  rdp->cpu)),
-		   per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-	seq_printf(m, " b=%ld", rdp->blimit);
-	seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
-		   rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
-		   rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
-}
-
-static int show_rcudata(struct seq_file *m, void *v)
-{
-	print_one_rcu_data(m, (struct rcu_data *)v);
-	return 0;
-}
-
-static const struct seq_operations rcudate_op = {
-	.start = r_start,
-	.next  = r_next,
-	.stop  = r_stop,
-	.show  = show_rcudata,
-};
-
-static int rcudata_open(struct inode *inode, struct file *file)
-{
-	return r_open(inode, file, &rcudate_op);
-}
-
-static const struct file_operations rcudata_fops = {
-	.owner = THIS_MODULE,
-	.open = rcudata_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = seq_release,
-};
-
-static int show_rcuexp(struct seq_file *m, void *v)
-{
-	int cpu;
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
-
-	for_each_possible_cpu(cpu) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->exp_workdone0);
-		s1 += atomic_long_read(&rdp->exp_workdone1);
-		s2 += atomic_long_read(&rdp->exp_workdone2);
-		s3 += atomic_long_read(&rdp->exp_workdone3);
-	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
-		   atomic_read(&rsp->expedited_need_qs),
-		   rsp->expedited_sequence / 2);
-	return 0;
-}
-
-static int rcuexp_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcuexp, inode->i_private);
-}
-
-static const struct file_operations rcuexp_fops = {
-	.owner = THIS_MODULE,
-	.open = rcuexp_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#ifdef CONFIG_RCU_BOOST
-
-static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
-{
-	seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
-		   rnp->grplo, rnp->grphi,
-		   "T."[list_empty(&rnp->blkd_tasks)],
-		   "N."[!rnp->gp_tasks],
-		   "E."[!rnp->exp_tasks],
-		   "B."[!rnp->boost_tasks],
-		   convert_kthread_status(rnp->boost_kthread_status),
-		   rnp->n_tasks_boosted, rnp->n_exp_boosts,
-		   rnp->n_normal_boosts);
-	seq_printf(m, "j=%04x bt=%04x\n",
-		   (int)(jiffies & 0xffff),
-		   (int)(rnp->boost_time & 0xffff));
-	seq_printf(m, "    balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
-		   rnp->n_balk_blkd_tasks,
-		   rnp->n_balk_exp_gp_tasks,
-		   rnp->n_balk_boost_tasks,
-		   rnp->n_balk_notblocked,
-		   rnp->n_balk_notyet,
-		   rnp->n_balk_nos);
-}
-
-static int show_rcu_node_boost(struct seq_file *m, void *unused)
-{
-	struct rcu_node *rnp;
-
-	rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
-		print_one_rcu_node_boost(m, rnp);
-	return 0;
-}
-
-static int rcu_node_boost_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcu_node_boost, NULL);
-}
-
-static const struct file_operations rcu_node_boost_fops = {
-	.owner = THIS_MODULE,
-	.open = rcu_node_boost_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
-{
-	unsigned long gpnum;
-	int level = 0;
-	struct rcu_node *rnp;
-
-	gpnum = rsp->gpnum;
-	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
-		   ulong2long(rsp->completed), ulong2long(gpnum),
-		   rsp->gp_state,
-		   (long)(rsp->jiffies_force_qs - jiffies),
-		   (int)(jiffies & 0xffff));
-	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
-		   rsp->n_force_qs, rsp->n_force_qs_ngp,
-		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   READ_ONCE(rsp->n_force_qs_lh),
-		   rsp->orphan_done.len_lazy,
-		   rsp->orphan_done.len);
-	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
-		if (rnp->level != level) {
-			seq_puts(m, "\n");
-			level = rnp->level;
-		}
-		seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d    ",
-			   rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext,
-			   ".G"[rnp->gp_tasks != NULL],
-			   ".E"[rnp->exp_tasks != NULL],
-			   ".T"[!list_empty(&rnp->blkd_tasks)],
-			   rnp->grplo, rnp->grphi, rnp->grpnum);
-	}
-	seq_puts(m, "\n");
-}
-
-static int show_rcuhier(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	print_one_rcu_state(m, rsp);
-	return 0;
-}
-
-static int rcuhier_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcuhier, inode->i_private);
-}
-
-static const struct file_operations rcuhier_fops = {
-	.owner = THIS_MODULE,
-	.open = rcuhier_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
-{
-	unsigned long flags;
-	unsigned long completed;
-	unsigned long gpnum;
-	unsigned long gpage;
-	unsigned long gpmax;
-	struct rcu_node *rnp = &rsp->node[0];
-
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	completed = READ_ONCE(rsp->completed);
-	gpnum = READ_ONCE(rsp->gpnum);
-	if (completed == gpnum)
-		gpage = 0;
-	else
-		gpage = jiffies - rsp->gp_start;
-	gpmax = rsp->gp_max;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	seq_printf(m, "completed=%ld  gpnum=%ld  age=%ld  max=%ld\n",
-		   ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
-}
-
-static int show_rcugp(struct seq_file *m, void *v)
-{
-	struct rcu_state *rsp = (struct rcu_state *)m->private;
-	show_one_rcugp(m, rsp);
-	return 0;
-}
-
-static int rcugp_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcugp, inode->i_private);
-}
-
-static const struct file_operations rcugp_fops = {
-	.owner = THIS_MODULE,
-	.open = rcugp_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = single_release,
-};
-
-static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
-{
-	if (!rdp->beenonline)
-		return;
-	seq_printf(m, "%3d%cnp=%ld ",
-		   rdp->cpu,
-		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
-		   rdp->n_rcu_pending);
-	seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
-		   rdp->n_rp_core_needs_qs,
-		   rdp->n_rp_report_qs,
-		   rdp->n_rp_cb_ready,
-		   rdp->n_rp_cpu_needs_gp);
-	seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
-		   rdp->n_rp_gp_completed,
-		   rdp->n_rp_gp_started,
-		   rdp->n_rp_nocb_defer_wakeup,
-		   rdp->n_rp_need_nothing);
-}
-
-static int show_rcu_pending(struct seq_file *m, void *v)
-{
-	print_one_rcu_pending(m, (struct rcu_data *)v);
-	return 0;
-}
-
-static const struct seq_operations rcu_pending_op = {
-	.start = r_start,
-	.next  = r_next,
-	.stop  = r_stop,
-	.show  = show_rcu_pending,
-};
-
-static int rcu_pending_open(struct inode *inode, struct file *file)
-{
-	return r_open(inode, file, &rcu_pending_op);
-}
-
-static const struct file_operations rcu_pending_fops = {
-	.owner = THIS_MODULE,
-	.open = rcu_pending_open,
-	.read = seq_read,
-	.llseek = no_llseek,
-	.release = seq_release,
-};
-
-static int show_rcutorture(struct seq_file *m, void *unused)
-{
-	seq_printf(m, "rcutorture test sequence: %lu %s\n",
-		   rcutorture_testseq >> 1,
-		   (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
-	seq_printf(m, "rcutorture update version number: %lu\n",
-		   rcutorture_vernum);
-	return 0;
-}
-
-static int rcutorture_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_rcutorture, NULL);
-}
-
-static const struct file_operations rcutorture_fops = {
-	.owner = THIS_MODULE,
-	.open = rcutorture_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static struct dentry *rcudir;
-
-static int __init rcutree_trace_init(void)
-{
-	struct rcu_state *rsp;
-	struct dentry *retval;
-	struct dentry *rspdir;
-
-	rcudir = debugfs_create_dir("rcu", NULL);
-	if (!rcudir)
-		goto free_out;
-
-	for_each_rcu_flavor(rsp) {
-		rspdir = debugfs_create_dir(rsp->name, rcudir);
-		if (!rspdir)
-			goto free_out;
-
-		retval = debugfs_create_file("rcudata", 0444,
-				rspdir, rsp, &rcudata_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcuexp", 0444,
-				rspdir, rsp, &rcuexp_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcu_pending", 0444,
-				rspdir, rsp, &rcu_pending_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcubarrier", 0444,
-				rspdir, rsp, &rcubarrier_fops);
-		if (!retval)
-			goto free_out;
-
-#ifdef CONFIG_RCU_BOOST
-		if (rsp == &rcu_preempt_state) {
-			retval = debugfs_create_file("rcuboost", 0444,
-				rspdir, NULL, &rcu_node_boost_fops);
-			if (!retval)
-				goto free_out;
-		}
-#endif
-
-		retval = debugfs_create_file("rcugp", 0444,
-				rspdir, rsp, &rcugp_fops);
-		if (!retval)
-			goto free_out;
-
-		retval = debugfs_create_file("rcuhier", 0444,
-				rspdir, rsp, &rcuhier_fops);
-		if (!retval)
-			goto free_out;
-	}
-
-	retval = debugfs_create_file("rcutorture", 0444, rcudir,
-						NULL, &rcutorture_fops);
-	if (!retval)
-		goto free_out;
-	return 0;
-free_out:
-	debugfs_remove_recursive(rcudir);
-	return 1;
-}
-device_initcall(rcutree_trace_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 273e869..00e77c4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,7 +62,9 @@
 #define MODULE_PARAM_PREFIX "rcupdate."
 
 #ifndef CONFIG_TINY_RCU
+extern int rcu_expedited; /* from sysctl */
 module_param(rcu_expedited, int, 0);
+extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot;
 module_param(rcu_normal_after_boot, int, 0);
@@ -379,6 +381,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		   struct rcu_synchronize *rs_array)
 {
 	int i;
+	int j;
 
 	/* Initialize and register callbacks for each flavor specified. */
 	for (i = 0; i < n; i++) {
@@ -390,7 +393,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		}
 		init_rcu_head_on_stack(&rs_array[i].head);
 		init_completion(&rs_array[i].completion);
-		(crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
+		for (j = 0; j < i; j++)
+			if (crcu_array[j] == crcu_array[i])
+				break;
+		if (j == i)
+			(crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
 	}
 
 	/* Wait for all callbacks to be invoked. */
@@ -399,7 +406,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
 		    (crcu_array[i] == call_rcu ||
 		     crcu_array[i] == call_rcu_bh))
 			continue;
-		wait_for_completion(&rs_array[i].completion);
+		for (j = 0; j < i; j++)
+			if (crcu_array[j] == crcu_array[i])
+				break;
+		if (j == i)
+			wait_for_completion(&rs_array[i].completion);
 		destroy_rcu_head_on_stack(&rs_array[i].head);
 	}
 }
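
The change above lets __wait_rcu_gp() tolerate the same callback-registration
function appearing in more than one slot of crcu_array[]: a slot is acted on
only if no earlier slot holds the same function pointer, so each distinct
flavor is registered and waited for exactly once. A minimal standalone sketch
of that idiom, in plain C with invented names:

#include <stdio.h>

typedef void (*gp_func_t)(void);

static void flavor_a(void) { puts("flavor_a"); }
static void flavor_b(void) { puts("flavor_b"); }

/* Invoke each distinct function once, skipping later duplicates. */
static void invoke_unique(gp_func_t *array, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                for (j = 0; j < i; j++)
                        if (array[j] == array[i])
                                break;
                if (j == i)                 /* first occurrence only */
                        array[i]();
        }
}

int main(void)
{
        gp_func_t funcs[] = { flavor_a, flavor_b, flavor_a };

        invoke_unique(funcs, 3);            /* flavor_a and flavor_b each run once */
        return 0;
}
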
@@ -560,15 +571,30 @@ static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
 DEFINE_SRCU(tasks_rcu_exit_srcu);
 
 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
-static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
+static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
 
 static void rcu_spawn_tasks_kthread(void);
 static struct task_struct *rcu_tasks_kthread_ptr;
 
-/*
- * Post an RCU-tasks callback.  First call must be from process context
- * after the scheduler if fully operational.
+/**
+ * call_rcu_tasks() - Queue an RCU-tasks callback for invocation after a grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all currently executing RCU
+ * read-side critical sections have completed. call_rcu_tasks() assumes
+ * that the read-side critical sections end at a voluntary context
+ * switch (not a preemption!), entry into idle, or transition to usermode
+ * execution.  As such, there are no read-side primitives analogous to
+ * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
+ * to determine that all tasks have passed through a safe state, not so
+ * much for data-structure synchronization.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
  */
 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
 {
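
A hypothetical caller, sketched against the signature shown above; struct foo,
foo_reclaim() and foo_retire() are invented for illustration. The callback is
invoked only after every task has passed through a voluntary context switch,
entry into idle, or usermode execution:

struct foo {
        struct rcu_head rh;
        int data;
};

static void foo_reclaim(struct rcu_head *rhp)
{
        struct foo *fp = container_of(rhp, struct foo, rh);

        kfree(fp);
}

static void foo_retire(struct foo *fp)
{
        /* fp is freed only after a full tasks-RCU grace period. */
        call_rcu_tasks(&fp->rh, foo_reclaim);
}
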
@@ -851,6 +877,23 @@ static void rcu_spawn_tasks_kthread(void)
 
 #endif /* #ifdef CONFIG_TASKS_RCU */
 
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any non-default Tasks RCU settings.
+ */
+static void __init rcu_tasks_bootup_oddness(void)
+{
+#ifdef CONFIG_TASKS_RCU
+	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
+		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+	else
+		pr_info("\tTasks RCU enabled.\n");
+#endif /* #ifdef CONFIG_TASKS_RCU */
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
+
 #ifdef CONFIG_PROVE_RCU
 
 /*
@@ -935,3 +978,25 @@ late_initcall(rcu_verify_early_boot_tests);
 #else
 void rcu_early_boot_tests(void) {}
 #endif /* CONFIG_PROVE_RCU */
+
+#ifndef CONFIG_TINY_RCU
+
+/*
+ * Print any significant non-default boot-time settings.
+ */
+void __init rcupdate_announce_bootup_oddness(void)
+{
+	if (rcu_normal)
+		pr_info("\tNo expedited grace period (rcu_normal).\n");
+	else if (rcu_normal_after_boot)
+		pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n");
+	else if (rcu_expedited)
+		pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
+	if (rcu_cpu_stall_suppress)
+		pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
+	if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
+		pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
+	rcu_tasks_bootup_oddness();
+}
+
+#endif /* #ifndef CONFIG_TINY_RCU */
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 89ab675..53f0164 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,9 +16,9 @@
 endif
 
 obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
-obj-y += wait.o swait.o completion.o idle.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o
+obj-y += idle_task.o fair.o rt.o deadline.o
+obj-y += wait.o wait_bit.o swait.o completion.o idle.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 00a45c4..ca0f8fc 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -64,6 +64,7 @@
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
 #include <linux/tick.h>
+#include <linux/init.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -124,14 +125,27 @@ int sched_clock_stable(void)
 	return static_branch_likely(&__sched_clock_stable);
 }
 
+static void __scd_stamp(struct sched_clock_data *scd)
+{
+	scd->tick_gtod = ktime_get_ns();
+	scd->tick_raw = sched_clock();
+}
+
 static void __set_sched_clock_stable(void)
 {
-	struct sched_clock_data *scd = this_scd();
+	struct sched_clock_data *scd;
 
 	/*
+	 * Since we're still unstable and the tick is already running, we have
+	 * to disable IRQs in order to get a consistent scd->tick* reading.
+	 */
+	local_irq_disable();
+	scd = this_scd();
+	/*
 	 * Attempt to make the (initial) unstable->stable transition continuous.
 	 */
 	__sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
+	local_irq_enable();
 
 	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
 			scd->tick_gtod, __gtod_offset,
@@ -141,8 +155,38 @@ static void __set_sched_clock_stable(void)
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
+/*
+ * If we ever get here, we're screwed, because we found out -- typically after
+ * the fact -- that TSC wasn't good. This means all our clocksources (including
+ * ktime) could have reported wrong values.
+ *
+ * What we do here is an attempt to fix up and continue sort of where we left
+ * off in a coherent manner.
+ *
+ * The only way to fully avoid random clock jumps is to boot with:
+ * "tsc=unstable".
+ */
 static void __sched_clock_work(struct work_struct *work)
 {
+	struct sched_clock_data *scd;
+	int cpu;
+
+	/* take a current timestamp and set 'now' */
+	preempt_disable();
+	scd = this_scd();
+	__scd_stamp(scd);
+	scd->clock = scd->tick_gtod + __gtod_offset;
+	preempt_enable();
+
+	/* clone to all CPUs */
+	for_each_possible_cpu(cpu)
+		per_cpu(sched_clock_data, cpu) = *scd;
+
+	printk(KERN_WARNING "TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.\n");
+	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+			scd->tick_gtod, __gtod_offset,
+			scd->tick_raw,  __sched_clock_offset);
+
 	static_branch_disable(&__sched_clock_stable);
 }
 
@@ -150,27 +194,11 @@ static DECLARE_WORK(sched_clock_work, __sched_clock_work);
 
 static void __clear_sched_clock_stable(void)
 {
-	struct sched_clock_data *scd = this_scd();
-
-	/*
-	 * Attempt to make the stable->unstable transition continuous.
-	 *
-	 * Trouble is, this is typically called from the TSC watchdog
-	 * timer, which is late per definition. This means the tick
-	 * values can already be screwy.
-	 *
-	 * Still do what we can.
-	 */
-	__gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
-
-	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-			scd->tick_gtod, __gtod_offset,
-			scd->tick_raw,  __sched_clock_offset);
+	if (!sched_clock_stable())
+		return;
 
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-
-	if (sched_clock_stable())
-		schedule_work(&sched_clock_work);
+	schedule_work(&sched_clock_work);
 }
 
 void clear_sched_clock_stable(void)
@@ -183,7 +211,11 @@ void clear_sched_clock_stable(void)
 		__clear_sched_clock_stable();
 }
 
-void sched_clock_init_late(void)
+/*
+ * We run this as late_initcall() such that it runs after all built-in drivers,
+ * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
+ */
+static int __init sched_clock_init_late(void)
 {
 	sched_clock_running = 2;
 	/*
@@ -197,7 +229,10 @@ void sched_clock_init_late(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+
+	return 0;
 }
+late_initcall(sched_clock_init_late);
 
 /*
  * min, max except they take wrapping into account
@@ -347,21 +382,38 @@ void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
 
+	if (sched_clock_stable())
+		return;
+
+	if (unlikely(!sched_clock_running))
+		return;
+
 	WARN_ON_ONCE(!irqs_disabled());
 
-	/*
-	 * Update these values even if sched_clock_stable(), because it can
-	 * become unstable at any point in time at which point we need some
-	 * values to fall back on.
-	 *
-	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
-	 */
 	scd = this_scd();
-	scd->tick_raw  = sched_clock();
-	scd->tick_gtod = ktime_get_ns();
+	__scd_stamp(scd);
+	sched_clock_local(scd);
+}
 
-	if (!sched_clock_stable() && likely(sched_clock_running))
-		sched_clock_local(scd);
+void sched_clock_tick_stable(void)
+{
+	u64 gtod, clock;
+
+	if (!sched_clock_stable())
+		return;
+
+	/*
+	 * Called under watchdog_lock.
+	 *
+	 * The watchdog just found this TSC to (still) be stable, so now is a
+	 * good moment to update our __gtod_offset. Because once we find the
+	 * TSC to be unstable, any computation will be computing crap.
+	 */
+	local_irq_disable();
+	gtod = ktime_get_ns();
+	clock = sched_clock();
+	__gtod_offset = (clock + __sched_clock_offset) - gtod;
+	local_irq_enable();
 }
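
The update above keeps the two timelines aligned at the sample point: right
after it, raw + __sched_clock_offset equals gtod + __gtod_offset, so the
gtod-based value and the raw sched_clock() value agree once their respective
offsets are applied. A toy standalone illustration with made-up numbers
(plain C, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t raw     = 1000;    /* pretend sched_clock() sample  */
        uint64_t gtod    =  400;    /* pretend ktime_get_ns() sample */
        uint64_t raw_off =   50;    /* existing __sched_clock_offset */

        /* mirrors: __gtod_offset = (clock + __sched_clock_offset) - gtod */
        uint64_t gtod_off = (raw + raw_off) - gtod;

        printf("raw timeline:  %llu\n", (unsigned long long)(raw + raw_off));
        printf("gtod timeline: %llu\n", (unsigned long long)(gtod + gtod_off));
        return 0;
}
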
 
 /*
@@ -374,15 +426,21 @@ void sched_clock_idle_sleep_event(void)
 EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 
 /*
- * We just idled delta nanoseconds (called with irqs disabled):
+ * We just idled; resync with ktime.
  */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
+void sched_clock_idle_wakeup_event(void)
 {
-	if (timekeeping_suspended)
+	unsigned long flags;
+
+	if (sched_clock_stable())
 		return;
 
+	if (unlikely(timekeeping_suspended))
+		return;
+
+	local_irq_save(flags);
 	sched_clock_tick();
-	touch_softlockup_watchdog_sched();
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 53f9558..13fc5ae 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -66,7 +66,7 @@ do_wait_for_common(struct completion *x,
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		__add_wait_queue_tail_exclusive(&x->wait, &wait);
+		__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 803c3bc..17c667b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/sched/types.h>
 #include <linux/sched/loadavg.h>
 #include <linux/sched/hotplug.h>
+#include <linux/wait_bit.h>
 #include <linux/cpuset.h>
 #include <linux/delayacct.h>
 #include <linux/init_task.h>
@@ -788,36 +789,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_task(rq, p, flags);
 }
 
-void sched_set_stop_task(int cpu, struct task_struct *stop)
-{
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
-	struct task_struct *old_stop = cpu_rq(cpu)->stop;
-
-	if (stop) {
-		/*
-		 * Make it appear like a SCHED_FIFO task, its something
-		 * userspace knows about and won't get confused about.
-		 *
-		 * Also, it will make PI more or less work without too
-		 * much confusion -- but then, stop work should not
-		 * rely on PI working anyway.
-		 */
-		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
-
-		stop->sched_class = &stop_sched_class;
-	}
-
-	cpu_rq(cpu)->stop = stop;
-
-	if (old_stop) {
-		/*
-		 * Reset it back to a normal scheduling class so that
-		 * it can die in pieces.
-		 */
-		old_stop->sched_class = &rt_sched_class;
-	}
-}
-
 /*
  * __normal_prio - return the priority that is based on the static prio
  */
@@ -1588,6 +1559,36 @@ static void update_avg(u64 *avg, u64 sample)
 	*avg += diff >> 3;
 }
 
+void sched_set_stop_task(int cpu, struct task_struct *stop)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct task_struct *old_stop = cpu_rq(cpu)->stop;
+
+	if (stop) {
+		/*
+		 * Make it appear like a SCHED_FIFO task, its something
+		 * userspace knows about and won't get confused about.
+		 *
+		 * Also, it will make PI more or less work without too
+		 * much confusion -- but then, stop work should not
+		 * rely on PI working anyway.
+		 */
+		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
+
+		stop->sched_class = &stop_sched_class;
+	}
+
+	cpu_rq(cpu)->stop = stop;
+
+	if (old_stop) {
+		/*
+		 * Reset it back to a normal scheduling class so that
+		 * it can die in pieces.
+		 */
+		old_stop->sched_class = &rt_sched_class;
+	}
+}
+
 #else
 
 static inline int __set_cpus_allowed_ptr(struct task_struct *p,
@@ -1731,7 +1732,7 @@ void sched_ttwu_pending(void)
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
-	struct task_struct *p;
+	struct task_struct *p, *t;
 	struct rq_flags rf;
 
 	if (!llist)
@@ -1740,17 +1741,8 @@ void sched_ttwu_pending(void)
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 
-	while (llist) {
-		int wake_flags = 0;
-
-		p = llist_entry(llist, struct task_struct, wake_entry);
-		llist = llist_next(llist);
-
-		if (p->sched_remote_wakeup)
-			wake_flags = WF_MIGRATED;
-
-		ttwu_do_activate(rq, p, wake_flags, &rf);
-	}
+	llist_for_each_entry_safe(p, t, llist, wake_entry)
+		ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
 
 	rq_unlock_irqrestore(rq, &rf);
 }
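
llist_for_each_entry_safe() in the rewritten loop above remembers the next
entry before the current one is passed to ttwu_do_activate(), since the
wake_entry node may be reused as soon as its task has been activated. A
standalone sketch of that safe-walk idiom over an intrusive singly linked
list, in plain C with invented names:

#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;          /* stand-in for struct llist_node       */
        int val;                    /* stand-in for the task being woken up */
};

static void process(struct node *n)
{
        printf("waking %d\n", n->val);
        free(n);                    /* the entry is gone after this */
}

static void drain(struct node *head)
{
        struct node *cur, *next;

        for (cur = head; cur; cur = next) {
                next = cur->next;   /* save before 'cur' can vanish */
                process(cur);
        }
}

int main(void)
{
        struct node *head = NULL;
        int i;

        for (i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->val = i;
                n->next = head;
                head = n;
        }
        drain(head);
        return 0;
}
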
@@ -2148,23 +2140,6 @@ int wake_up_state(struct task_struct *p, unsigned int state)
 }
 
 /*
- * This function clears the sched_dl_entity static params.
- */
-void __dl_clear_params(struct task_struct *p)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	dl_se->dl_runtime = 0;
-	dl_se->dl_deadline = 0;
-	dl_se->dl_period = 0;
-	dl_se->flags = 0;
-	dl_se->dl_bw = 0;
-
-	dl_se->dl_throttled = 0;
-	dl_se->dl_yielded = 0;
-}
-
-/*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  *
@@ -2193,6 +2168,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
 	init_dl_task_timer(&p->dl);
+	init_dl_inactive_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
@@ -2430,7 +2406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 20;
+		return BW_UNIT;
 
 	/*
 	 * Doing this here saves a lot of checks in all
@@ -2440,93 +2416,9 @@ unsigned long to_ratio(u64 period, u64 runtime)
 	if (period == 0)
 		return 0;
 
-	return div64_u64(runtime << 20, period);
+	return div64_u64(runtime << BW_SHIFT, period);
 }
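
to_ratio() now expresses runtime/period as a fixed-point fraction scaled by
2^BW_SHIFT, with BW_UNIT replacing the old literal 1ULL << 20 as the value
meaning the whole bandwidth of one CPU. A standalone sketch of the same
arithmetic with example numbers (plain C, not kernel code; the macro values
follow the literals visible in the hunk above):

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT        20
#define BW_UNIT         (1ULL << BW_SHIFT)

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
        if (period == 0)
                return 0;
        return (runtime << BW_SHIFT) / period;
}

int main(void)
{
        /* 3ms of runtime every 10ms comes to roughly 30% of BW_UNIT. */
        uint64_t bw = to_ratio(10 * 1000 * 1000, 3 * 1000 * 1000);

        printf("bw = %llu (%.1f%% of BW_UNIT)\n",
               (unsigned long long)bw, 100.0 * bw / BW_UNIT);
        return 0;
}
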
 
-#ifdef CONFIG_SMP
-inline struct dl_bw *dl_bw_of(int i)
-{
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
-			 "sched RCU must be held");
-	return &cpu_rq(i)->rd->dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
-	struct root_domain *rd = cpu_rq(i)->rd;
-	int cpus = 0;
-
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
-			 "sched RCU must be held");
-	for_each_cpu_and(i, rd->span, cpu_active_mask)
-		cpus++;
-
-	return cpus;
-}
-#else
-inline struct dl_bw *dl_bw_of(int i)
-{
-	return &cpu_rq(i)->dl.dl_bw;
-}
-
-static inline int dl_bw_cpus(int i)
-{
-	return 1;
-}
-#endif
-
-/*
- * We must be sure that accepting a new task (or allowing changing the
- * parameters of an existing one) is consistent with the bandwidth
- * constraints. If yes, this function also accordingly updates the currently
- * allocated bandwidth to reflect the new situation.
- *
- * This function is called while holding p's rq->lock.
- *
- * XXX we should delay bw change until the task's 0-lag point, see
- * __setparam_dl().
- */
-static int dl_overflow(struct task_struct *p, int policy,
-		       const struct sched_attr *attr)
-{
-
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-	u64 period = attr->sched_period ?: attr->sched_deadline;
-	u64 runtime = attr->sched_runtime;
-	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus, err = -1;
-
-	/* !deadline task may carry old deadline bandwidth */
-	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
-		return 0;
-
-	/*
-	 * Either if a task, enters, leave, or stays -deadline but changes
-	 * its parameters, we may need to update accordingly the total
-	 * allocated bandwidth of the container.
-	 */
-	raw_spin_lock(&dl_b->lock);
-	cpus = dl_bw_cpus(task_cpu(p));
-	if (dl_policy(policy) && !task_has_dl_policy(p) &&
-	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
-		__dl_add(dl_b, new_bw);
-		err = 0;
-	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
-		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
-		__dl_clear(dl_b, p->dl.dl_bw);
-		__dl_add(dl_b, new_bw);
-		err = 0;
-	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
-		__dl_clear(dl_b, p->dl.dl_bw);
-		err = 0;
-	}
-	raw_spin_unlock(&dl_b->lock);
-
-	return err;
-}
-
-extern void init_dl_bw(struct dl_bw *dl_b);
-
 /*
  * wake_up_new_task - wake up a newly created task for the first time.
  *
@@ -3687,7 +3579,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	exception_exit(prev_state);
 }
 
-int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
 	return try_to_wake_up(curr->private, mode, wake_flags);
@@ -4009,46 +3901,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 }
 
 /*
- * This function initializes the sched_dl_entity of a newly becoming
- * SCHED_DEADLINE task.
- *
- * Only the static values are considered here, the actual runtime and the
- * absolute deadline will be properly calculated when the task is enqueued
- * for the first time with its new policy.
- */
-static void
-__setparam_dl(struct task_struct *p, const struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	dl_se->dl_runtime = attr->sched_runtime;
-	dl_se->dl_deadline = attr->sched_deadline;
-	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
-	dl_se->flags = attr->sched_flags;
-	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
-
-	/*
-	 * Changing the parameters of a task is 'tricky' and we're not doing
-	 * the correct thing -- also see task_dead_dl() and switched_from_dl().
-	 *
-	 * What we SHOULD do is delay the bandwidth release until the 0-lag
-	 * point. This would include retaining the task_struct until that time
-	 * and change dl_overflow() to not immediately decrement the current
-	 * amount.
-	 *
-	 * Instead we retain the current runtime/deadline and let the new
-	 * parameters take effect after the current reservation period lapses.
-	 * This is safe (albeit pessimistic) because the 0-lag point is always
-	 * before the current scheduling deadline.
-	 *
-	 * We can still have temporary overloads because we do not delay the
-	 * change in bandwidth until that time; so admission control is
-	 * not on the safe side. It does however guarantee tasks will never
-	 * consume more than promised.
-	 */
-}
-
-/*
  * sched_setparam() passes in -1 for its policy, to let the functions
  * it calls know not to change it.
  */
@@ -4101,59 +3953,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 		p->sched_class = &fair_sched_class;
 }
 
-static void
-__getparam_dl(struct task_struct *p, struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	attr->sched_priority = p->rt_priority;
-	attr->sched_runtime = dl_se->dl_runtime;
-	attr->sched_deadline = dl_se->dl_deadline;
-	attr->sched_period = dl_se->dl_period;
-	attr->sched_flags = dl_se->flags;
-}
-
-/*
- * This function validates the new parameters of a -deadline task.
- * We ask for the deadline not being zero, and greater or equal
- * than the runtime, as well as the period of being zero or
- * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution of 1us (we
- * check sched_runtime only since it is always the smaller one) and
- * below 2^63 ns (we have to check both sched_deadline and
- * sched_period, as the latter can be zero).
- */
-static bool
-__checkparam_dl(const struct sched_attr *attr)
-{
-	/* deadline != 0 */
-	if (attr->sched_deadline == 0)
-		return false;
-
-	/*
-	 * Since we truncate DL_SCALE bits, make sure we're at least
-	 * that big.
-	 */
-	if (attr->sched_runtime < (1ULL << DL_SCALE))
-		return false;
-
-	/*
-	 * Since we use the MSB for wrap-around and sign issues, make
-	 * sure it's not set (mind that period can be equal to zero).
-	 */
-	if (attr->sched_deadline & (1ULL << 63) ||
-	    attr->sched_period & (1ULL << 63))
-		return false;
-
-	/* runtime <= deadline <= period (if period != 0) */
-	if ((attr->sched_period != 0 &&
-	     attr->sched_period < attr->sched_deadline) ||
-	    attr->sched_deadline < attr->sched_runtime)
-		return false;
-
-	return true;
-}
-
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -4170,19 +3969,6 @@ static bool check_same_owner(struct task_struct *p)
 	return match;
 }
 
-static bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
-{
-	struct sched_dl_entity *dl_se = &p->dl;
-
-	if (dl_se->dl_runtime != attr->sched_runtime ||
-		dl_se->dl_deadline != attr->sched_deadline ||
-		dl_se->dl_period != attr->sched_period ||
-		dl_se->flags != attr->sched_flags)
-		return true;
-
-	return false;
-}
-
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user, bool pi)
@@ -4197,8 +3983,8 @@ static int __sched_setscheduler(struct task_struct *p,
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
 	struct rq *rq;
 
-	/* May grab non-irq protected spin_locks: */
-	BUG_ON(in_interrupt());
+	/* The pi code expects interrupts enabled */
+	BUG_ON(pi && in_interrupt());
 recheck:
 	/* Double check policy once rq lock held: */
 	if (policy < 0) {
@@ -4211,7 +3997,8 @@ static int __sched_setscheduler(struct task_struct *p,
 			return -EINVAL;
 	}
 
-	if (attr->sched_flags & ~(SCHED_FLAG_RESET_ON_FORK))
+	if (attr->sched_flags &
+		~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM))
 		return -EINVAL;
 
 	/*
@@ -4362,7 +4149,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
 	 * is available.
 	 */
-	if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
+	if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
 		task_rq_unlock(rq, p, &rf);
 		return -EBUSY;
 	}
@@ -5463,26 +5250,17 @@ void init_idle(struct task_struct *idle, int cpu)
 #endif
 }
 
+#ifdef CONFIG_SMP
+
 int cpuset_cpumask_can_shrink(const struct cpumask *cur,
 			      const struct cpumask *trial)
 {
-	int ret = 1, trial_cpus;
-	struct dl_bw *cur_dl_b;
-	unsigned long flags;
+	int ret = 1;
 
 	if (!cpumask_weight(cur))
 		return ret;
 
-	rcu_read_lock_sched();
-	cur_dl_b = dl_bw_of(cpumask_any(cur));
-	trial_cpus = cpumask_weight(trial);
-
-	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
-	if (cur_dl_b->bw != -1 &&
-	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
-		ret = 0;
-	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
-	rcu_read_unlock_sched();
+	ret = dl_cpuset_cpumask_can_shrink(cur, trial);
 
 	return ret;
 }
@@ -5506,43 +5284,14 @@ int task_can_attach(struct task_struct *p,
 		goto out;
 	}
 
-#ifdef CONFIG_SMP
 	if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
-					      cs_cpus_allowed)) {
-		unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
-							cs_cpus_allowed);
-		struct dl_bw *dl_b;
-		bool overflow;
-		int cpus;
-		unsigned long flags;
+					      cs_cpus_allowed))
+		ret = dl_task_can_attach(p, cs_cpus_allowed);
 
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(dest_cpu);
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		cpus = dl_bw_cpus(dest_cpu);
-		overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
-		if (overflow)
-			ret = -EBUSY;
-		else {
-			/*
-			 * We reserve space for this task in the destination
-			 * root_domain, as we can't fail after this point.
-			 * We will free resources in the source root_domain
-			 * later on (see set_cpus_allowed_dl()).
-			 */
-			__dl_add(dl_b, p->dl.dl_bw);
-		}
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-		rcu_read_unlock_sched();
-
-	}
-#endif
 out:
 	return ret;
 }
 
-#ifdef CONFIG_SMP
-
 bool sched_smp_initialized __read_mostly;
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -5605,7 +5354,7 @@ void idle_task_exit(void)
 	BUG_ON(cpu_online(smp_processor_id()));
 
 	if (mm != &init_mm) {
-		switch_mm_irqs_off(mm, &init_mm, current);
+		switch_mm(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
 	mmdrop(mm);
@@ -5805,23 +5554,8 @@ static void cpuset_cpu_active(void)
 
 static int cpuset_cpu_inactive(unsigned int cpu)
 {
-	unsigned long flags;
-	struct dl_bw *dl_b;
-	bool overflow;
-	int cpus;
-
 	if (!cpuhp_tasks_frozen) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		cpus = dl_bw_cpus(cpu);
-		overflow = __dl_overflow(dl_b, cpus, 0, 0);
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-
-		if (overflow)
+		if (dl_cpu_busy(cpu))
 			return -EBUSY;
 		cpuset_update_active_cpus();
 	} else {
@@ -5874,15 +5608,9 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 * users of this state to go away such that all new such users will
 	 * observe it.
 	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
 	 * Do sync before park smpboot threads to take care the rcu boost case.
 	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
+	synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
 	if (!sched_smp_initialized)
 		return 0;
@@ -5958,7 +5686,6 @@ void __init sched_init_smp(void)
 	cpumask_var_t non_isolated_cpus;
 
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
 	sched_init_numa();
 
@@ -5968,7 +5695,7 @@ void __init sched_init_smp(void)
 	 * happen.
 	 */
 	mutex_lock(&sched_domains_mutex);
-	init_sched_domains(cpu_active_mask);
+	sched_init_domains(cpu_active_mask);
 	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
 	if (cpumask_empty(non_isolated_cpus))
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -5984,7 +5711,6 @@ void __init sched_init_smp(void)
 	init_sched_dl_class();
 
 	sched_init_smt();
-	sched_clock_init_late();
 
 	sched_smp_initialized = true;
 }
@@ -6000,7 +5726,6 @@ early_initcall(migration_init);
 void __init sched_init_smp(void)
 {
 	sched_init_granularity();
-	sched_clock_init_late();
 }
 #endif /* CONFIG_SMP */
 
@@ -6026,28 +5751,13 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
-#define WAIT_TABLE_BITS 8
-#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
-static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
-
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	unsigned long val = (unsigned long)word << shift | bit;
-
-	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
 	sched_clock_init();
-
-	for (i = 0; i < WAIT_TABLE_SIZE; i++)
-		init_waitqueue_head(bit_wait_table + i);
+	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
@@ -6199,7 +5909,6 @@ void __init sched_init(void)
 	calc_load_update = jiffies + LOAD_FREQ;
 
 #ifdef CONFIG_SMP
-	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
 	/* May be allocated at isolcpus cmdline parse time */
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
@@ -6251,8 +5960,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
 	     !is_idle_task(current)) ||
-	    system_state != SYSTEM_RUNNING || oops_in_progress)
+	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+	    oops_in_progress)
 		return;
+
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 		return;
 	prev_jiffy = jiffies;
@@ -6507,385 +6218,6 @@ void sched_move_task(struct task_struct *tsk)
 
 	task_rq_unlock(rq, tsk, &rf);
 }
-#endif /* CONFIG_CGROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-/*
- * Ensure that the real time constraints are schedulable.
- */
-static DEFINE_MUTEX(rt_constraints_mutex);
-
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
-{
-	struct task_struct *g, *p;
-
-	/*
-	 * Autogroups do not have RT tasks; see autogroup_create().
-	 */
-	if (task_group_is_autogroup(tg))
-		return 0;
-
-	for_each_process_thread(g, p) {
-		if (rt_task(p) && task_group(p) == tg)
-			return 1;
-	}
-
-	return 0;
-}
-
-struct rt_schedulable_data {
-	struct task_group *tg;
-	u64 rt_period;
-	u64 rt_runtime;
-};
-
-static int tg_rt_schedulable(struct task_group *tg, void *data)
-{
-	struct rt_schedulable_data *d = data;
-	struct task_group *child;
-	unsigned long total, sum = 0;
-	u64 period, runtime;
-
-	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	runtime = tg->rt_bandwidth.rt_runtime;
-
-	if (tg == d->tg) {
-		period = d->rt_period;
-		runtime = d->rt_runtime;
-	}
-
-	/*
-	 * Cannot have more runtime than the period.
-	 */
-	if (runtime > period && runtime != RUNTIME_INF)
-		return -EINVAL;
-
-	/*
-	 * Ensure we don't starve existing RT tasks.
-	 */
-	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
-		return -EBUSY;
-
-	total = to_ratio(period, runtime);
-
-	/*
-	 * Nobody can have more than the global setting allows.
-	 */
-	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
-		return -EINVAL;
-
-	/*
-	 * The sum of our children's runtime should not exceed our own.
-	 */
-	list_for_each_entry_rcu(child, &tg->children, siblings) {
-		period = ktime_to_ns(child->rt_bandwidth.rt_period);
-		runtime = child->rt_bandwidth.rt_runtime;
-
-		if (child == d->tg) {
-			period = d->rt_period;
-			runtime = d->rt_runtime;
-		}
-
-		sum += to_ratio(period, runtime);
-	}
-
-	if (sum > total)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	int ret;
-
-	struct rt_schedulable_data data = {
-		.tg = tg,
-		.rt_period = period,
-		.rt_runtime = runtime,
-	};
-
-	rcu_read_lock();
-	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static int tg_set_rt_bandwidth(struct task_group *tg,
-		u64 rt_period, u64 rt_runtime)
-{
-	int i, err = 0;
-
-	/*
-	 * Disallowing the root group RT runtime is BAD, it would disallow the
-	 * kernel creating (and or operating) RT threads.
-	 */
-	if (tg == &root_task_group && rt_runtime == 0)
-		return -EINVAL;
-
-	/* No period doesn't make any sense. */
-	if (rt_period == 0)
-		return -EINVAL;
-
-	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
-	err = __rt_schedulable(tg, rt_period, rt_runtime);
-	if (err)
-		goto unlock;
-
-	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
-	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
-	tg->rt_bandwidth.rt_runtime = rt_runtime;
-
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = tg->rt_rq[i];
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = rt_runtime;
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
-unlock:
-	read_unlock(&tasklist_lock);
-	mutex_unlock(&rt_constraints_mutex);
-
-	return err;
-}
-
-static int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
-{
-	u64 rt_runtime, rt_period;
-
-	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
-	if (rt_runtime_us < 0)
-		rt_runtime = RUNTIME_INF;
-
-	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_runtime(struct task_group *tg)
-{
-	u64 rt_runtime_us;
-
-	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
-		return -1;
-
-	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
-	do_div(rt_runtime_us, NSEC_PER_USEC);
-	return rt_runtime_us;
-}
-
-static int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
-{
-	u64 rt_runtime, rt_period;
-
-	rt_period = rt_period_us * NSEC_PER_USEC;
-	rt_runtime = tg->rt_bandwidth.rt_runtime;
-
-	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
-}
-
-static long sched_group_rt_period(struct task_group *tg)
-{
-	u64 rt_period_us;
-
-	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	do_div(rt_period_us, NSEC_PER_USEC);
-	return rt_period_us;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static int sched_rt_global_constraints(void)
-{
-	int ret = 0;
-
-	mutex_lock(&rt_constraints_mutex);
-	read_lock(&tasklist_lock);
-	ret = __rt_schedulable(NULL, 0, 0);
-	read_unlock(&tasklist_lock);
-	mutex_unlock(&rt_constraints_mutex);
-
-	return ret;
-}
-
-static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
-{
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
-		return 0;
-
-	return 1;
-}
-
-#else /* !CONFIG_RT_GROUP_SCHED */
-static int sched_rt_global_constraints(void)
-{
-	unsigned long flags;
-	int i;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = global_rt_runtime();
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
-	return 0;
-}
-#endif /* CONFIG_RT_GROUP_SCHED */
-
-static int sched_dl_global_validate(void)
-{
-	u64 runtime = global_rt_runtime();
-	u64 period = global_rt_period();
-	u64 new_bw = to_ratio(period, runtime);
-	struct dl_bw *dl_b;
-	int cpu, ret = 0;
-	unsigned long flags;
-
-	/*
-	 * Here we want to check the bandwidth not being set to some
-	 * value smaller than the currently allocated bandwidth in
-	 * any of the root_domains.
-	 *
-	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than
-	 * cycling on root_domains... Discussion on different/better
-	 * solutions is welcome!
-	 */
-	for_each_possible_cpu(cpu) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		if (new_bw < dl_b->total_bw)
-			ret = -EBUSY;
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-static void sched_dl_do_global(void)
-{
-	u64 new_bw = -1;
-	struct dl_bw *dl_b;
-	int cpu;
-	unsigned long flags;
-
-	def_dl_bandwidth.dl_period = global_rt_period();
-	def_dl_bandwidth.dl_runtime = global_rt_runtime();
-
-	if (global_rt_runtime() != RUNTIME_INF)
-		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
-
-	/*
-	 * FIXME: As above...
-	 */
-	for_each_possible_cpu(cpu) {
-		rcu_read_lock_sched();
-		dl_b = dl_bw_of(cpu);
-
-		raw_spin_lock_irqsave(&dl_b->lock, flags);
-		dl_b->bw = new_bw;
-		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-		rcu_read_unlock_sched();
-	}
-}
-
-static int sched_rt_global_validate(void)
-{
-	if (sysctl_sched_rt_period <= 0)
-		return -EINVAL;
-
-	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
-		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void sched_rt_do_global(void)
-{
-	def_rt_bandwidth.rt_runtime = global_rt_runtime();
-	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
-}
-
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
-{
-	int old_period, old_runtime;
-	static DEFINE_MUTEX(mutex);
-	int ret;
-
-	mutex_lock(&mutex);
-	old_period = sysctl_sched_rt_period;
-	old_runtime = sysctl_sched_rt_runtime;
-
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
-	if (!ret && write) {
-		ret = sched_rt_global_validate();
-		if (ret)
-			goto undo;
-
-		ret = sched_dl_global_validate();
-		if (ret)
-			goto undo;
-
-		ret = sched_rt_global_constraints();
-		if (ret)
-			goto undo;
-
-		sched_rt_do_global();
-		sched_dl_do_global();
-	}
-	if (0) {
-undo:
-		sysctl_sched_rt_period = old_period;
-		sysctl_sched_rt_runtime = old_runtime;
-	}
-	mutex_unlock(&mutex);
-
-	return ret;
-}
-
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
-{
-	int ret;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-	/*
-	 * Make sure that internally we keep jiffies.
-	 * Also, writing zero resets the timeslice to default:
-	 */
-	if (!ret && write) {
-		sched_rr_timeslice =
-			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
-			msecs_to_jiffies(sysctl_sched_rr_timeslice);
-	}
-	mutex_unlock(&mutex);
-	return ret;
-}
-
-#ifdef CONFIG_CGROUP_SCHED
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
 {
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 76877a6..076a2e3 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 	if (sg_policy->next_freq == next_freq)
 		return;
 
-	if (sg_policy->next_freq > next_freq)
-		next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
@@ -245,11 +242,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	u64 last_freq_update_time = sg_policy->last_freq_update_time;
 	unsigned long util = 0, max = 1;
 	unsigned int j;
 
@@ -265,7 +261,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 		 * enough, don't take the CPU into account as it probably is
 		 * idle now (and clear iowait_boost for it).
 		 */
-		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		delta_ns = time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
 			continue;
@@ -309,7 +305,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 		if (flags & SCHED_CPUFREQ_RT_DL)
 			next_f = sg_policy->policy->cpuinfo.max_freq;
 		else
-			next_f = sugov_next_freq_shared(sg_cpu);
+			next_f = sugov_next_freq_shared(sg_cpu, time);
 
 		sugov_update_commit(sg_policy, time, next_f);
 	}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index aea3135..67c70e2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -615,19 +615,13 @@ static void cputime_adjust(struct task_cputime *curr,
 	 * userspace. Once a task gets some ticks, the monotonicity code at
 	 * 'update' will ensure things converge to the observed ratio.
 	 */
-	if (stime == 0) {
-		utime = rtime;
-		goto update;
+	if (stime != 0) {
+		if (utime == 0)
+			stime = rtime;
+		else
+			stime = scale_stime(stime, rtime, stime + utime);
 	}
 
-	if (utime == 0) {
-		stime = rtime;
-		goto update;
-	}
-
-	stime = scale_stime(stime, rtime, stime + utime);
-
-update:
 	/*
 	 * Make sure stime doesn't go backwards; this preserves monotonicity
 	 * for utime because rtime is monotonic.
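For illustration only, a standalone userspace sketch of the stime/utime split performed by the hunk above, using hypothetical tick-sampled values; scale_stime() is assumed to compute stime * rtime / (stime + utime), and the monotonicity clamping that follows in cputime_adjust() is omitted:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical sampled values, in nanoseconds. */
	uint64_t rtime = 100000000;	/* precise total run time */
	uint64_t stime =  30000000;	/* tick-sampled system time */
	uint64_t utime =  60000000;	/* tick-sampled user time */

	if (stime != 0) {
		if (utime == 0)
			stime = rtime;
		else	/* assumed scale_stime() behaviour */
			stime = stime * rtime / (stime + utime);
	}
	/* utime is then derived from the precise rtime */
	utime = rtime - stime;

	printf("stime=%llu utime=%llu\n",
	       (unsigned long long)stime, (unsigned long long)utime);
	return 0;
}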
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce590..a84299f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -17,6 +17,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <uapi/linux/sched/types.h>
 
 struct dl_bandwidth def_dl_bandwidth;
 
@@ -43,6 +44,254 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_SMP
+static inline struct dl_bw *dl_bw_of(int i)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	return &cpu_rq(i)->rd->dl_bw;
+}
+
+static inline int dl_bw_cpus(int i)
+{
+	struct root_domain *rd = cpu_rq(i)->rd;
+	int cpus = 0;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	for_each_cpu_and(i, rd->span, cpu_active_mask)
+		cpus++;
+
+	return cpus;
+}
+#else
+static inline struct dl_bw *dl_bw_of(int i)
+{
+	return &cpu_rq(i)->dl.dl_bw;
+}
+
+static inline int dl_bw_cpus(int i)
+{
+	return 1;
+}
+#endif
+
+static inline
+void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->running_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->running_bw += dl_bw;
+	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
+	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+}
+
+static inline
+void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->running_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->running_bw -= dl_bw;
+	SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
+	if (dl_rq->running_bw > old)
+		dl_rq->running_bw = 0;
+}
+
+static inline
+void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->this_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->this_bw += dl_bw;
+	SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
+}
+
+static inline
+void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+{
+	u64 old = dl_rq->this_bw;
+
+	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	dl_rq->this_bw -= dl_bw;
+	SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
+	if (dl_rq->this_bw > old)
+		dl_rq->this_bw = 0;
+	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+}
+
+void dl_change_utilization(struct task_struct *p, u64 new_bw)
+{
+	struct rq *rq;
+
+	if (task_on_rq_queued(p))
+		return;
+
+	rq = task_rq(p);
+	if (p->dl.dl_non_contending) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		p->dl.dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_non_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+			put_task_struct(p);
+	}
+	sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	add_rq_bw(new_bw, &rq->dl);
+}
+
+/*
+ * The utilization of a task cannot be immediately removed from
+ * the rq active utilization (running_bw) when the task blocks.
+ * Instead, we have to wait for the so called "0-lag time".
+ *
+ * If a task blocks before the "0-lag time", a timer (the inactive
+ * timer) is armed, and running_bw is decreased when the timer
+ * fires.
+ *
+ * If the task wakes up again before the inactive timer fires,
+ * the timer is cancelled, whereas if the task wakes up after the
+ * inactive timer fired (and running_bw has been decreased) the
+ * task's utilization has to be added to running_bw again.
+ * A flag in the deadline scheduling entity (dl_non_contending)
+ * is used to avoid race conditions between the inactive timer handler
+ * and task wakeups.
+ *
+ * The following diagram shows how running_bw is updated. A task is
+ * "ACTIVE" when its utilization contributes to running_bw; an
+ * "ACTIVE contending" task is in the TASK_RUNNING state, while an
+ * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
+ * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
+ * time already passed, which does not contribute to running_bw anymore.
+ *                              +------------------+
+ *             wakeup           |    ACTIVE        |
+ *          +------------------>+   contending     |
+ *          | add_running_bw    |                  |
+ *          |                   +----+------+------+
+ *          |                        |      ^
+ *          |                dequeue |      |
+ * +--------+-------+                |      |
+ * |                |   t >= 0-lag   |      | wakeup
+ * |    INACTIVE    |<---------------+      |
+ * |                | sub_running_bw |      |
+ * +--------+-------+                |      |
+ *          ^                        |      |
+ *          |              t < 0-lag |      |
+ *          |                        |      |
+ *          |                        V      |
+ *          |                   +----+------+------+
+ *          | sub_running_bw    |    ACTIVE        |
+ *          +-------------------+                  |
+ *            inactive timer    |  non contending  |
+ *            fired             +------------------+
+ *
+ * The task_non_contending() function is invoked when a task
+ * blocks, and checks if the 0-lag time already passed or
+ * not (in the first case, it directly updates running_bw;
+ * in the second case, it arms the inactive timer).
+ *
+ * The task_contending() function is invoked when a task wakes
+ * up, and checks if the task is still in the "ACTIVE non contending"
+ * state or not (in the second case, it updates running_bw).
+ */
+static void task_non_contending(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+	struct hrtimer *timer = &dl_se->inactive_timer;
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+	struct rq *rq = rq_of_dl_rq(dl_rq);
+	s64 zerolag_time;
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	WARN_ON(hrtimer_active(&dl_se->inactive_timer));
+	WARN_ON(dl_se->dl_non_contending);
+
+	zerolag_time = dl_se->deadline -
+		 div64_long((dl_se->runtime * dl_se->dl_period),
+			dl_se->dl_runtime);
+
+	/*
+	 * Using relative times instead of the absolute "0-lag time"
+	 * simplifies the code.
+	 */
+	zerolag_time -= rq_clock(rq);
+
+	/*
+	 * If the "0-lag time" already passed, decrease the active
+	 * utilization now, instead of starting a timer
+	 */
+	if (zerolag_time < 0) {
+		if (dl_task(p))
+			sub_running_bw(dl_se->dl_bw, dl_rq);
+		if (!dl_task(p) || p->state == TASK_DEAD) {
+			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+			if (p->state == TASK_DEAD)
+				sub_rq_bw(p->dl.dl_bw, &rq->dl);
+			raw_spin_lock(&dl_b->lock);
+			__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+			__dl_clear_params(p);
+			raw_spin_unlock(&dl_b->lock);
+		}
+
+		return;
+	}
+
+	dl_se->dl_non_contending = 1;
+	get_task_struct(p);
+	hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
+}
+
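For illustration only, a minimal userspace sketch of the 0-lag computation used by task_non_contending() above, with made-up parameters; the kernel uses div64_long() and rq_clock(), which are replaced here by plain C arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical deadline parameters, all in nanoseconds. */
	int64_t deadline   = 150000000;	/* absolute deadline of the current job */
	int64_t runtime    =  20000000;	/* remaining runtime */
	int64_t dl_runtime =  30000000;	/* reserved runtime per period */
	int64_t dl_period  = 100000000;	/* reservation period */
	int64_t now        = 140000000;	/* current rq clock */

	/*
	 * 0-lag time: the instant at which the remaining runtime, consumed
	 * at the task's reserved bandwidth, would be exactly used up.
	 */
	int64_t zerolag = deadline - (runtime * dl_period) / dl_runtime;

	if (zerolag - now < 0)
		printf("0-lag already passed: decrease running_bw now\n");
	else
		printf("arm the inactive timer for %lld ns\n",
		       (long long)(zerolag - now));
	return 0;
}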
+static void task_contending(struct sched_dl_entity *dl_se, int flags)
+{
+	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+
+	/*
+	 * If this is a non-deadline task that has been boosted,
+	 * do nothing
+	 */
+	if (dl_se->dl_runtime == 0)
+		return;
+
+	if (flags & ENQUEUE_MIGRATED)
+		add_rq_bw(dl_se->dl_bw, dl_rq);
+
+	if (dl_se->dl_non_contending) {
+		dl_se->dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_non_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
+			put_task_struct(dl_task_of(dl_se));
+	} else {
+		/*
+		 * Since "dl_non_contending" is not set, the
+		 * task's utilization has already been removed from
+		 * active utilization (either when the task blocked,
+		 * when the "inactive timer" fired).
+		 * So, add it back.
+		 */
+		add_running_bw(dl_se->dl_bw, dl_rq);
+	}
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -83,6 +332,10 @@ void init_dl_rq(struct dl_rq *dl_rq)
 #else
 	init_dl_bw(&dl_rq->dl_bw);
 #endif
+
+	dl_rq->running_bw = 0;
+	dl_rq->this_bw = 0;
+	init_dl_rq_bw_ratio(dl_rq);
 }
 
 #ifdef CONFIG_SMP
@@ -484,13 +737,84 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 }
 
 /*
- * When a -deadline entity is queued back on the runqueue, its runtime and
- * deadline might need updating.
+ * Revised wakeup rule [1]: For self-suspending tasks, rather than
+ * re-initializing the task's runtime and deadline, the revised wakeup
+ * rule adjusts the task's runtime so that the task does not overrun its
+ * density.
  *
- * The policy here is that we update the deadline of the entity only if:
- *  - the current deadline is in the past,
- *  - using the remaining runtime with the current deadline would make
- *    the entity exceed its bandwidth.
+ * Reasoning: a task may overrun the density if:
+ *    runtime / (deadline - t) > dl_runtime / dl_deadline
+ *
+ * Therefore, runtime can be adjusted to:
+ *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
+ *
+ * In this way, the runtime is set to the maximum value the task can
+ * use without breaking the density rule.
+ *
+ * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
+ * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
+ */
+static void
+update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
+{
+	u64 laxity = dl_se->deadline - rq_clock(rq);
+
+	/*
+	 * If the task has deadline < period, and the deadline is in the past,
+	 * it should already be throttled before this check.
+	 *
+	 * See update_dl_entity() comments for further details.
+	 */
+	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
+
+	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
+}
+
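For illustration only, a standalone sketch of the revised wakeup rule arithmetic used by update_dl_revised_wakeup() above; BW_SHIFT is assumed to be 20 (consistent with the 2^(64 - 20 - 8) remark in the GRUB comment further below), and all values are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20	/* assumed fixed-point shift */

int main(void)
{
	/* Hypothetical constrained-deadline task: deadline < period. */
	uint64_t dl_runtime  = 10000000;	/* 10 ms */
	uint64_t dl_deadline = 40000000;	/* 40 ms */
	uint64_t laxity      = 25000000;	/* deadline - rq_clock(rq) */

	/* dl_density = dl_runtime / dl_deadline in BW_SHIFT fixed point */
	uint64_t dl_density = (dl_runtime << BW_SHIFT) / dl_deadline;

	/* revised wakeup rule: runtime = density * laxity */
	uint64_t runtime = (dl_density * laxity) >> BW_SHIFT;

	printf("adjusted runtime: %llu ns\n", (unsigned long long)runtime);
	return 0;
}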
+/*
+ * Regarding the deadline, a task with implicit deadline has a relative
+ * deadline == relative period. A task with constrained deadline has a
+ * relative deadline <= relative period.
+ *
+ * We support constrained deadline tasks. However, there are some restrictions
+ * applied only for tasks which do not have an implicit deadline. See
+ * update_dl_entity() to know more about such restrictions.
+ *
+ * The dl_is_implicit() returns true if the task has an implicit deadline.
+ */
+static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_deadline == dl_se->dl_period;
+}
+
+/*
+ * When a deadline entity is placed in the runqueue, its runtime and deadline
+ * might need to be updated. This is done by a CBS wake up rule. There are two
+ * different rules: 1) the original CBS; and 2) the Revisited CBS.
+ *
+ * When the task is starting a new period, the Original CBS is used. In this
+ * case, the runtime is replenished and a new absolute deadline is set.
+ *
+ * When a task is queued before the beginning of the next period, using the
+ * remaining runtime and deadline could make the entity overflow; see
+ * dl_entity_overflow() for more about runtime overflow. When such a case
+ * is detected, the runtime and deadline need to be updated.
+ *
+ * If the task has an implicit deadline, i.e., deadline == period, the Original
+ * CBS is applied: the runtime is replenished and a new absolute deadline is
+ * set, as in the previous cases.
+ *
+ * However, the Original CBS does not work properly for tasks with
+ * deadline < period, which are said to have a constrained deadline. By
+ * applying the Original CBS, a constrained deadline task would be able to run
+ * runtime/deadline in a period. With deadline < period, the task would
+ * overrun the runtime/period allowed bandwidth, breaking the admission test.
+ *
+ * To prevent this misbehavior, the Revisited CBS is used for
+ * constrained deadline tasks when a runtime overflow is detected. In the
+ * Revisited CBS, rather than replenishing & setting a new absolute deadline,
+ * the remaining runtime of the task is reduced to avoid runtime overflow.
+ * Please refer to the comments of the update_dl_revised_wakeup() function
+ * for more about the Revised CBS rule.
  */
 static void update_dl_entity(struct sched_dl_entity *dl_se,
 			     struct sched_dl_entity *pi_se)
@@ -500,6 +824,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+
+		if (unlikely(!dl_is_implicit(dl_se) &&
+			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+			     !dl_se->dl_boosted)){
+			update_dl_revised_wakeup(dl_se, rq);
+			return;
+		}
+
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
@@ -593,10 +925,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * The task might have changed its scheduling policy to something
 	 * different than SCHED_DEADLINE (through switched_from_dl()).
 	 */
-	if (!dl_task(p)) {
-		__dl_clear_params(p);
+	if (!dl_task(p))
 		goto unlock;
-	}
 
 	/*
 	 * The task might have been boosted by someone else and might be in the
@@ -723,6 +1053,8 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
+		if (dl_se->runtime > 0)
+			dl_se->runtime = 0;
 	}
 }
 
@@ -735,6 +1067,47 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
 
 /*
+ * This function implements the GRUB accounting rule:
+ * according to the GRUB reclaiming algorithm, the runtime is
+ * not decreased as "dq = -dt", but as
+ * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
+ * where u is the utilization of the task, Umax is the maximum reclaimable
+ * utilization, Uinact is the (per-runqueue) inactive utilization, computed
+ * as the difference between the "total runqueue utilization" and the
+ * runqueue active utilization, and Uextra is the (per runqueue) extra
+ * reclaimable utilization.
+ * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
+ * multiplied by 2^BW_SHIFT, the result has to be shifted right by
+ * BW_SHIFT.
+ * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
+ * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
+ * Since delta is a 64 bit variable, to have an overflow its value
+ * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
+ * So, overflow is not an issue here.
+ */
+u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
+{
+	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
+	u64 u_act;
+	u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
+
+	/*
+	 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
+	 * we compare u_inact + rq->dl.extra_bw with
+	 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
+	 * u_inact + rq->dl.extra_bw can be larger than
+	 * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative
+	 * leading to wrong results)
+	 */
+	if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
+		u_act = u_act_min;
+	else
+		u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
+
+	return (delta * u_act) >> BW_SHIFT;
+}
+
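For illustration only, a standalone sketch of the GRUB accounting rule implemented by grub_reclaim() above; BW_SHIFT = 20 and RATIO_SHIFT = 8 are assumed (per the 2^(64 - 20 - 8) remark), and the per-runqueue values are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT    20
#define BW_UNIT     (1 << BW_SHIFT)
#define RATIO_SHIFT 8

int main(void)
{
	/* Hypothetical per-rq state; utilizations in BW_SHIFT fixed point. */
	uint64_t this_bw    = 6 * (BW_UNIT / 10);	/* Utot   = 0.6 */
	uint64_t running_bw = 4 * (BW_UNIT / 10);	/* Uact   = 0.4 */
	uint64_t extra_bw   = 1 * (BW_UNIT / 10);	/* Uextra = 0.1 */
	uint64_t dl_bw      = 2 * (BW_UNIT / 10);	/* u      = 0.2 */
	uint64_t bw_ratio   = ((1 << RATIO_SHIFT) * 10) / 9; /* 1/Umax, Umax = 0.9 */
	uint64_t delta      = 1000000;			/* 1 ms of execution */

	uint64_t u_inact   = this_bw - running_bw;	/* Utot - Uact */
	uint64_t u_act_min = (dl_bw * bw_ratio) >> RATIO_SHIFT;
	uint64_t u_act;

	if (u_inact + extra_bw > BW_UNIT - u_act_min)
		u_act = u_act_min;
	else
		u_act = BW_UNIT - u_inact - extra_bw;

	/* the runtime is decreased by delta * u_act instead of plain delta */
	printf("charge %llu ns of runtime for %llu ns of execution\n",
	       (unsigned long long)((delta * u_act) >> BW_SHIFT),
	       (unsigned long long)delta);
	return 0;
}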
+/*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
  */
@@ -776,6 +1149,8 @@ static void update_curr_dl(struct rq *rq)
 
 	sched_rt_avg_update(rq, delta_exec);
 
+	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
+		delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
 	dl_se->runtime -= delta_exec;
 
 throttle:
@@ -815,6 +1190,56 @@ static void update_curr_dl(struct rq *rq)
 	}
 }
 
+static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
+{
+	struct sched_dl_entity *dl_se = container_of(timer,
+						     struct sched_dl_entity,
+						     inactive_timer);
+	struct task_struct *p = dl_task_of(dl_se);
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+
+	if (!dl_task(p) || p->state == TASK_DEAD) {
+		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+		if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
+			sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			dl_se->dl_non_contending = 0;
+		}
+
+		raw_spin_lock(&dl_b->lock);
+		__dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
+		raw_spin_unlock(&dl_b->lock);
+		__dl_clear_params(p);
+
+		goto unlock;
+	}
+	if (dl_se->dl_non_contending == 0)
+		goto unlock;
+
+	sched_clock_tick();
+	update_rq_clock(rq);
+
+	sub_running_bw(dl_se->dl_bw, &rq->dl);
+	dl_se->dl_non_contending = 0;
+unlock:
+	task_rq_unlock(rq, p, &rf);
+	put_task_struct(p);
+
+	return HRTIMER_NORESTART;
+}
+
+void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
+{
+	struct hrtimer *timer = &dl_se->inactive_timer;
+
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	timer->function = inactive_task_timer;
+}
+
 #ifdef CONFIG_SMP
 
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
@@ -946,10 +1371,12 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
 	 * parameters of the task might need updating. Otherwise,
 	 * we want a replenishment of its runtime.
 	 */
-	if (flags & ENQUEUE_WAKEUP)
+	if (flags & ENQUEUE_WAKEUP) {
+		task_contending(dl_se, flags);
 		update_dl_entity(dl_se, pi_se);
-	else if (flags & ENQUEUE_REPLENISH)
+	} else if (flags & ENQUEUE_REPLENISH) {
 		replenish_dl_entity(dl_se, pi_se);
+	}
 
 	__enqueue_dl_entity(dl_se);
 }
@@ -959,11 +1386,6 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 	__dequeue_dl_entity(dl_se);
 }
 
-static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
-{
-	return dl_se->dl_deadline < dl_se->dl_period;
-}
-
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -995,17 +1417,32 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	 * If that is the case, the task will be throttled and
 	 * the replenishment timer will be set to the next period.
 	 */
-	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
 		dl_check_constrained_dl(&p->dl);
 
+	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
+		add_rq_bw(p->dl.dl_bw, &rq->dl);
+		add_running_bw(p->dl.dl_bw, &rq->dl);
+	}
+
 	/*
-	 * If p is throttled, we do nothing. In fact, if it exhausted
+	 * If p is throttled, we do not enqueue it. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
 	 * its rq, the bandwidth timer callback (which clearly has not
 	 * run yet) will take care of this.
+	 * However, the active utilization does not depend on the fact
+	 * that the task is on the runqueue or not (but depends on the
+	 * task's state - in GRUB parlance, "inactive" vs "active contending").
+	 * In other words, even if a task is throttled its utilization must
+	 * be counted in the active utilization; hence, we need to call
+	 * add_running_bw().
 	 */
-	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
+	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+		if (flags & ENQUEUE_WAKEUP)
+			task_contending(&p->dl, flags);
+
 		return;
+	}
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
@@ -1023,6 +1460,23 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_curr_dl(rq);
 	__dequeue_task_dl(rq, p, flags);
+
+	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	}
+
+	/*
+	 * This check allows starting the inactive timer (or immediately
+	 * decreasing the active utilization, if needed) in two cases:
+	 * when the task blocks and when it is terminating
+	 * (p->state == TASK_DEAD). We can handle the two cases in the same
+	 * way, because from GRUB's point of view the same thing is happening
+	 * (the task moves from "active contending" to "active non contending"
+	 * or "inactive")
+	 */
+	if (flags & DEQUEUE_SLEEP)
+		task_non_contending(p);
 }
 
 /*
@@ -1100,6 +1554,37 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	return cpu;
 }
 
+static void migrate_task_rq_dl(struct task_struct *p)
+{
+	struct rq *rq;
+
+	if (p->state != TASK_WAKING)
+		return;
+
+	rq = task_rq(p);
+	/*
+	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
+	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
+	 * rq->lock is not... So, lock it
+	 */
+	raw_spin_lock(&rq->lock);
+	if (p->dl.dl_non_contending) {
+		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		p->dl.dl_non_contending = 0;
+		/*
+		 * If the timer handler is currently running and the
+		 * timer cannot be cancelled, inactive_task_timer()
+		 * will see that dl_non_contending is not set, and
+		 * will not touch the rq's active utilization,
+		 * so we are still safe.
+		 */
+		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+			put_task_struct(p);
+	}
+	sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	raw_spin_unlock(&rq->lock);
+}
+
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
@@ -1255,19 +1740,6 @@ static void task_fork_dl(struct task_struct *p)
 	 */
 }
 
-static void task_dead_dl(struct task_struct *p)
-{
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-
-	/*
-	 * Since we are TASK_DEAD we won't slip out of the domain!
-	 */
-	raw_spin_lock_irq(&dl_b->lock);
-	/* XXX we should retain the bw until 0-lag */
-	dl_b->total_bw -= p->dl.dl_bw;
-	raw_spin_unlock_irq(&dl_b->lock);
-}
-
 static void set_curr_task_dl(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
@@ -1533,7 +2005,7 @@ static int push_dl_task(struct rq *rq)
 		 * then possible that next_task has migrated.
 		 */
 		task = pick_next_pushable_dl_task(rq);
-		if (task_cpu(next_task) == rq->cpu && task == next_task) {
+		if (task == next_task) {
 			/*
 			 * The task is still there. We don't try
 			 * again, some other cpu will pull it when ready.
@@ -1551,7 +2023,11 @@ static int push_dl_task(struct rq *rq)
 	}
 
 	deactivate_task(rq, next_task, 0);
+	sub_running_bw(next_task->dl.dl_bw, &rq->dl);
+	sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
+	add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
+	add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
@@ -1639,7 +2115,11 @@ static void pull_dl_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			sub_running_bw(p->dl.dl_bw, &src_rq->dl);
+			sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
 			set_task_cpu(p, this_cpu);
+			add_rq_bw(p->dl.dl_bw, &this_rq->dl);
+			add_running_bw(p->dl.dl_bw, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
@@ -1695,7 +2175,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
 		 * until we complete the update.
 		 */
 		raw_spin_lock(&src_dl_b->lock);
-		__dl_clear(src_dl_b, p->dl.dl_bw);
+		__dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
 		raw_spin_unlock(&src_dl_b->lock);
 	}
 
@@ -1737,13 +2217,26 @@ void __init init_sched_dl_class(void)
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * Start the deadline timer; if we switch back to dl before this we'll
-	 * continue consuming our current CBS slice. If we stay outside of
-	 * SCHED_DEADLINE until the deadline passes, the timer will reset the
-	 * task.
+	 * task_non_contending() can start the "inactive timer" (if the 0-lag
+	 * time is in the future). If the task switches back to dl before
+	 * the "inactive timer" fires, it can continue to consume its current
+	 * runtime using its current deadline. If it stays outside of
+	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
+	 * will reset the task parameters.
 	 */
-	if (!start_dl_timer(p))
-		__dl_clear_params(p);
+	if (task_on_rq_queued(p) && p->dl.dl_runtime)
+		task_non_contending(p);
+
+	if (!task_on_rq_queued(p))
+		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+
+	/*
+	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
+	 * at the 0-lag time, because the task could have been migrated
+	 * while running as SCHED_OTHER in the meantime.
+	 */
+	if (p->dl.dl_non_contending)
+		p->dl.dl_non_contending = 0;
 
 	/*
 	 * Since this might be the only -deadline task on the rq,
@@ -1762,11 +2255,15 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+		put_task_struct(p);
 
 	/* If p is not queued we will update its parameters at next wakeup. */
-	if (!task_on_rq_queued(p))
-		return;
+	if (!task_on_rq_queued(p)) {
+		add_rq_bw(p->dl.dl_bw, &rq->dl);
 
+		return;
+	}
 	/*
 	 * If p is boosted we already updated its params in
 	 * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
@@ -1836,6 +2333,7 @@ const struct sched_class dl_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_dl,
+	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed       = set_cpus_allowed_dl,
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
@@ -1845,7 +2343,6 @@ const struct sched_class dl_sched_class = {
 	.set_curr_task		= set_curr_task_dl,
 	.task_tick		= task_tick_dl,
 	.task_fork              = task_fork_dl,
-	.task_dead		= task_dead_dl,
 
 	.prio_changed           = prio_changed_dl,
 	.switched_from		= switched_from_dl,
@@ -1854,6 +2351,317 @@ const struct sched_class dl_sched_class = {
 	.update_curr		= update_curr_dl,
 };
 
+int sched_dl_global_validate(void)
+{
+	u64 runtime = global_rt_runtime();
+	u64 period = global_rt_period();
+	u64 new_bw = to_ratio(period, runtime);
+	struct dl_bw *dl_b;
+	int cpu, ret = 0;
+	unsigned long flags;
+
+	/*
+	 * Here we want to check the bandwidth not being set to some
+	 * value smaller than the currently allocated bandwidth in
+	 * any of the root_domains.
+	 *
+	 * FIXME: Cycling on all the CPUs is overdoing, but simpler than
+	 * cycling on root_domains... Discussion on different/better
+	 * solutions is welcome!
+	 */
+	for_each_possible_cpu(cpu) {
+		rcu_read_lock_sched();
+		dl_b = dl_bw_of(cpu);
+
+		raw_spin_lock_irqsave(&dl_b->lock, flags);
+		if (new_bw < dl_b->total_bw)
+			ret = -EBUSY;
+		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+		rcu_read_unlock_sched();
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
+{
+	if (global_rt_runtime() == RUNTIME_INF) {
+		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
+		dl_rq->extra_bw = 1 << BW_SHIFT;
+	} else {
+		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
+			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
+		dl_rq->extra_bw = to_ratio(global_rt_period(),
+						    global_rt_runtime());
+	}
+}
+
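For illustration only, a standalone sketch of the fixed-point setup done by init_dl_rq_bw_ratio() above, mirroring to_ratio() as it is used in this file; BW_SHIFT = 20 and RATIO_SHIFT = 8 are assumed, and the RT limits are hypothetical (950 ms every 1 s):

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT    20
#define RATIO_SHIFT 8

/* assumed to_ratio() equivalent: runtime / period in BW_SHIFT fixed point */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	uint64_t rt_runtime = 950000000;	/* hypothetical global RT runtime */
	uint64_t rt_period  = 1000000000;	/* hypothetical global RT period */

	/* bw_ratio = 1 / Umax in RATIO_SHIFT fixed point */
	uint64_t bw_ratio = to_ratio(rt_runtime, rt_period) >> (BW_SHIFT - RATIO_SHIFT);
	/* extra_bw = Umax in BW_SHIFT fixed point */
	uint64_t extra_bw = to_ratio(rt_period, rt_runtime);

	printf("bw_ratio = %llu / 256, extra_bw = %llu / 1048576\n",
	       (unsigned long long)bw_ratio, (unsigned long long)extra_bw);
	return 0;
}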
+void sched_dl_do_global(void)
+{
+	u64 new_bw = -1;
+	struct dl_bw *dl_b;
+	int cpu;
+	unsigned long flags;
+
+	def_dl_bandwidth.dl_period = global_rt_period();
+	def_dl_bandwidth.dl_runtime = global_rt_runtime();
+
+	if (global_rt_runtime() != RUNTIME_INF)
+		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+
+	/*
+	 * FIXME: As above...
+	 */
+	for_each_possible_cpu(cpu) {
+		rcu_read_lock_sched();
+		dl_b = dl_bw_of(cpu);
+
+		raw_spin_lock_irqsave(&dl_b->lock, flags);
+		dl_b->bw = new_bw;
+		raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+		rcu_read_unlock_sched();
+		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
+	}
+}
+
+/*
+ * We must be sure that accepting a new task (or allowing changing the
+ * parameters of an existing one) is consistent with the bandwidth
+ * constraints. If yes, this function also accordingly updates the currently
+ * allocated bandwidth to reflect the new situation.
+ *
+ * This function is called while holding p's rq->lock.
+ */
+int sched_dl_overflow(struct task_struct *p, int policy,
+		      const struct sched_attr *attr)
+{
+	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+	u64 period = attr->sched_period ?: attr->sched_deadline;
+	u64 runtime = attr->sched_runtime;
+	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
+	int cpus, err = -1;
+
+	/* !deadline task may carry old deadline bandwidth */
+	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
+		return 0;
+
+	/*
+	 * Whether a task enters, leaves, or stays -deadline but changes
+	 * its parameters, we may need to update the total allocated
+	 * bandwidth of the container accordingly.
+	 */
+	raw_spin_lock(&dl_b->lock);
+	cpus = dl_bw_cpus(task_cpu(p));
+	if (dl_policy(policy) && !task_has_dl_policy(p) &&
+	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+		if (hrtimer_active(&p->dl.inactive_timer))
+			__dl_clear(dl_b, p->dl.dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+		err = 0;
+	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
+		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		/*
+		 * XXX this is slightly incorrect: when the task
+		 * utilization decreases, we should delay the total
+		 * utilization change until the task's 0-lag point.
+		 * But this would require to set the task's "inactive
+		 * timer" when the task is not inactive.
+		 */
+		__dl_clear(dl_b, p->dl.dl_bw, cpus);
+		__dl_add(dl_b, new_bw, cpus);
+		dl_change_utilization(p, new_bw);
+		err = 0;
+	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
+		/*
+		 * Do not decrease the total deadline utilization here,
+		 * switched_from_dl() will take care to do it at the correct
+		 * (0-lag) time.
+		 */
+		err = 0;
+	}
+	raw_spin_unlock(&dl_b->lock);
+
+	return err;
+}
+
+/*
+ * This function initializes the sched_dl_entity of a newly becoming
+ * SCHED_DEADLINE task.
+ *
+ * Only the static values are considered here, the actual runtime and the
+ * absolute deadline will be properly calculated when the task is enqueued
+ * for the first time with its new policy.
+ */
+void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	dl_se->dl_runtime = attr->sched_runtime;
+	dl_se->dl_deadline = attr->sched_deadline;
+	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
+	dl_se->flags = attr->sched_flags;
+	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+}
+
+void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	attr->sched_priority = p->rt_priority;
+	attr->sched_runtime = dl_se->dl_runtime;
+	attr->sched_deadline = dl_se->dl_deadline;
+	attr->sched_period = dl_se->dl_period;
+	attr->sched_flags = dl_se->flags;
+}
+
+/*
+ * This function validates the new parameters of a -deadline task.
+ * We require the deadline to be non-zero and greater than or equal to
+ * the runtime, and the period to be either zero or greater than or equal
+ * to the deadline. Furthermore, we have to be sure that
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
+ */
+bool __checkparam_dl(const struct sched_attr *attr)
+{
+	/* deadline != 0 */
+	if (attr->sched_deadline == 0)
+		return false;
+
+	/*
+	 * Since we truncate DL_SCALE bits, make sure we're at least
+	 * that big.
+	 */
+	if (attr->sched_runtime < (1ULL << DL_SCALE))
+		return false;
+
+	/*
+	 * Since we use the MSB for wrap-around and sign issues, make
+	 * sure it's not set (mind that period can be equal to zero).
+	 */
+	if (attr->sched_deadline & (1ULL << 63) ||
+	    attr->sched_period & (1ULL << 63))
+		return false;
+
+	/* runtime <= deadline <= period (if period != 0) */
+	if ((attr->sched_period != 0 &&
+	     attr->sched_period < attr->sched_deadline) ||
+	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	return true;
+}
+
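For illustration only, a standalone restatement of the __checkparam_dl() rules above, with DL_SCALE assumed to be 10 (roughly the 1us internal resolution mentioned in the comment) and hypothetical parameter sets:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DL_SCALE 10	/* assumed: ~1us internal resolution */

static bool checkparam(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	if (deadline == 0)
		return false;
	if (runtime < (1ULL << DL_SCALE))
		return false;
	if ((deadline & (1ULL << 63)) || (period & (1ULL << 63)))
		return false;
	/* runtime <= deadline <= period (if period != 0) */
	if ((period != 0 && period < deadline) || deadline < runtime)
		return false;
	return true;
}

int main(void)
{
	/* 10 ms runtime, 40 ms deadline, 100 ms period: accepted */
	printf("%d\n", checkparam(10000000, 40000000, 100000000));
	/* deadline shorter than runtime: rejected */
	printf("%d\n", checkparam(10000000, 5000000, 100000000));
	return 0;
}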
+/*
+ * This function clears the sched_dl_entity static params.
+ */
+void __dl_clear_params(struct task_struct *p)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	dl_se->dl_runtime = 0;
+	dl_se->dl_deadline = 0;
+	dl_se->dl_period = 0;
+	dl_se->flags = 0;
+	dl_se->dl_bw = 0;
+	dl_se->dl_density = 0;
+
+	dl_se->dl_throttled = 0;
+	dl_se->dl_yielded = 0;
+	dl_se->dl_non_contending = 0;
+}
+
+bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
+{
+	struct sched_dl_entity *dl_se = &p->dl;
+
+	if (dl_se->dl_runtime != attr->sched_runtime ||
+	    dl_se->dl_deadline != attr->sched_deadline ||
+	    dl_se->dl_period != attr->sched_period ||
+	    dl_se->flags != attr->sched_flags)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_SMP
+int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
+{
+	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
+							cs_cpus_allowed);
+	struct dl_bw *dl_b;
+	bool overflow;
+	int cpus, ret;
+	unsigned long flags;
+
+	rcu_read_lock_sched();
+	dl_b = dl_bw_of(dest_cpu);
+	raw_spin_lock_irqsave(&dl_b->lock, flags);
+	cpus = dl_bw_cpus(dest_cpu);
+	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
+	if (overflow)
+		ret = -EBUSY;
+	else {
+		/*
+		 * We reserve space for this task in the destination
+		 * root_domain, as we can't fail after this point.
+		 * We will free resources in the source root_domain
+		 * later on (see set_cpus_allowed_dl()).
+		 */
+		__dl_add(dl_b, p->dl.dl_bw, cpus);
+		ret = 0;
+	}
+	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return ret;
+}
+
+int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+				 const struct cpumask *trial)
+{
+	int ret = 1, trial_cpus;
+	struct dl_bw *cur_dl_b;
+	unsigned long flags;
+
+	rcu_read_lock_sched();
+	cur_dl_b = dl_bw_of(cpumask_any(cur));
+	trial_cpus = cpumask_weight(trial);
+
+	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
+	if (cur_dl_b->bw != -1 &&
+	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
+		ret = 0;
+	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return ret;
+}
+
+bool dl_cpu_busy(unsigned int cpu)
+{
+	unsigned long flags;
+	struct dl_bw *dl_b;
+	bool overflow;
+	int cpus;
+
+	rcu_read_lock_sched();
+	dl_b = dl_bw_of(cpu);
+	raw_spin_lock_irqsave(&dl_b->lock, flags);
+	cpus = dl_bw_cpus(cpu);
+	overflow = __dl_overflow(dl_b, cpus, 0, 0);
+	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+	rcu_read_unlock_sched();
+	return overflow;
+}
+#endif
+
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 38f0193..4fa66de 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -552,15 +552,21 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
 
-	P(rt_nr_running);
+	PU(rt_nr_running);
+#ifdef CONFIG_SMP
+	PU(rt_nr_migratory);
+#endif
 	P(rt_throttled);
 	PN(rt_time);
 	PN(rt_runtime);
 
 #undef PN
+#undef PU
 #undef P
 }
 
@@ -569,14 +575,21 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 	struct dl_bw *dl_bw;
 
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
-	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+
+#define PU(x) \
+	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+	PU(dl_nr_running);
 #ifdef CONFIG_SMP
+	PU(dl_nr_migratory);
 	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
 #else
 	dl_bw = &dl_rq->dl_bw;
 #endif
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
 	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
 }
 
 extern __read_mostly int sched_clock_running;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d711093..008c514 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -369,8 +369,9 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 }
 
 /* Iterate thr' all leaf cfs_rq's on a runqueue */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-	list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)			\
+	list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,	\
+				 leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
 static inline struct cfs_rq *
@@ -463,8 +464,8 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
 
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-		for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)	\
+		for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 {
@@ -1381,7 +1382,6 @@ static unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
 static unsigned long capacity_of(int cpu);
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -2469,7 +2469,8 @@ void task_numa_work(struct callback_head *work)
 		return;
 
 
-	down_read(&mm->mmap_sem);
+	if (!down_read_trylock(&mm->mmap_sem))
+		return;
 	vma = find_vma(mm, start);
 	if (!vma) {
 		reset_ptenuma_scan(p);
@@ -2584,6 +2585,60 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		}
 	}
 }
+
+/*
+ * Can a task be moved from prev_cpu to this_cpu without causing a load
+ * imbalance that would trigger the load balancer?
+ */
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	struct numa_stats prev_load, this_load;
+	s64 this_eff_load, prev_eff_load;
+
+	update_numa_stats(&prev_load, cpu_to_node(prev_cpu));
+	update_numa_stats(&this_load, cpu_to_node(this_cpu));
+
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync) {
+		unsigned long current_load = task_h_load(current);
+
+		if (this_load.load > current_load)
+			this_load.load -= current_load;
+		else
+			this_load.load = 0;
+	}
+
+	/*
+	 * In low-load situations, where this_cpu's node is idle due to the
+	 * sync cause above having dropped this_load.load to 0, move the task.
+	 * Moving to an idle socket will not create a bad imbalance.
+	 *
+	 * Otherwise check if the nodes are near enough in load to allow this
+	 * task to be woken on this_cpu's node.
+	 */
+	if (this_load.load > 0) {
+		unsigned long task_load = task_h_load(p);
+
+		this_eff_load = 100;
+		this_eff_load *= prev_load.compute_capacity;
+
+		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+		prev_eff_load *= this_load.compute_capacity;
+
+		this_eff_load *= this_load.load + task_load;
+		prev_eff_load *= prev_load.load - task_load;
+
+		return this_eff_load <= prev_eff_load;
+	}
+
+	return true;
+}
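For illustration only, a standalone sketch of the effective-load comparison performed by numa_wake_affine() above, with hypothetical node loads, capacities and imbalance_pct:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical per-node stats and task load, in load-average units. */
	int64_t this_load = 2048, this_capacity = 4096;
	int64_t prev_load = 1024, prev_capacity = 4096;
	int64_t task_load = 512;
	int imbalance_pct = 125;		/* hypothetical sd->imbalance_pct */

	int64_t this_eff_load = 100;
	int64_t prev_eff_load = 100 + (imbalance_pct - 100) / 2;

	this_eff_load *= prev_capacity;
	prev_eff_load *= this_capacity;

	this_eff_load *= this_load + task_load;
	prev_eff_load *= prev_load - task_load;

	printf("%s the task on the waking node\n",
	       this_eff_load <= prev_eff_load ? "wake" : "do not wake");
	return 0;
}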
 #else
 static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 {
@@ -2596,6 +2651,15 @@ static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 {
 }
+
+#ifdef CONFIG_SMP
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	return true;
+}
+#endif /* CONFIG_SMP */
 #endif /* CONFIG_NUMA_BALANCING */
 
 static void
@@ -2916,12 +2980,12 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 	/*
 	 * Step 2: update *_avg.
 	 */
-	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
 	if (cfs_rq) {
 		cfs_rq->runnable_load_avg =
-			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
+			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
 	}
-	sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+	sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib);
 
 	return 1;
 }
@@ -2982,8 +3046,7 @@ __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
  * differential update where we store the last value we propagated. This in
  * turn allows skipping updates if the differential is 'small'.
  *
- * Updating tg's load_avg is necessary before update_cfs_share() (which is
- * done) and effective_load() (which is not done because it is too costly).
+ * Updating tg's load_avg is necessary before update_cfs_share().
  */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 {
@@ -3563,7 +3626,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_runtime_enabled())  {
 		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
+			     "kernel parameter schedstats=enable or "
 			     "kernel.sched_schedstats=1\n");
 	}
 #endif
@@ -4642,24 +4705,43 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
+/*
+ * Both these cpu hotplug callbacks race against unregister_fair_sched_group()
+ *
+ * The race is harmless, since modifying bandwidth settings of unhooked group
+ * bits doesn't do much.
+ */
+
+/* cpu online callback */
 static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
-	struct cfs_rq *cfs_rq;
+	struct task_group *tg;
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
+	lockdep_assert_held(&rq->lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tg, &task_groups, list) {
+		struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
 		raw_spin_lock(&cfs_b->lock);
 		cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
 		raw_spin_unlock(&cfs_b->lock);
 	}
+	rcu_read_unlock();
 }
 
+/* cpu offline callback */
 static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
-	struct cfs_rq *cfs_rq;
+	struct task_group *tg;
 
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	lockdep_assert_held(&rq->lock);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tg, &task_groups, list) {
+		struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -4677,6 +4759,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 	}
+	rcu_read_unlock();
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
@@ -5215,126 +5298,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return 0;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * effective_load() calculates the load change as seen from the root_task_group
- *
- * Adding load to a group doesn't make a group heavier, but can cause movement
- * of group shares between cpus. Assuming the shares were perfectly aligned one
- * can calculate the shift in shares.
- *
- * Calculate the effective load difference if @wl is added (subtracted) to @tg
- * on this @cpu and results in a total addition (subtraction) of @wg to the
- * total group weight.
- *
- * Given a runqueue weight distribution (rw_i) we can compute a shares
- * distribution (s_i) using:
- *
- *   s_i = rw_i / \Sum rw_j						(1)
- *
- * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
- * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
- * shares distribution (s_i):
- *
- *   rw_i = {   2,   4,   1,   0 }
- *   s_i  = { 2/7, 4/7, 1/7,   0 }
- *
- * As per wake_affine() we're interested in the load of two CPUs (the CPU the
- * task used to run on and the CPU the waker is running on), we need to
- * compute the effect of waking a task on either CPU and, in case of a sync
- * wakeup, compute the effect of the current task going to sleep.
- *
- * So for a change of @wl to the local @cpu with an overall group weight change
- * of @wl we can compute the new shares distribution (s'_i) using:
- *
- *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
- *
- * Suppose we're interested in CPUs 0 and 1, and want to compute the load
- * differences in waking a task to CPU 0. The additional task changes the
- * weight and shares distributions like:
- *
- *   rw'_i = {   3,   4,   1,   0 }
- *   s'_i  = { 3/8, 4/8, 1/8,   0 }
- *
- * We can then compute the difference in effective weight by using:
- *
- *   dw_i = S * (s'_i - s_i)						(3)
- *
- * Where 'S' is the group weight as seen by its parent.
- *
- * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
- * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
- * 4/7) times the weight of the group.
- */
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
-	struct sched_entity *se = tg->se[cpu];
-
-	if (!tg->parent)	/* the trivial, non-cgroup case */
-		return wl;
-
-	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = se->my_q;
-		long W, w = cfs_rq_load_avg(cfs_rq);
-
-		tg = cfs_rq->tg;
-
-		/*
-		 * W = @wg + \Sum rw_j
-		 */
-		W = wg + atomic_long_read(&tg->load_avg);
-
-		/* Ensure \Sum rw_j >= rw_i */
-		W -= cfs_rq->tg_load_avg_contrib;
-		W += w;
-
-		/*
-		 * w = rw_i + @wl
-		 */
-		w += wl;
-
-		/*
-		 * wl = S * s'_i; see (2)
-		 */
-		if (W > 0 && w < W)
-			wl = (w * (long)scale_load_down(tg->shares)) / W;
-		else
-			wl = scale_load_down(tg->shares);
-
-		/*
-		 * Per the above, wl is the new se->load.weight value; since
-		 * those are clipped to [MIN_SHARES, ...) do so now. See
-		 * calc_cfs_shares().
-		 */
-		if (wl < MIN_SHARES)
-			wl = MIN_SHARES;
-
-		/*
-		 * wl = dw_i = S * (s'_i - s_i); see (3)
-		 */
-		wl -= se->avg.load_avg;
-
-		/*
-		 * Recursively apply this logic to all parent groups to compute
-		 * the final effective load change on the root group. Since
-		 * only the @tg group gets extra weight, all parent groups can
-		 * only redistribute existing shares. @wl is the shift in shares
-		 * resulting from this level per the above.
-		 */
-		wg = 0;
-	}
-
-	return wl;
-}
-#else
-
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
-	return wl;
-}
-
-#endif
-
 static void record_wakee(struct task_struct *p)
 {
 	/*
@@ -5385,67 +5348,25 @@ static int wake_wide(struct task_struct *p)
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
-	s64 this_load, load;
-	s64 this_eff_load, prev_eff_load;
-	int idx, this_cpu;
-	struct task_group *tg;
-	unsigned long weight;
-	int balanced;
-
-	idx	  = sd->wake_idx;
-	this_cpu  = smp_processor_id();
-	load	  = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+	int this_cpu = smp_processor_id();
+	bool affine = false;
 
 	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current CPU:
+	 * Common case: CPUs are in the same socket, and select_idle_sibling()
+	 * will do its thing regardless of what we return:
 	 */
-	if (sync) {
-		tg = task_group(current);
-		weight = current->se.avg.load_avg;
-
-		this_load += effective_load(tg, this_cpu, -weight, -weight);
-		load += effective_load(tg, prev_cpu, 0, -weight);
-	}
-
-	tg = task_group(p);
-	weight = p->se.avg.load_avg;
-
-	/*
-	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped this_load to 0, we'll
-	 * always have an imbalance, but there's really nothing you can do
-	 * about that, so that's good too.
-	 *
-	 * Otherwise check if either cpus are near enough in load to allow this
-	 * task to be woken on this_cpu.
-	 */
-	this_eff_load = 100;
-	this_eff_load *= capacity_of(prev_cpu);
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= capacity_of(this_cpu);
-
-	if (this_load > 0) {
-		this_eff_load *= this_load +
-			effective_load(tg, this_cpu, weight, weight);
-
-		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
-	}
-
-	balanced = this_eff_load <= prev_eff_load;
+	if (cpus_share_cache(prev_cpu, this_cpu))
+		affine = true;
+	else
+		affine = numa_wake_affine(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	if (affine) {
+		schedstat_inc(sd->ttwu_move_affine);
+		schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	}
 
-	if (!balanced)
-		return 0;
-
-	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
-
-	return 1;
+	return affine;
 }
 
 static inline int task_util(struct task_struct *p);
@@ -5484,12 +5405,12 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpumask_intersects(sched_group_cpus(group),
+		if (!cpumask_intersects(sched_group_span(group),
 					&p->cpus_allowed))
 			continue;
 
 		local_group = cpumask_test_cpu(this_cpu,
-					       sched_group_cpus(group));
+					       sched_group_span(group));
 
 		/*
 		 * Tally up the load of all CPUs in the group and find
@@ -5499,7 +5420,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		runnable_load = 0;
 		max_spare_cap = 0;
 
-		for_each_cpu(i, sched_group_cpus(group)) {
+		for_each_cpu(i, sched_group_span(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -5602,10 +5523,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 
 	/* Check if we have any choice: */
 	if (group->group_weight == 1)
-		return cpumask_first(sched_group_cpus(group));
+		return cpumask_first(sched_group_span(group));
 
 	/* Traverse only the allowed CPUs */
-	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
 		if (idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
@@ -5640,43 +5561,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
-/*
- * Implement a for_each_cpu() variant that starts the scan at a given cpu
- * (@start), and wraps around.
- *
- * This is used to scan for idle CPUs; such that not all CPUs looking for an
- * idle CPU find the same CPU. The down-side is that tasks tend to cycle
- * through the LLC domain.
- *
- * Especially tbench is found sensitive to this.
- */
-
-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
-{
-	int next;
-
-again:
-	next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
-
-	if (*wrapped) {
-		if (next >= start)
-			return nr_cpumask_bits;
-	} else {
-		if (next >= nr_cpumask_bits) {
-			*wrapped = 1;
-			n = -1;
-			goto again;
-		}
-	}
-
-	return next;
-}
-
-#define for_each_cpu_wrap(cpu, mask, start, wrap)				\
-	for ((wrap) = 0, (cpu) = (start)-1;					\
-		(cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)),	\
-		(cpu) < nr_cpumask_bits; )
-
 #ifdef CONFIG_SCHED_SMT
 
 static inline void set_idle_cores(int cpu, int val)
@@ -5736,7 +5620,7 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu, wrap;
+	int core, cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -5746,7 +5630,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
-	for_each_cpu_wrap(core, cpus, target, wrap) {
+	for_each_cpu_wrap(core, cpus, target) {
 		bool idle = true;
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
@@ -5809,27 +5693,38 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct sched_domain *this_sd;
-	u64 avg_cost, avg_idle = this_rq()->avg_idle;
+	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, wrap;
+	int cpu, nr = INT_MAX;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
 		return -1;
 
-	avg_cost = this_sd->avg_scan_cost;
-
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
 	 * particular is sensitive here.
 	 */
-	if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
+	avg_idle = this_rq()->avg_idle / 512;
+	avg_cost = this_sd->avg_scan_cost + 1;
+
+	if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
 		return -1;
 
+	if (sched_feat(SIS_PROP)) {
+		u64 span_avg = sd->span_weight * avg_idle;
+		if (span_avg > 4*avg_cost)
+			nr = div_u64(span_avg, avg_cost);
+		else
+			nr = 4;
+	}
+
 	time = local_clock();
 
-	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+		if (!--nr)
+			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
 		if (idle_cpu(cpu))
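A standalone sketch (not part of the patch) of the SIS_PROP arithmetic used above: the
idle-CPU scan in select_idle_cpu() is capped at a budget proportional to the ratio of
recent idle time to the average scan cost, with a floor of four CPUs. The helper name
and the numbers in main() are illustrative assumptions, not kernel values.

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for the SIS_PROP budget computed in select_idle_cpu(). */
static uint64_t sis_prop_nr(uint64_t avg_idle_ns, uint64_t avg_scan_cost_ns,
			    unsigned int span_weight)
{
	uint64_t avg_idle = avg_idle_ns / 512;		/* large fuzz factor */
	uint64_t avg_cost = avg_scan_cost_ns + 1;	/* avoid a zero divisor */
	uint64_t span_avg = (uint64_t)span_weight * avg_idle;

	if (span_avg > 4 * avg_cost)
		return span_avg / avg_cost;		/* proportional budget */

	return 4;					/* minimum scan depth */
}

int main(void)
{
	/* 2ms recent idle, 5us per-CPU scan cost, 16-CPU LLC domain */
	printf("scan budget: %llu CPUs\n",
	       (unsigned long long)sis_prop_nr(2000000, 5000, 16));
	return 0;
}

With these illustrative numbers the budget comes out at 12 CPUs, after which the scan
bails out, just as the !--nr check in the loop above does.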
@@ -6011,11 +5906,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 
 	if (affine_sd) {
 		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
+		if (cpu == prev_cpu)
+			goto pick_cpu;
+
+		if (wake_affine(affine_sd, p, prev_cpu, sync))
 			new_cpu = cpu;
 	}
 
 	if (!sd) {
+ pick_cpu:
 		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
@@ -6168,8 +6067,11 @@ static void set_last_buddy(struct sched_entity *se)
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se)
+	for_each_sched_entity(se) {
+		if (SCHED_WARN_ON(!se->on_rq))
+			return;
 		cfs_rq_of(se)->last = se;
+	}
 }
 
 static void set_next_buddy(struct sched_entity *se)
@@ -6177,8 +6079,11 @@ static void set_next_buddy(struct sched_entity *se)
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se)
+	for_each_sched_entity(se) {
+		if (SCHED_WARN_ON(!se->on_rq))
+			return;
 		cfs_rq_of(se)->next = se;
+	}
 }
 
 static void set_skip_buddy(struct sched_entity *se)
@@ -6686,6 +6591,10 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 	if (dst_nid == p->numa_preferred_nid)
 		return 0;
 
+	/* Leaving a core idle is often worse than degrading locality. */
+	if (env->idle != CPU_NOT_IDLE)
+		return -1;
+
 	if (numa_group) {
 		src_faults = group_faults(p, src_nid);
 		dst_faults = group_faults(p, dst_nid);
@@ -6970,10 +6879,28 @@ static void attach_tasks(struct lb_env *env)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->runnable_load_sum)
+		return false;
+
+	return true;
+}
+
 static void update_blocked_averages(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 	struct rq_flags rf;
 
 	rq_lock_irqsave(rq, &rf);
@@ -6983,7 +6910,7 @@ static void update_blocked_averages(int cpu)
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
+	for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
 		struct sched_entity *se;
 
 		/* throttled entities do not contribute to load */
@@ -6997,6 +6924,13 @@ static void update_blocked_averages(int cpu)
 		se = cfs_rq->tg->se[cpu];
 		if (se && !skip_blocked_update(se))
 			update_load_avg(se, 0);
+
+		/*
+		 * There can be a lot of idle CPU cgroups.  Don't let fully
+		 * decayed cfs_rqs linger on the list.
+		 */
+		if (cfs_rq_is_decayed(cfs_rq))
+			list_del_leaf_cfs_rq(cfs_rq);
 	}
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -7229,7 +7163,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 		 * span the current group.
 		 */
 
-		for_each_cpu(cpu, sched_group_cpus(sdg)) {
+		for_each_cpu(cpu, sched_group_span(sdg)) {
 			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
@@ -7408,7 +7342,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	memset(sgs, 0, sizeof(*sgs));
 
-	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
+	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		/* Bias balancing toward cpus of our domain */
@@ -7572,7 +7506,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		struct sg_lb_stats *sgs = &tmp_sgs;
 		int local_group;
 
-		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
+		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(sg));
 		if (local_group) {
 			sds->local = sg;
 			sgs = local;
@@ -7927,7 +7861,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
-	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
+	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
 		unsigned long capacity, wl;
 		enum fbq_type rt;
 
@@ -8033,7 +7967,6 @@ static int active_load_balance_cpu_stop(void *data);
 static int should_we_balance(struct lb_env *env)
 {
 	struct sched_group *sg = env->sd->groups;
-	struct cpumask *sg_cpus, *sg_mask;
 	int cpu, balance_cpu = -1;
 
 	/*
@@ -8043,11 +7976,9 @@ static int should_we_balance(struct lb_env *env)
 	if (env->idle == CPU_NEWLY_IDLE)
 		return 1;
 
-	sg_cpus = sched_group_cpus(sg);
-	sg_mask = sched_group_mask(sg);
 	/* Try to find first idle cpu */
-	for_each_cpu_and(cpu, sg_cpus, env->cpus) {
-		if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+	for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+		if (!idle_cpu(cpu))
 			continue;
 
 		balance_cpu = cpu;
@@ -8083,7 +8014,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.sd		= sd,
 		.dst_cpu	= this_cpu,
 		.dst_rq		= this_rq,
-		.dst_grpmask    = sched_group_cpus(sd->groups),
+		.dst_grpmask    = sched_group_span(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
 		.cpus		= cpus,
@@ -8659,6 +8590,10 @@ void nohz_balance_enter_idle(int cpu)
 	if (!cpu_active(cpu))
 		return;
 
+	/* Spare idle load balancing on CPUs that don't want to be disturbed: */
+	if (!is_housekeeping_cpu(cpu))
+		return;
+
 	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
 		return;
 
@@ -9523,10 +9458,10 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 void print_cfs_stats(struct seq_file *m, int cpu)
 {
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq, *pos;
 
 	rcu_read_lock();
-	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+	for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
 		print_cfs_rq(m, cpu, cfs_rq);
 	rcu_read_unlock();
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 11192e0..d3fb155 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -55,6 +55,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
 SCHED_FEAT(SIS_AVG_CPU, false)
+SCHED_FEAT(SIS_PROP, true)
 
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
@@ -76,7 +77,6 @@ SCHED_FEAT(WARN_DOUBLE_CLOCK, false)
 SCHED_FEAT(RT_PUSH_IPI, true)
 #endif
 
-SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index ef63adc..6c23e30 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -219,6 +219,7 @@ static void do_idle(void)
 	 */
 
 	__current_set_polling();
+	quiet_vmstat();
 	tick_nohz_idle_enter();
 
 	while (!need_resched()) {
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index f15fb2b..f14716a 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -117,7 +117,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  * load-average relies on per-cpu sampling from the tick, it is affected by
  * NO_HZ.
  *
- * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
  * entering NO_HZ state such that we can include this as an 'extra' cpu delta
  * when we read the global state.
  *
@@ -126,7 +126,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *  - When we go NO_HZ idle during the window, we can negate our sample
  *    contribution, causing under-accounting.
  *
- *    We avoid this by keeping two idle-delta counters and flipping them
+ *    We avoid this by keeping two NO_HZ-delta counters and flipping them
  *    when the window starts, thus separating old and new NO_HZ load.
  *
  *    The only trick is the slight shift in index flip for read vs write.
@@ -137,22 +137,22 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  *    r:0 0 1           1 0           0 1           1 0
  *    w:0 1 1           0 0           1 1           0 0
  *
- *    This ensures we'll fold the old idle contribution in this window while
+ *    This ensures we'll fold the old NO_HZ contribution in this window while
  *    accumulating the new one.
  *
- *  - When we wake up from NO_HZ idle during the window, we push up our
+ *  - When we wake up from NO_HZ during the window, we push up our
  *    contribution, since we effectively move our sample point to a known
  *    busy state.
  *
  *    This is solved by pushing the window forward, and thus skipping the
- *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    sample, for this cpu (effectively using the NO_HZ-delta for this cpu which
  *    was in effect at the time the window opened). This also solves the issue
- *    of having to deal with a cpu having been in NOHZ idle for multiple
- *    LOAD_FREQ intervals.
+ *    of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ
+ *    intervals.
  *
  * When making the ILB scale, we should try to pull this in as well.
  */
-static atomic_long_t calc_load_idle[2];
+static atomic_long_t calc_load_nohz[2];
 static int calc_load_idx;
 
 static inline int calc_load_write_idx(void)
@@ -167,7 +167,7 @@ static inline int calc_load_write_idx(void)
 
 	/*
 	 * If the folding window started, make sure we start writing in the
-	 * next idle-delta.
+	 * next NO_HZ-delta.
 	 */
 	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
 		idx++;
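A userspace sketch (not kernel code) of the two NO_HZ-delta slots and the index flip
documented above; the time_before(jiffies, calc_load_update) test is reduced to a plain
"window started" flag, so names and flow are simplified assumptions rather than the real
locking and timing.

#include <stdio.h>

static long calc_load_nohz[2];
static int calc_load_idx;

static int write_idx(int window_started)
{
	int idx = calc_load_idx;

	/* If the folding window started, write into the next slot. */
	if (window_started)
		idx++;

	return idx & 1;
}

static int read_idx(void)
{
	return calc_load_idx & 1;
}

int main(void)
{
	/* A CPU enters NO_HZ after the window opened: delta of +3 tasks. */
	calc_load_nohz[write_idx(1)] += 3;

	/* The global fold still reads the old slot for this window ... */
	printf("folded now: %ld\n", calc_load_nohz[read_idx()]);

	/* ... and picks up the +3 only after the index flips. */
	calc_load_idx++;
	printf("folded next window: %ld\n", calc_load_nohz[read_idx()]);
	return 0;
}

The point of the flip shows in the output: the +3 folded after the window opened is not
seen by the current window's read index, only by the next one.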
@@ -180,24 +180,24 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_enter_idle(void)
+void calc_load_nohz_start(void)
 {
 	struct rq *this_rq = this_rq();
 	long delta;
 
 	/*
-	 * We're going into NOHZ mode, if there's any pending delta, fold it
-	 * into the pending idle delta.
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
 	 */
 	delta = calc_load_fold_active(this_rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
-		atomic_long_add(delta, &calc_load_idle[idx]);
+		atomic_long_add(delta, &calc_load_nohz[idx]);
 	}
 }
 
-void calc_load_exit_idle(void)
+void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
 
@@ -217,13 +217,13 @@ void calc_load_exit_idle(void)
 		this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_fold_idle(void)
+static long calc_load_nohz_fold(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
 
-	if (atomic_long_read(&calc_load_idle[idx]))
-		delta = atomic_long_xchg(&calc_load_idle[idx], 0);
+	if (atomic_long_read(&calc_load_nohz[idx]))
+		delta = atomic_long_xchg(&calc_load_nohz[idx], 0);
 
 	return delta;
 }
@@ -299,9 +299,9 @@ calc_load_n(unsigned long load, unsigned long exp,
 
 /*
  * NO_HZ can leave us missing all per-cpu ticks calling
- * calc_load_account_active(), but since an idle CPU folds its delta into
- * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
- * in the pending idle delta if our idle period crossed a load cycle boundary.
+ * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
+ * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
+ * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
  *
  * Once we've updated the global active value, we need to apply the exponential
  * weights adjusted to the number of cycles missed.
@@ -330,7 +330,7 @@ static void calc_global_nohz(void)
 	}
 
 	/*
-	 * Flip the idle index...
+	 * Flip the NO_HZ index...
 	 *
 	 * Make sure we first write the new time then flip the index, so that
 	 * calc_load_write_idx() will see the new time when it reads the new
@@ -341,7 +341,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_fold_idle(void) { return 0; }
+static inline long calc_load_nohz_fold(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -362,9 +362,9 @@ void calc_global_load(unsigned long ticks)
 		return;
 
 	/*
-	 * Fold the 'old' idle-delta to include all NO_HZ cpus.
+	 * Fold the 'old' NO_HZ-delta to include all NO_HZ cpus.
 	 */
-	delta = calc_load_fold_idle();
+	delta = calc_load_nohz_fold();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
 
@@ -378,7 +378,8 @@ void calc_global_load(unsigned long ticks)
 	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
 
 	/*
-	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
+	 * In case we went to NO_HZ for multiple LOAD_FREQ intervals,
+	 * catch up in bulk.
 	 */
 	calc_global_nohz();
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 979b734..45caf93 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -840,6 +840,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+		int skip;
+
+		/*
+		 * When span == cpu_online_mask, taking each rq->lock
+		 * can be time-consuming. Try to avoid it when possible.
+		 */
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+		if (skip)
+			continue;
 
 		raw_spin_lock(&rq->lock);
 		if (rt_rq->rt_time) {
@@ -1819,7 +1830,7 @@ static int push_rt_task(struct rq *rq)
 		 * pushing.
 		 */
 		task = pick_next_pushable_task(rq);
-		if (task_cpu(next_task) == rq->cpu && task == next_task) {
+		if (task == next_task) {
 			/*
 			 * The task hasn't migrated, and is still the next
 			 * eligible task, but we failed to find a run-queue
@@ -2438,6 +2449,316 @@ const struct sched_class rt_sched_class = {
 	.update_curr		= update_curr_rt,
 };
 
+#ifdef CONFIG_RT_GROUP_SCHED
+/*
+ * Ensure that the real time constraints are schedulable.
+ */
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
+{
+	struct task_struct *g, *p;
+
+	/*
+	 * Autogroups do not have RT tasks; see autogroup_create().
+	 */
+	if (task_group_is_autogroup(tg))
+		return 0;
+
+	for_each_process_thread(g, p) {
+		if (rt_task(p) && task_group(p) == tg)
+			return 1;
+	}
+
+	return 0;
+}
+
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
+
+static int tg_rt_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
+
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
+	}
+
+	/*
+	 * Cannot have more runtime than the period.
+	 */
+	if (runtime > period && runtime != RUNTIME_INF)
+		return -EINVAL;
+
+	/*
+	 * Ensure we don't starve existing RT tasks.
+	 */
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
+
+	total = to_ratio(period, runtime);
+
+	/*
+	 * Nobody can have more than the global setting allows.
+	 */
+	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
+		return -EINVAL;
+
+	/*
+	 * The sum of our children's runtime should not exceed our own.
+	 */
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
+
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
+	}
+
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+{
+	int ret;
+
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	rcu_read_lock();
+	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int tg_set_rt_bandwidth(struct task_group *tg,
+		u64 rt_period, u64 rt_runtime)
+{
+	int i, err = 0;
+
+	/*
+	 * Disallowing the root group's RT runtime is BAD; it would prevent the
+	 * kernel from creating (and/or operating) RT threads.
+	 */
+	if (tg == &root_task_group && rt_runtime == 0)
+		return -EINVAL;
+
+	/* A period of zero doesn't make any sense. */
+	if (rt_period == 0)
+		return -EINVAL;
+
+	mutex_lock(&rt_constraints_mutex);
+	read_lock(&tasklist_lock);
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
+		goto unlock;
+
+	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
+	tg->rt_bandwidth.rt_runtime = rt_runtime;
+
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = tg->rt_rq[i];
+
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_runtime;
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
+unlock:
+	read_unlock(&tasklist_lock);
+	mutex_unlock(&rt_constraints_mutex);
+
+	return err;
+}
+
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
+{
+	u64 rt_runtime, rt_period;
+
+	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+	if (rt_runtime_us < 0)
+		rt_runtime = RUNTIME_INF;
+
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
+}
+
+long sched_group_rt_runtime(struct task_group *tg)
+{
+	u64 rt_runtime_us;
+
+	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
+		return -1;
+
+	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
+	do_div(rt_runtime_us, NSEC_PER_USEC);
+	return rt_runtime_us;
+}
+
+int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
+{
+	u64 rt_runtime, rt_period;
+
+	rt_period = rt_period_us * NSEC_PER_USEC;
+	rt_runtime = tg->rt_bandwidth.rt_runtime;
+
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
+}
+
+long sched_group_rt_period(struct task_group *tg)
+{
+	u64 rt_period_us;
+
+	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	do_div(rt_period_us, NSEC_PER_USEC);
+	return rt_period_us;
+}
+
+static int sched_rt_global_constraints(void)
+{
+	int ret = 0;
+
+	mutex_lock(&rt_constraints_mutex);
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(NULL, 0, 0);
+	read_unlock(&tasklist_lock);
+	mutex_unlock(&rt_constraints_mutex);
+
+	return ret;
+}
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
+#else /* !CONFIG_RT_GROUP_SCHED */
+static int sched_rt_global_constraints(void)
+{
+	unsigned long flags;
+	int i;
+
+	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
+
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = global_rt_runtime();
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
+
+	return 0;
+}
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+static int sched_rt_global_validate(void)
+{
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
+	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
+		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void sched_rt_do_global(void)
+{
+	def_rt_bandwidth.rt_runtime = global_rt_runtime();
+	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
+}
+
+int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int old_period, old_runtime;
+	static DEFINE_MUTEX(mutex);
+	int ret;
+
+	mutex_lock(&mutex);
+	old_period = sysctl_sched_rt_period;
+	old_runtime = sysctl_sched_rt_runtime;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (!ret && write) {
+		ret = sched_rt_global_validate();
+		if (ret)
+			goto undo;
+
+		ret = sched_dl_global_validate();
+		if (ret)
+			goto undo;
+
+		ret = sched_rt_global_constraints();
+		if (ret)
+			goto undo;
+
+		sched_rt_do_global();
+		sched_dl_do_global();
+	}
+	if (0) {
+undo:
+		sysctl_sched_rt_period = old_period;
+		sysctl_sched_rt_runtime = old_runtime;
+	}
+	mutex_unlock(&mutex);
+
+	return ret;
+}
+
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/*
+	 * Make sure that internally we keep jiffies.
+	 * Also, writing zero resets the timeslice to default:
+	 */
+	if (!ret && write) {
+		sched_rr_timeslice =
+			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
+			msecs_to_jiffies(sysctl_sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6dda2aa..eeef1a3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -39,9 +39,9 @@
 #include "cpuacct.h"
 
 #ifdef CONFIG_SCHED_DEBUG
-#define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
+# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
 #else
-#define SCHED_WARN_ON(x)	((void)(x))
+# define SCHED_WARN_ON(x)	({ (void)(x), 0; })
 #endif
 
 struct rq;
@@ -218,23 +218,25 @@ static inline int dl_bandwidth_enabled(void)
 	return sysctl_sched_rt_runtime >= 0;
 }
 
-extern struct dl_bw *dl_bw_of(int i);
-
 struct dl_bw {
 	raw_spinlock_t lock;
 	u64 bw, total_bw;
 };
 
+static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
+
 static inline
-void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 {
 	dl_b->total_bw -= tsk_bw;
+	__dl_update(dl_b, (s32)tsk_bw / cpus);
 }
 
 static inline
-void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
 {
 	dl_b->total_bw += tsk_bw;
+	__dl_update(dl_b, -((s32)tsk_bw / cpus));
 }
 
 static inline
@@ -244,7 +246,22 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
 	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
 }
 
+void dl_change_utilization(struct task_struct *p, u64 new_bw);
 extern void init_dl_bw(struct dl_bw *dl_b);
+extern int sched_dl_global_validate(void);
+extern void sched_dl_do_global(void);
+extern int sched_dl_overflow(struct task_struct *p, int policy,
+			     const struct sched_attr *attr);
+extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
+extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
+extern bool __checkparam_dl(const struct sched_attr *attr);
+extern void __dl_clear_params(struct task_struct *p);
+extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
+extern int dl_task_can_attach(struct task_struct *p,
+			      const struct cpumask *cs_cpus_allowed);
+extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
+					const struct cpumask *trial);
+extern bool dl_cpu_busy(unsigned int cpu);
 
 #ifdef CONFIG_CGROUP_SCHED
 
@@ -366,6 +383,11 @@ extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent);
+extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
+extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
+extern long sched_group_rt_period(struct task_group *tg);
+extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
 
 extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_online_group(struct task_group *tg,
@@ -558,6 +580,30 @@ struct dl_rq {
 #else
 	struct dl_bw dl_bw;
 #endif
+	/*
+	 * "Active utilization" for this runqueue: increased when a
+	 * task wakes up (becomes TASK_RUNNING) and decreased when a
+	 * task blocks
+	 */
+	u64 running_bw;
+
+	/*
+	 * Utilization of the tasks "assigned" to this runqueue (including
+	 * the tasks that are in the runqueue and the tasks that executed on this
+	 * CPU and blocked). Increased when a task moves to this runqueue, and
+	 * decreased when the task moves away (migrates, changes scheduling
+	 * policy, or terminates).
+	 * This is needed to compute the "inactive utilization" for the
+	 * runqueue (inactive utilization = this_bw - running_bw).
+	 */
+	u64 this_bw;
+	u64 extra_bw;
+
+	/*
+	 * Inverse of the fraction of CPU utilization that can be reclaimed
+	 * by the GRUB algorithm.
+	 */
+	u64 bw_ratio;
 };
 
 #ifdef CONFIG_SMP
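A minimal sketch (not kernel code) of the bookkeeping described in the comment above:
this_bw counts every task assigned to the runqueue, running_bw only the runnable ones,
and their difference is the inactive utilization. The 2^-20 fixed point mirrors the
BW_SHIFT definition added further down in this header; the helper names are illustrative.

#include <stdio.h>
#include <stdint.h>

struct dl_rq_sketch {
	uint64_t running_bw;	/* bandwidth of runnable deadline tasks */
	uint64_t this_bw;	/* bandwidth of all tasks assigned here  */
};

static void task_assigned(struct dl_rq_sketch *dl, uint64_t bw) { dl->this_bw += bw; }
static void task_wakes(struct dl_rq_sketch *dl, uint64_t bw)    { dl->running_bw += bw; }
static void task_blocks(struct dl_rq_sketch *dl, uint64_t bw)   { dl->running_bw -= bw; }

int main(void)
{
	struct dl_rq_sketch dl = { 0, 0 };
	uint64_t bw = 1ULL << 18;	/* 1/4 of a CPU in 2^-20 fixed point */

	task_assigned(&dl, bw);		/* task migrates to this runqueue */
	task_wakes(&dl, bw);		/* ... becomes TASK_RUNNING       */
	task_blocks(&dl, bw);		/* ... then blocks                */

	printf("inactive utilization: %llu / %llu of a CPU\n",
	       (unsigned long long)(dl.this_bw - dl.running_bw),
	       (unsigned long long)(1ULL << 20));
	return 0;
}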
@@ -606,11 +652,9 @@ struct root_domain {
 
 extern struct root_domain def_root_domain;
 extern struct mutex sched_domains_mutex;
-extern cpumask_var_t fallback_doms;
-extern cpumask_var_t sched_domains_tmpmask;
 
 extern void init_defrootdomain(void);
-extern int init_sched_domains(const struct cpumask *cpu_map);
+extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
 
 #endif /* CONFIG_SMP */
@@ -1025,7 +1069,11 @@ struct sched_group_capacity {
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
 
-	unsigned long cpumask[0]; /* iteration mask */
+#ifdef CONFIG_SCHED_DEBUG
+	int id;
+#endif
+
+	unsigned long cpumask[0]; /* balance mask */
 };
 
 struct sched_group {
@@ -1046,16 +1094,15 @@ struct sched_group {
 	unsigned long cpumask[0];
 };
 
-static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+static inline struct cpumask *sched_group_span(struct sched_group *sg)
 {
 	return to_cpumask(sg->cpumask);
 }
 
 /*
- * cpumask masking which cpus in the group are allowed to iterate up the domain
- * tree.
+ * See build_balance_mask().
  */
-static inline struct cpumask *sched_group_mask(struct sched_group *sg)
+static inline struct cpumask *group_balance_mask(struct sched_group *sg)
 {
 	return to_cpumask(sg->sgc->cpumask);
 }
@@ -1066,7 +1113,7 @@ static inline struct cpumask *sched_group_mask(struct sched_group *sg)
  */
 static inline unsigned int group_first_cpu(struct sched_group *group)
 {
-	return cpumask_first(sched_group_cpus(group));
+	return cpumask_first(sched_group_span(group));
 }
 
 extern int group_balance_cpu(struct sched_group *sg);
@@ -1422,7 +1469,11 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
 	curr->sched_class->set_curr_task(rq);
 }
 
+#ifdef CONFIG_SMP
 #define sched_class_highest (&stop_sched_class)
+#else
+#define sched_class_highest (&dl_sched_class)
+#endif
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
 
@@ -1486,7 +1537,12 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
 extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
+extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
 
+#define BW_SHIFT	20
+#define BW_UNIT		(1 << BW_SHIFT)
+#define RATIO_SHIFT	8
 unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
@@ -1928,6 +1984,33 @@ extern void nohz_balance_exit_idle(unsigned int cpu);
 static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
+
+#ifdef CONFIG_SMP
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+	struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
+	int i;
+
+	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+			 "sched RCU must be held");
+	for_each_cpu_and(i, rd->span, cpu_active_mask) {
+		struct rq *rq = cpu_rq(i);
+
+		rq->dl.extra_bw += bw;
+	}
+}
+#else
+static inline
+void __dl_update(struct dl_bw *dl_b, s64 bw)
+{
+	struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
+
+	dl->extra_bw += bw;
+}
+#endif
+
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 struct irqtime {
 	u64			total;
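A sketch (not kernel code) of how __dl_add() and __dl_clear() earlier in this header
spread a task's bandwidth over the root domain through __dl_update() above: each CPU's
extra_bw shifts by -/+ tsk_bw / cpus. NR_CPUS and the bandwidth value below are
illustrative assumptions.

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 4

static int64_t extra_bw[NR_CPUS];
static uint64_t total_bw;

/* Stand-in for the SMP __dl_update(): apply the delta to every CPU. */
static void dl_update(int64_t bw)
{
	for (int i = 0; i < NR_CPUS; i++)
		extra_bw[i] += bw;
}

static void dl_add(uint64_t tsk_bw)		/* cf. __dl_add() */
{
	total_bw += tsk_bw;
	dl_update(-((int64_t)tsk_bw / NR_CPUS));
}

static void dl_clear(uint64_t tsk_bw)		/* cf. __dl_clear() */
{
	total_bw -= tsk_bw;
	dl_update((int64_t)tsk_bw / NR_CPUS);
}

int main(void)
{
	dl_add(1ULL << 20);			/* admit a task using one full CPU */
	printf("total_bw=%llu extra_bw[0]=%lld\n",
	       (unsigned long long)total_bw, (long long)extra_bw[0]);

	dl_clear(1ULL << 20);			/* and remove it again */
	printf("total_bw=%llu extra_bw[0]=%lld\n",
	       (unsigned long long)total_bw, (long long)extra_bw[0]);
	return 0;
}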
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 1b0b4fb..79895ae 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -10,6 +10,7 @@ DEFINE_MUTEX(sched_domains_mutex);
 
 /* Protected by sched_domains_mutex: */
 cpumask_var_t sched_domains_tmpmask;
+cpumask_var_t sched_domains_tmpmask2;
 
 #ifdef CONFIG_SCHED_DEBUG
 
@@ -35,7 +36,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 	cpumask_clear(groupmask);
 
-	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
+	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
 
 	if (!(sd->flags & SD_LOAD_BALANCE)) {
 		printk("does not load-balance\n");
@@ -45,14 +46,14 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %*pbl level %s\n",
+	printk(KERN_CONT "span=%*pbl level=%s\n",
 	       cpumask_pr_args(sched_domain_span(sd)), sd->name);
 
 	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
+	if (!cpumask_test_cpu(cpu, sched_group_span(group))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -65,29 +66,47 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!cpumask_weight(sched_group_cpus(group))) {
+		if (!cpumask_weight(sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
 		if (!(sd->flags & SD_OVERLAP) &&
-		    cpumask_intersects(groupmask, sched_group_cpus(group))) {
+		    cpumask_intersects(groupmask, sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
+		cpumask_or(groupmask, groupmask, sched_group_span(group));
 
-		printk(KERN_CONT " %*pbl",
-		       cpumask_pr_args(sched_group_cpus(group)));
-		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-			printk(KERN_CONT " (cpu_capacity = %lu)",
-				group->sgc->capacity);
+		printk(KERN_CONT " %d:{ span=%*pbl",
+				group->sgc->id,
+				cpumask_pr_args(sched_group_span(group)));
+
+		if ((sd->flags & SD_OVERLAP) &&
+		    !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
+			printk(KERN_CONT " mask=%*pbl",
+				cpumask_pr_args(group_balance_mask(group)));
 		}
 
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
+			printk(KERN_CONT " cap=%lu", group->sgc->capacity);
+
+		if (group == sd->groups && sd->child &&
+		    !cpumask_equal(sched_domain_span(sd->child),
+				   sched_group_span(group))) {
+			printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
+		}
+
+		printk(KERN_CONT " }");
+
 		group = group->next;
+
+		if (group != sd->groups)
+			printk(KERN_CONT ",");
+
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
@@ -113,7 +132,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 		return;
 	}
 
-	printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
+	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
 
 	for (;;) {
 		if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
@@ -477,46 +496,214 @@ enum s_alloc {
 };
 
 /*
- * Build an iteration mask that can exclude certain CPUs from the upwards
- * domain traversal.
+ * Return the canonical balance CPU for this group, this is the first CPU
+ * of this group that's also in the balance mask.
  *
- * Asymmetric node setups can result in situations where the domain tree is of
- * unequal depth, make sure to skip domains that already cover the entire
- * range.
+ * The balance mask contains all those CPUs that could actually end up at this
+ * group. See build_balance_mask().
  *
- * In that case build_sched_domains() will have terminated the iteration early
- * and our sibling sd spans will be empty. Domains should always include the
- * CPU they're built on, so check that.
+ * Also see should_we_balance().
  */
-static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+int group_balance_cpu(struct sched_group *sg)
 {
-	const struct cpumask *span = sched_domain_span(sd);
+	return cpumask_first(group_balance_mask(sg));
+}
+
+
+/*
+ * NUMA topology (first read the regular topology blurb below)
+ *
+ * Given a node-distance table, for example:
+ *
+ *   node   0   1   2   3
+ *     0:  10  20  30  20
+ *     1:  20  10  20  30
+ *     2:  30  20  10  20
+ *     3:  20  30  20  10
+ *
+ * which represents a 4 node ring topology like:
+ *
+ *   0 ----- 1
+ *   |       |
+ *   |       |
+ *   |       |
+ *   3 ----- 2
+ *
+ * We want to construct domains and groups to represent this. The way we go
+ * about doing this is to build the domains on 'hops'. For each NUMA level we
+ * construct the mask of all nodes reachable in @level hops.
+ *
+ * For the above NUMA topology that gives 3 levels:
+ *
+ * NUMA-2	0-3		0-3		0-3		0-3
+ *  groups:	{0-1,3},{1-3}	{0-2},{0,2-3}	{1-3},{0-1,3}	{0,2-3},{0-2}
+ *
+ * NUMA-1	0-1,3		0-2		1-3		0,2-3
+ *  groups:	{0},{1},{3}	{0},{1},{2}	{1},{2},{3}	{0},{2},{3}
+ *
+ * NUMA-0	0		1		2		3
+ *
+ *
+ * As can be seen, things don't line up as nicely as with the regular
+ * topology. When we iterate a domain in child-domain chunks, some nodes can
+ * be represented multiple times -- hence the "overlap" naming for this part
+ * of the topology.
+ *
+ * In order to minimize this overlap, we only build enough groups to cover the
+ * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
+ *
+ * Because:
+ *
+ *  - the first group of each domain is its child domain; this
+ *    gets us the first 0-1,3
+ *  - the only uncovered node is 2, whose child domain is 1-3.
+ *
+ * However, because of the overlap, computing a unique CPU for each group is
+ * more complicated. Consider for instance the groups of Node-1 NUMA-2: both
+ * groups include the CPUs of Node-0, yet those CPUs would never in fact end
+ * up at those groups (they would end up in group: 0-1,3).
+ *
+ * To correct this we have to introduce the group balance mask. This mask
+ * will contain those CPUs in the group that can reach this group given the
+ * (child) domain tree.
+ *
+ * With this we can once again compute balance_cpu and sched_group_capacity
+ * relations.
+ *
+ * XXX include words on how balance_cpu is unique and therefore can be
+ * used for sched_group_capacity links.
+ *
+ *
+ * Another 'interesting' topology is:
+ *
+ *   node   0   1   2   3
+ *     0:  10  20  20  30
+ *     1:  20  10  20  20
+ *     2:  20  20  10  20
+ *     3:  30  20  20  10
+ *
+ * Which looks a little like:
+ *
+ *   0 ----- 1
+ *   |     / |
+ *   |   /   |
+ *   | /     |
+ *   2 ----- 3
+ *
+ * This topology is asymmetric, nodes 1,2 are fully connected, but nodes 0,3
+ * are not.
+ *
+ * This leads to a few particularly weird cases where the number of
+ * sched_domains is not the same for each CPU. Consider:
+ *
+ * NUMA-2	0-3						0-3
+ *  groups:	{0-2},{1-3}					{1-3},{0-2}
+ *
+ * NUMA-1	0-2		0-3		0-3		1-3
+ *
+ * NUMA-0	0		1		2		3
+ *
+ */
+
+
+/*
+ * Build the balance mask; it contains only those CPUs that can arrive at this
+ * group and should be considered to continue balancing.
+ *
+ * We do this during the group creation pass; therefore the group information
+ * isn't complete yet. However, since each group represents a (child) domain,
+ * we can fully construct this using the sched_domain bits (which are already
+ * complete).
+ */
+static void
+build_balance_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
+{
+	const struct cpumask *sg_span = sched_group_span(sg);
 	struct sd_data *sdd = sd->private;
 	struct sched_domain *sibling;
 	int i;
 
-	for_each_cpu(i, span) {
+	cpumask_clear(mask);
+
+	for_each_cpu(i, sg_span) {
 		sibling = *per_cpu_ptr(sdd->sd, i);
-		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+
+		/*
+		 * Can happen in the asymmetric case, where these siblings are
+		 * unused. The mask will not be empty because those CPUs that
+		 * do have the top domain _should_ span the domain.
+		 */
+		if (!sibling->child)
 			continue;
 
-		cpumask_set_cpu(i, sched_group_mask(sg));
+		/* If we would not end up here, we can't continue from here */
+		if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
+			continue;
+
+		cpumask_set_cpu(i, mask);
 	}
+
+	/* We must not have empty masks here */
+	WARN_ON_ONCE(cpumask_empty(mask));
 }
 
 /*
- * Return the canonical balance CPU for this group, this is the first CPU
- * of this group that's also in the iteration mask.
+ * XXX: This creates per-node group entries; since the load-balancer will
+ * immediately access remote memory to construct this group's load-balance
+ * statistics, having the groups node-local is of dubious benefit.
  */
-int group_balance_cpu(struct sched_group *sg)
+static struct sched_group *
+build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
 {
-	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+	struct sched_group *sg;
+	struct cpumask *sg_span;
+
+	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+			GFP_KERNEL, cpu_to_node(cpu));
+
+	if (!sg)
+		return NULL;
+
+	sg_span = sched_group_span(sg);
+	if (sd->child)
+		cpumask_copy(sg_span, sched_domain_span(sd->child));
+	else
+		cpumask_copy(sg_span, sched_domain_span(sd));
+
+	return sg;
+}
+
+static void init_overlap_sched_group(struct sched_domain *sd,
+				     struct sched_group *sg)
+{
+	struct cpumask *mask = sched_domains_tmpmask2;
+	struct sd_data *sdd = sd->private;
+	struct cpumask *sg_span;
+	int cpu;
+
+	build_balance_mask(sd, sg, mask);
+	cpu = cpumask_first_and(sched_group_span(sg), mask);
+
+	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+	if (atomic_inc_return(&sg->sgc->ref) == 1)
+		cpumask_copy(group_balance_mask(sg), mask);
+	else
+		WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
+
+	/*
+	 * Initialize sgc->capacity such that even if we mess up the
+	 * domains and no possible iteration will get us here, we won't
+	 * die on a /0 trap.
+	 */
+	sg_span = sched_group_span(sg);
+	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
-	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	struct sched_group *first = NULL, *last = NULL, *sg;
 	const struct cpumask *span = sched_domain_span(sd);
 	struct cpumask *covered = sched_domains_tmpmask;
 	struct sd_data *sdd = sd->private;
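A standalone sketch (not kernel code) of where the per-hop spans in the NUMA comment
above come from: at each distance level, a node's span is the set of nodes no further
away than that distance. The ring distance table from the comment is reproduced; node
masks are printed as plain bitmaps for brevity.

#include <stdio.h>

#define NR_NODES 4

static const int dist[NR_NODES][NR_NODES] = {
	{ 10, 20, 30, 20 },
	{ 20, 10, 20, 30 },
	{ 30, 20, 10, 20 },
	{ 20, 30, 20, 10 },
};

int main(void)
{
	static const int level[] = { 10, 20, 30 };	/* NUMA-0, NUMA-1, NUMA-2 */

	for (int l = 0; l < 3; l++) {
		printf("NUMA-%d:", l);
		for (int n = 0; n < NR_NODES; n++) {
			unsigned int span = 0;

			for (int m = 0; m < NR_NODES; m++)
				if (dist[n][m] <= level[l])
					span |= 1u << m;
			printf(" node%d=0x%x", n, span);
		}
		printf("\n");
	}
	return 0;
}

For node 0 this prints 0x1 at NUMA-0, 0xb (nodes 0-1,3) at NUMA-1 and 0xf at NUMA-2,
matching the table in the comment.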
@@ -525,7 +712,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 	cpumask_clear(covered);
 
-	for_each_cpu(i, span) {
+	for_each_cpu_wrap(i, span, cpu) {
 		struct cpumask *sg_span;
 
 		if (cpumask_test_cpu(i, covered))
@@ -533,44 +720,27 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 		sibling = *per_cpu_ptr(sdd->sd, i);
 
-		/* See the comment near build_group_mask(). */
+		/*
+		 * Asymmetric node setups can result in situations where the
+		 * domain tree is of unequal depth, make sure to skip domains
+		 * that already cover the entire range.
+		 *
+		 * In that case build_sched_domains() will have terminated the
+		 * iteration early and our sibling sd spans will be empty.
+		 * Domains should always include the CPU they're built on, so
+		 * check that.
+		 */
 		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
 			continue;
 
-		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
-				GFP_KERNEL, cpu_to_node(cpu));
-
+		sg = build_group_from_child_sched_domain(sibling, cpu);
 		if (!sg)
 			goto fail;
 
-		sg_span = sched_group_cpus(sg);
-		if (sibling->child)
-			cpumask_copy(sg_span, sched_domain_span(sibling->child));
-		else
-			cpumask_set_cpu(i, sg_span);
-
+		sg_span = sched_group_span(sg);
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
-		if (atomic_inc_return(&sg->sgc->ref) == 1)
-			build_group_mask(sd, sg);
-
-		/*
-		 * Initialize sgc->capacity such that even if we mess up the
-		 * domains and no possible iteration will get us here, we won't
-		 * die on a /0 trap.
-		 */
-		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-		sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
-
-		/*
-		 * Make sure the first group of this domain contains the
-		 * canonical balance CPU. Otherwise the sched_domain iteration
-		 * breaks. See update_sg_lb_stats().
-		 */
-		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    group_balance_cpu(sg) == cpu)
-			groups = sg;
+		init_overlap_sched_group(sd, sg);
 
 		if (!first)
 			first = sg;
@@ -579,7 +749,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		last = sg;
 		last->next = first;
 	}
-	sd->groups = groups;
+	sd->groups = first;
 
 	return 0;
 
@@ -589,23 +759,106 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 	return -ENOMEM;
 }
 
-static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
+
+/*
+ * Package topology (also see the load-balance blurb in fair.c)
+ *
+ * The scheduler builds a tree structure to represent a number of important
+ * topology features. By default (default_topology[]) these include:
+ *
+ *  - Simultaneous multithreading (SMT)
+ *  - Multi-Core Cache (MC)
+ *  - Package (DIE)
+ *
+ * Where the last one more or less denotes everything up to a NUMA node.
+ *
+ * The tree consists of 3 primary data structures:
+ *
+ *	sched_domain -> sched_group -> sched_group_capacity
+ *	    ^ ^             ^ ^
+ *          `-'             `-'
+ *
+ * The sched_domains are per-cpu and have a two-way link (parent & child) and
+ * denote the ever-growing mask of CPUs belonging to that level of topology.
+ *
+ * Each sched_domain has a circular (doubly) linked list of sched_groups, each
+ * denoting the domains of the level below (or individual CPUs in case of the
+ * first domain level). The sched_group linked by a sched_domain includes the
+ * CPU of that sched_domain [*].
+ *
+ * Take for instance a 2 threaded, 2 core, 2 cache cluster part:
+ *
+ * CPU   0   1   2   3   4   5   6   7
+ *
+ * DIE  [                             ]
+ * MC   [             ] [             ]
+ * SMT  [     ] [     ] [     ] [     ]
+ *
+ *  - or -
+ *
+ * DIE  0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
+ * MC	0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
+ * SMT  0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
+ *
+ * CPU   0   1   2   3   4   5   6   7
+ *
+ * One way to think about it is: sched_domain moves you up and down among these
+ * topology levels, while sched_group moves you sideways through it, at child
+ * domain granularity.
+ *
+ * sched_group_capacity ensures each unique sched_group has shared storage.
+ *
+ * There are two related construction problems, both of which require a CPU
+ * that uniquely identifies each group (for a given domain):
+ *
+ *  - The first is the balance_cpu (see should_we_balance() and the
+ *    load-balance blurb in fair.c); for each group we only want 1 CPU to
+ *    continue balancing at a higher domain.
+ *
+ *  - The second is the sched_group_capacity; we want all identical groups
+ *    to share a single sched_group_capacity.
+ *
+ * These topologies are exclusive by construction: it is impossible for an
+ * SMT thread to belong to multiple cores, or for a core to be part of
+ * multiple caches. There is a very clear and unique location for each CPU
+ * in the hierarchy.
+ *
+ * Therefore computing a unique CPU for each group is trivial (the iteration
+ * mask is redundant and set to all 1s; all CPUs in a group will end up at
+ * _that_ group): we can simply pick the first CPU in each group.
+ *
+ *
+ * [*] in other words, the first group of each domain is its child domain.
+ */
+
+static struct sched_group *get_group(int cpu, struct sd_data *sdd)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	struct sched_domain *child = sd->child;
+	struct sched_group *sg;
 
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg) {
-		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+	sg = *per_cpu_ptr(sdd->sg, cpu);
+	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
 
-		/* For claim_allocations: */
-		atomic_set(&(*sg)->sgc->ref, 1);
+	/* For claim_allocations: */
+	atomic_inc(&sg->ref);
+	atomic_inc(&sg->sgc->ref);
+
+	if (child) {
+		cpumask_copy(sched_group_span(sg), sched_domain_span(child));
+		cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
+	} else {
+		cpumask_set_cpu(cpu, sched_group_span(sg));
+		cpumask_set_cpu(cpu, group_balance_mask(sg));
 	}
 
-	return cpu;
+	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
+	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+
+	return sg;
 }
 
 /*
@@ -624,34 +877,20 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 	struct cpumask *covered;
 	int i;
 
-	get_group(cpu, sdd, &sd->groups);
-	atomic_inc(&sd->groups->ref);
-
-	if (cpu != cpumask_first(span))
-		return 0;
-
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
 	cpumask_clear(covered);
 
-	for_each_cpu(i, span) {
+	for_each_cpu_wrap(i, span, cpu) {
 		struct sched_group *sg;
-		int group, j;
 
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		group = get_group(i, sdd, &sg);
-		cpumask_setall(sched_group_mask(sg));
+		sg = get_group(i, sdd);
 
-		for_each_cpu(j, span) {
-			if (get_group(j, sdd, NULL) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
+		cpumask_or(covered, covered, sched_group_span(sg));
 
 		if (!first)
 			first = sg;
@@ -660,6 +899,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 		last = sg;
 	}
 	last->next = first;
+	sd->groups = first;
 
 	return 0;
 }
@@ -683,12 +923,12 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	do {
 		int cpu, max_cpu = -1;
 
-		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg->group_weight = cpumask_weight(sched_group_span(sg));
 
 		if (!(sd->flags & SD_ASYM_PACKING))
 			goto next;
 
-		for_each_cpu(cpu, sched_group_cpus(sg)) {
+		for_each_cpu(cpu, sched_group_span(sg)) {
 			if (max_cpu < 0)
 				max_cpu = cpu;
 			else if (sched_asym_prefer(cpu, max_cpu))
@@ -1308,6 +1548,10 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 			if (!sgc)
 				return -ENOMEM;
 
+#ifdef CONFIG_SCHED_DEBUG
+			sgc->id = j;
+#endif
+
 			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
@@ -1407,7 +1651,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
-			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+			if (tl->flags & SDTL_OVERLAP)
 				sd->flags |= SD_OVERLAP;
 			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
 				break;
@@ -1478,7 +1722,7 @@ static struct sched_domain_attr		*dattr_cur;
  * cpumask) fails, then fallback to a single sched domain,
  * as determined by the single cpumask fallback_doms.
  */
-cpumask_var_t				fallback_doms;
+static cpumask_var_t			fallback_doms;
 
 /*
  * arch_update_cpu_topology lets virtualized architectures update the
@@ -1520,10 +1764,14 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
  * For now this just excludes isolated CPUs, but could be used to
  * exclude other special cases in the future.
  */
-int init_sched_domains(const struct cpumask *cpu_map)
+int sched_init_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
+	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
+	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
+	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
+
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
 	doms_cur = alloc_sched_domains(ndoms_cur);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index b8c84c6..17f11c6 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -12,44 +12,44 @@
 #include <linux/hash.h>
 #include <linux/kthread.h>
 
-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
 {
-	spin_lock_init(&q->lock);
-	lockdep_set_class_and_name(&q->lock, key, name);
-	INIT_LIST_HEAD(&q->task_list);
+	spin_lock_init(&wq_head->lock);
+	lockdep_set_class_and_name(&wq_head->lock, key, name);
+	INIT_LIST_HEAD(&wq_head->head);
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
 
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue);
 
-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue_entry_tail(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__remove_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(remove_wait_queue);
 
@@ -63,12 +63,12 @@ EXPORT_SYMBOL(remove_wait_queue);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, int wake_flags, void *key)
 {
-	wait_queue_t *curr, *next;
+	wait_queue_entry_t *curr, *next;
 
-	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
+	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
 
 		if (curr->func(curr, mode, wake_flags, key) &&
@@ -79,7 +79,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 
 /**
  * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
@@ -87,35 +87,35 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
+void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, 0, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, 0, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(__wake_up);
 
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
 {
-	__wake_up_common(q, mode, nr, 0, NULL);
+	__wake_up_common(wq_head, mode, nr, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, 1, 0, key);
+	__wake_up_common(wq_head, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * @wq_head: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: opaque value to be passed to wakeup targets
@@ -130,30 +130,30 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
  */
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
 	int wake_flags = 1; /* XXX WF_SYNC */
 
-	if (unlikely(!q))
+	if (unlikely(!wq_head))
 		return;
 
 	if (unlikely(nr_exclusive != 1))
 		wake_flags = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 
 /*
  * __wake_up_sync - see __wake_up_sync_key()
  */
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
 {
-	__wake_up_sync_key(q, mode, nr_exclusive, NULL);
+	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
@@ -170,48 +170,48 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * loads to move into the critical region).
  */
 void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
+	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
 void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue_entry_tail(wq_head, wq_entry);
 	set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
-void init_wait_entry(wait_queue_t *wait, int flags)
+void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
-	wait->flags = flags;
-	wait->private = current;
-	wait->func = autoremove_wake_function;
-	INIT_LIST_HEAD(&wait->task_list);
+	wq_entry->flags = flags;
+	wq_entry->private = current;
+	wq_entry->func = autoremove_wake_function;
+	INIT_LIST_HEAD(&wq_entry->entry);
 }
 EXPORT_SYMBOL(init_wait_entry);
 
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
 	long ret = 0;
 
-	spin_lock_irqsave(&q->lock, flags);
+	spin_lock_irqsave(&wq_head->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
 		 * Exclusive waiter must not fail if it was selected by wakeup,
@@ -219,24 +219,24 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 		 *
 		 * The caller will recheck the condition and return success if
 		 * we were already woken up, we can not miss the event because
-		 * wakeup locks/unlocks the same q->lock.
+		 * wakeup locks/unlocks the same wq_head->lock.
 		 *
 		 * But we need to ensure that set-condition + wakeup after that
 		 * can't see us, it should wake up another exclusive waiter if
 		 * we fail.
 		 */
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 		ret = -ERESTARTSYS;
 	} else {
-		if (list_empty(&wait->task_list)) {
-			if (wait->flags & WQ_FLAG_EXCLUSIVE)
-				__add_wait_queue_tail(q, wait);
+		if (list_empty(&wq_entry->entry)) {
+			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
+				__add_wait_queue_entry_tail(wq_head, wq_entry);
 			else
-				__add_wait_queue(q, wait);
+				__add_wait_queue(wq_head, wq_entry);
 		}
 		set_current_state(state);
 	}
-	spin_unlock_irqrestore(&q->lock, flags);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
 
 	return ret;
 }
@@ -249,10 +249,10 @@ EXPORT_SYMBOL(prepare_to_wait_event);
  * condition in the caller before they add the wait
  * entry to the wake queue.
  */
-int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -265,10 +265,10 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
 }
 EXPORT_SYMBOL(do_wait_intr);
 
-int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
 {
-	if (likely(list_empty(&wait->task_list)))
-		__add_wait_queue_tail(wq, wait);
+	if (likely(list_empty(&wait->entry)))
+		__add_wait_queue_entry_tail(wq, wait);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (signal_pending(current))
@@ -283,14 +283,14 @@ EXPORT_SYMBOL(do_wait_intr_irq);
 
 /**
  * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
+ * @wq_head: waitqueue waited on
+ * @wq_entry: wait descriptor
  *
  * Sets current thread back to running state and removes
  * the wait descriptor from the given waitqueue if still
  * queued.
  */
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
@@ -308,20 +308,20 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 	 *    have _one_ other CPU that looks at or modifies
 	 *    the list).
 	 */
-	if (!list_empty_careful(&wait->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
+	if (!list_empty_careful(&wq_entry->entry)) {
+		spin_lock_irqsave(&wq_head->lock, flags);
+		list_del_init(&wq_entry->entry);
+		spin_unlock_irqrestore(&wq_head->lock, flags);
 	}
 }
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
-	int ret = default_wake_function(wait, mode, sync, key);
+	int ret = default_wake_function(wq_entry, mode, sync, key);
 
 	if (ret)
-		list_del_init(&wait->task_list);
+		list_del_init(&wq_entry->entry);
 	return ret;
 }
 EXPORT_SYMBOL(autoremove_wake_function);
@@ -334,24 +334,24 @@ static inline bool is_kthread_should_stop(void)
 /*
  * DEFINE_WAIT_FUNC(wait, woken_wake_func);
  *
- * add_wait_queue(&wq, &wait);
+ * add_wait_queue(&wq_head, &wait);
  * for (;;) {
  *     if (condition)
  *         break;
  *
  *     p->state = mode;				condition = true;
  *     smp_mb(); // A				smp_wmb(); // C
- *     if (!wait->flags & WQ_FLAG_WOKEN)	wait->flags |= WQ_FLAG_WOKEN;
+ *     if (!wq_entry->flags & WQ_FLAG_WOKEN)	wq_entry->flags |= WQ_FLAG_WOKEN;
  *         schedule()				try_to_wake_up();
  *     p->state = TASK_RUNNING;		    ~~~~~~~~~~~~~~~~~~
- *     wait->flags &= ~WQ_FLAG_WOKEN;		condition = true;
+ *     wq_entry->flags &= ~WQ_FLAG_WOKEN;		condition = true;
  *     smp_mb() // B				smp_wmb(); // C
- *						wait->flags |= WQ_FLAG_WOKEN;
+ *						wq_entry->flags |= WQ_FLAG_WOKEN;
  * }
- * remove_wait_queue(&wq, &wait);
+ * remove_wait_queue(&wq_head, &wait);
  *
  */
-long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
 {
 	set_current_state(mode); /* A */
 	/*
@@ -359,7 +359,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must
 	 * also observe all state before the wakeup.
 	 */
-	if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
+	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
 		timeout = schedule_timeout(timeout);
 	__set_current_state(TASK_RUNNING);
 
@@ -369,13 +369,13 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
 	 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
 	 * an event.
 	 */
-	smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
+	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
 
 	return timeout;
 }
 EXPORT_SYMBOL(wait_woken);
 
-int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
 {
 	/*
 	 * Although this function is called under waitqueue lock, LOCK
@@ -385,267 +385,8 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	 * and is paired with smp_store_mb() in wait_woken().
 	 */
 	smp_wmb(); /* C */
-	wait->flags |= WQ_FLAG_WOKEN;
+	wq_entry->flags |= WQ_FLAG_WOKEN;
 
-	return default_wake_function(wait, mode, sync, key);
+	return default_wake_function(wq_entry, mode, sync, key);
 }
 EXPORT_SYMBOL(woken_wake_function);
-
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-
-	if (wait_bit->key.flags != key->flags ||
-			wait_bit->key.bit_nr != key->bit_nr ||
-			test_bit(key->bit_nr, key->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, key);
-}
-EXPORT_SYMBOL(wake_bit_function);
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking)
- * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
- * permitted return codes. Nonzero return codes halt waiting and return.
- */
-int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
-	      wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags))
-			ret = (*action)(&q->key, mode);
-	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-EXPORT_SYMBOL(__wait_on_bit);
-
-int __sched out_of_line_wait_on_bit(void *word, int bit,
-				    wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
-
-int __sched out_of_line_wait_on_bit_timeout(
-	void *word, int bit, wait_bit_action_f *action,
-	unsigned mode, unsigned long timeout)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	wait.key.timeout = jiffies + timeout;
-	return __wait_on_bit(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
-
-int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
-			wait_bit_action_f *action, unsigned mode)
-{
-	int ret = 0;
-
-	for (;;) {
-		prepare_to_wait_exclusive(wq, &q->wait, mode);
-		if (test_bit(q->key.bit_nr, q->key.flags)) {
-			ret = action(&q->key, mode);
-			/*
-			 * See the comment in prepare_to_wait_event().
-			 * finish_wait() does not necessarily takes wq->lock,
-			 * but test_and_set_bit() implies mb() which pairs with
-			 * smp_mb__after_atomic() before wake_up_page().
-			 */
-			if (ret)
-				finish_wait(wq, &q->wait);
-		}
-		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
-			if (!ret)
-				finish_wait(wq, &q->wait);
-			return 0;
-		} else if (ret) {
-			return ret;
-		}
-	}
-}
-EXPORT_SYMBOL(__wait_on_bit_lock);
-
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
-					 wait_bit_action_f *action, unsigned mode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(word, bit);
-	DEFINE_WAIT_BIT(wait, word, bit);
-
-	return __wait_on_bit_lock(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
-
-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
-{
-	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, 1, &key);
-}
-EXPORT_SYMBOL(__wake_up_bit);
-
-/**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
- *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
- *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
- */
-void wake_up_bit(void *word, int bit)
-{
-	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
-}
-EXPORT_SYMBOL(wake_up_bit);
-
-/*
- * Manipulate the atomic_t address to produce a better bit waitqueue table hash
- * index (we're keying off bit -1, but that would produce a horrible hash
- * value).
- */
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
-{
-	if (BITS_PER_LONG == 64) {
-		unsigned long q = (unsigned long)p;
-		return bit_waitqueue((void *)(q & ~1), q & 1);
-	}
-	return bit_waitqueue(p, 0);
-}
-
-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
-				  void *arg)
-{
-	struct wait_bit_key *key = arg;
-	struct wait_bit_queue *wait_bit
-		= container_of(wait, struct wait_bit_queue, wait);
-	atomic_t *val = key->flags;
-
-	if (wait_bit->key.flags != key->flags ||
-	    wait_bit->key.bit_nr != key->bit_nr ||
-	    atomic_read(val) != 0)
-		return 0;
-	return autoremove_wake_function(wait, mode, sync, key);
-}
-
-/*
- * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes.  Nonzero
- * return codes halt waiting and return.
- */
-static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
-		       int (*action)(atomic_t *), unsigned mode)
-{
-	atomic_t *val;
-	int ret = 0;
-
-	do {
-		prepare_to_wait(wq, &q->wait, mode);
-		val = q->key.flags;
-		if (atomic_read(val) == 0)
-			break;
-		ret = (*action)(val);
-	} while (!ret && atomic_read(val) != 0);
-	finish_wait(wq, &q->wait);
-	return ret;
-}
-
-#define DEFINE_WAIT_ATOMIC_T(name, p)					\
-	struct wait_bit_queue name = {					\
-		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
-		.wait	= {						\
-			.private	= current,			\
-			.func		= wake_atomic_t_function,	\
-			.task_list	=				\
-				LIST_HEAD_INIT((name).wait.task_list),	\
-		},							\
-	}
-
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
-					 unsigned mode)
-{
-	wait_queue_head_t *wq = atomic_t_waitqueue(p);
-	DEFINE_WAIT_ATOMIC_T(wait, p);
-
-	return __wait_on_atomic_t(wq, &wait, action, mode);
-}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
-
-/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @p: The atomic_t being waited on, a kernel virtual address
- *
- * Wake up anyone waiting for the atomic_t to go to zero.
- *
- * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
- * check is done by the waiter's wake function, not the by the waker itself).
- */
-void wake_up_atomic_t(atomic_t *p)
-{
-	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
-}
-EXPORT_SYMBOL(wake_up_atomic_t);
-
-__sched int bit_wait(struct wait_bit_key *word, int mode)
-{
-	schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait);
-
-__sched int bit_wait_io(struct wait_bit_key *word, int mode)
-{
-	io_schedule();
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL(bit_wait_io);
-
-__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_timeout);
-
-__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
-{
-	unsigned long now = READ_ONCE(jiffies);
-	if (time_after_eq(now, word->timeout))
-		return -EAGAIN;
-	io_schedule_timeout(word->timeout - now);
-	if (signal_pending_state(mode, current))
-		return -EINTR;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
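
The kernel/sched/wait.c hunks above are a pure rename: wait_queue_t becomes struct wait_queue_entry, callers move from wait_queue_head_t to struct wait_queue_head, and the task_list member becomes entry. As a rough, hypothetical illustration of what a caller looks like after the rename (the my_wq head and the ready flag are made up for this sketch and are not part of the patch):

	static DECLARE_WAIT_QUEUE_HEAD(my_wq);		/* struct wait_queue_head */
	static bool ready;

	static void wait_for_ready(void)
	{
		DEFINE_WAIT(wq_entry);			/* struct wait_queue_entry */

		for (;;) {
			prepare_to_wait(&my_wq, &wq_entry, TASK_UNINTERRUPTIBLE);
			if (READ_ONCE(ready))
				break;
			schedule();
		}
		finish_wait(&my_wq, &wq_entry);
	}

The open-coded wait loop itself is unchanged by the series; only the type and member names differ.
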
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
new file mode 100644
index 0000000..f815969
--- /dev/null
+++ b/kernel/sched/wait_bit.c
@@ -0,0 +1,286 @@
+/*
+ * The implementation of the wait_bit*() and related waiting APIs:
+ */
+#include <linux/wait_bit.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+#include <linux/hash.h>
+
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
+int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the action functions passed to __wait_on_bit() and
+ * __wait_on_bit_lock() may return nonzero codes; a nonzero return code
+ * halts waiting and is propagated back to the caller.
+ */
+int __sched
+__wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+	      wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
+			ret = (*action)(&wbq_entry->key, mode);
+	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched out_of_line_wait_on_bit(void *word, int bit,
+				    wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched out_of_line_wait_on_bit_timeout(
+	void *word, int bit, wait_bit_action_f *action,
+	unsigned mode, unsigned long timeout)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	wq_entry.key.timeout = jiffies + timeout;
+	return __wait_on_bit(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
+
+int __sched
+__wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+			wait_bit_action_f *action, unsigned mode)
+{
+	int ret = 0;
+
+	for (;;) {
+		prepare_to_wait_exclusive(wq_head, &wbq_entry->wq_entry, mode);
+		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			ret = action(&wbq_entry->key, mode);
+			/*
+			 * See the comment in prepare_to_wait_event().
+			 * finish_wait() does not necessarily take wq_head->lock,
+			 * but test_and_set_bit() implies mb() which pairs with
+			 * smp_mb__after_atomic() before wake_up_page().
+			 */
+			if (ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+		}
+		if (!test_and_set_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) {
+			if (!ret)
+				finish_wait(wq_head, &wbq_entry->wq_entry);
+			return 0;
+		} else if (ret) {
+			return ret;
+		}
+	}
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+					 wait_bit_action_f *action, unsigned mode)
+{
+	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wq_entry, word, bit);
+
+	return __wait_on_bit_lock(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq_head))
+		__wake_up(wq_head, TASK_NORMAL, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_atomic(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+/*
+ * Manipulate the atomic_t address to produce a better bit waitqueue table hash
+ * index (we're keying off bit -1, but that would produce a horrible hash
+ * value).
+ */
+static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
+{
+	if (BITS_PER_LONG == 64) {
+		unsigned long q = (unsigned long)p;
+		return bit_waitqueue((void *)(q & ~1), q & 1);
+	}
+	return bit_waitqueue(p, 0);
+}
+
+static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync,
+				  void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue_entry *wait_bit = container_of(wq_entry, struct wait_bit_queue_entry, wq_entry);
+	atomic_t *val = key->flags;
+
+	if (wait_bit->key.flags != key->flags ||
+	    wait_bit->key.bit_nr != key->bit_nr ||
+	    atomic_read(val) != 0)
+		return 0;
+	return autoremove_wake_function(wq_entry, mode, sync, key);
+}
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
+ * the action function passed to __wait_on_atomic_t() may return a nonzero
+ * code; a nonzero return code halts waiting and is propagated back to the
+ * caller.
+ */
+static __sched
+int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
+		       int (*action)(atomic_t *), unsigned mode)
+{
+	atomic_t *val;
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
+		val = wbq_entry->key.flags;
+		if (atomic_read(val) == 0)
+			break;
+		ret = (*action)(val);
+	} while (!ret && atomic_read(val) != 0);
+	finish_wait(wq_head, &wbq_entry->wq_entry);
+	return ret;
+}
+
+#define DEFINE_WAIT_ATOMIC_T(name, p)					\
+	struct wait_bit_queue_entry name = {				\
+		.key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p),		\
+		.wq_entry = {						\
+			.private	= current,			\
+			.func		= wake_atomic_t_function,	\
+			.entry		=				\
+				LIST_HEAD_INIT((name).wq_entry.entry),	\
+		},							\
+	}
+
+__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
+					 unsigned mode)
+{
+	struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
+	DEFINE_WAIT_ATOMIC_T(wq_entry, p);
+
+	return __wait_on_atomic_t(wq_head, &wq_entry, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+
+/**
+ * wake_up_atomic_t - Wake up a waiter on an atomic_t
+ * @p: The atomic_t being waited on, a kernel virtual address
+ *
+ * Wake up anyone waiting for the atomic_t to go to zero.
+ *
+ * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
+ * check is done by the waiter's wake function, not by the waker itself).
+ */
+void wake_up_atomic_t(atomic_t *p)
+{
+	__wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
+}
+EXPORT_SYMBOL(wake_up_atomic_t);
+
+__sched int bit_wait(struct wait_bit_key *word, int mode)
+{
+	schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait);
+
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
+{
+	io_schedule();
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL(bit_wait_io);
+
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_timeout);
+
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
+{
+	unsigned long now = READ_ONCE(jiffies);
+	if (time_after_eq(now, word->timeout))
+		return -EAGAIN;
+	io_schedule_timeout(word->timeout - now);
+	if (signal_pending_state(mode, current))
+		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
+
+void __init wait_bit_init(void)
+{
+	int i;
+
+	for (i = 0; i < WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(bit_wait_table + i);
+}
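
The wake_up_bit() kernel-doc carried into wait_bit.c insists on a memory barrier before the wakeup, because __wake_up_bit() checks waitqueue_active() without taking the waitqueue lock. A minimal, hypothetical flag handshake over the hashed bit waitqueue might look like the following (the my_flags word and MY_FLAG_BUSY bit are assumptions for the sketch):

	#define MY_FLAG_BUSY	0			/* hypothetical bit number */
	static unsigned long my_flags;			/* hypothetical flag word */

	static void finish_work(void)
	{
		clear_bit(MY_FLAG_BUSY, &my_flags);
		smp_mb__after_atomic();			/* order the clear against waitqueue_active() */
		wake_up_bit(&my_flags, MY_FLAG_BUSY);
	}

	static int wait_for_work(void)
	{
		/* sleeps on the shared bit_wait_table entry returned by bit_waitqueue() */
		return wait_on_bit(&my_flags, MY_FLAG_BUSY, TASK_INTERRUPTIBLE);
	}
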
diff --git a/kernel/signal.c b/kernel/signal.c
index ca92bcf..45b4c1f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+			   bool *resched_timer)
 {
 	struct sigqueue *q, *first = NULL;
 
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 still_pending:
 		list_del_init(&first->list);
 		copy_siginfo(info, &first->info);
+
+		*resched_timer =
+			(first->flags & SIGQUEUE_PREALLOC) &&
+			(info->si_code == SI_TIMER) &&
+			(info->si_sys_private);
+
 		__sigqueue_free(first);
 	} else {
 		/*
@@ -548,12 +555,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-			siginfo_t *info)
+			siginfo_t *info, bool *resched_timer)
 {
 	int sig = next_signal(pending, mask);
 
 	if (sig)
-		collect_signal(sig, pending, info);
+		collect_signal(sig, pending, info, resched_timer);
 	return sig;
 }
 
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
  */
 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 {
+	bool resched_timer = false;
 	int signr;
 
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	signr = __dequeue_signal(&tsk->pending, mask, info);
+	signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
-					 mask, info);
+					 mask, info, &resched_timer);
 #ifdef CONFIG_POSIX_TIMERS
 		/*
 		 * itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		current->jobctl |= JOBCTL_STOP_DEQUEUED;
 	}
 #ifdef CONFIG_POSIX_TIMERS
-	if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+	if (resched_timer) {
 		/*
 		 * Release the siglock to ensure proper locking order
 		 * of timer locks outside of siglocks.  Note, we leave
diff --git a/kernel/smp.c b/kernel/smp.c
index a817769..3061483 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -30,6 +30,7 @@ enum {
 struct call_function_data {
 	struct call_single_data	__percpu *csd;
 	cpumask_var_t		cpumask;
+	cpumask_var_t		cpumask_ipi;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
@@ -45,9 +46,15 @@ int smpcfd_prepare_cpu(unsigned int cpu)
 	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 				     cpu_to_node(cpu)))
 		return -ENOMEM;
+	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
+				     cpu_to_node(cpu))) {
+		free_cpumask_var(cfd->cpumask);
+		return -ENOMEM;
+	}
 	cfd->csd = alloc_percpu(struct call_single_data);
 	if (!cfd->csd) {
 		free_cpumask_var(cfd->cpumask);
+		free_cpumask_var(cfd->cpumask_ipi);
 		return -ENOMEM;
 	}
 
@@ -59,6 +66,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
 	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
 
 	free_cpumask_var(cfd->cpumask);
+	free_cpumask_var(cfd->cpumask_ipi);
 	free_percpu(cfd->csd);
 	return 0;
 }
@@ -428,12 +436,13 @@ void smp_call_function_many(const struct cpumask *mask,
 	cfd = this_cpu_ptr(&cfd_data);
 
 	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
-	cpumask_clear_cpu(this_cpu, cfd->cpumask);
+	__cpumask_clear_cpu(this_cpu, cfd->cpumask);
 
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
 		return;
 
+	cpumask_clear(cfd->cpumask_ipi);
 	for_each_cpu(cpu, cfd->cpumask) {
 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
 
@@ -442,11 +451,12 @@ void smp_call_function_many(const struct cpumask *mask,
 			csd->flags |= CSD_FLAG_SYNCHRONOUS;
 		csd->func = func;
 		csd->info = info;
-		llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
+		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
+			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(cfd->cpumask);
+	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
 
 	if (wait) {
 		for_each_cpu(cpu, cfd->cpumask) {
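
The smp_call_function_many() change leans on the llist_add() contract: it returns true only when the lockless list was empty beforehand, so a CPU whose call_single_queue already held entries has an IPI in flight and is left out of cpumask_ipi. A sketch of that contract, with illustrative names only:

	struct llist_head q = LLIST_HEAD_INIT(q);
	struct llist_node a, b;

	bool first  = llist_add(&a, &q);	/* true:  queue was empty, an IPI is needed */
	bool second = llist_add(&b, &q);	/* false: already non-empty, IPI already pending */
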
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index ece4b17..939a158 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1119,7 +1119,7 @@ static ssize_t bin_uuid(struct file *file,
 	/* Only supports reads */
 	if (oldval && oldlen) {
 		char buf[UUID_STRING_LEN + 1];
-		uuid_be uuid;
+		uuid_t uuid;
 
 		result = kernel_read(file, 0, buf, sizeof(buf) - 1);
 		if (result < 0)
@@ -1128,7 +1128,7 @@ static ssize_t bin_uuid(struct file *file,
 		buf[result] = '\0';
 
 		result = -EIO;
-		if (uuid_be_to_bin(buf, &uuid))
+		if (uuid_parse(buf, &uuid))
 			goto out;
 
 		if (oldlen > 16)
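
bin_uuid() moves to the consolidated uuid_t type and the uuid_parse() helper. A hedged sketch of the helper's contract follows; the buffer contents are made up for the example:

	uuid_t uuid;
	char buf[UUID_STRING_LEN + 1] = "123e4567-e89b-12d3-a456-426614174000";

	if (uuid_parse(buf, &uuid))	/* returns 0 on success, -EINVAL on a malformed string */
		return -EIO;
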
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 4008d9f..ac09bc2 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -126,56 +126,6 @@
 	 Note the boot CPU will still be kept outside the range to
 	 handle the timekeeping duty.
 
-config NO_HZ_FULL_SYSIDLE
-	bool "Detect full-system idle state for full dynticks system"
-	depends on NO_HZ_FULL
-	default n
-	help
-	 At least one CPU must keep the scheduling-clock tick running for
-	 timekeeping purposes whenever there is a non-idle CPU, where
-	 "non-idle" also includes dynticks CPUs as long as they are
-	 running non-idle tasks.  Because the underlying adaptive-tick
-	 support cannot distinguish between all CPUs being idle and
-	 all CPUs each running a single task in dynticks mode, the
-	 underlying support simply ensures that there is always a CPU
-	 handling the scheduling-clock tick, whether or not all CPUs
-	 are idle.  This Kconfig option enables scalable detection of
-	 the all-CPUs-idle state, thus allowing the scheduling-clock
-	 tick to be disabled when all CPUs are idle.  Note that scalable
-	 detection of the all-CPUs-idle state means that larger systems
-	 will be slower to declare the all-CPUs-idle state.
-
-	 Say Y if you would like to help debug all-CPUs-idle detection.
-
-	 Say N if you are unsure.
-
-config NO_HZ_FULL_SYSIDLE_SMALL
-	int "Number of CPUs above which large-system approach is used"
-	depends on NO_HZ_FULL_SYSIDLE
-	range 1 NR_CPUS
-	default 8
-	help
-	 The full-system idle detection mechanism takes a lazy approach
-	 on large systems, as is required to attain decent scalability.
-	 However, on smaller systems, scalability is not anywhere near as
-	 large a concern as is energy efficiency.  The sysidle subsystem
-	 therefore uses a fast but non-scalable algorithm for small
-	 systems and a lazier but scalable algorithm for large systems.
-	 This Kconfig parameter defines the number of CPUs in the largest
-	 system that will be considered to be "small".
-
-	 The default value will be fine in most cases.	Battery-powered
-	 systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
-	 numbers of CPUs, and (3) are suffering from battery-lifetime
-	 problems due to long sysidle latencies might wish to experiment
-	 with larger values for this Kconfig parameter.  On the other
-	 hand, they might be even better served by disabling NO_HZ_FULL
-	 entirely, given that NO_HZ_FULL is intended for HPC and
-	 real-time workloads that at present do not tend to be run on
-	 battery-powered systems.
-
-	 Take the default if you are unsure.
-
 config NO_HZ
 	bool "Old Idle dynticks config"
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5cb5b00..ee2f420 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
 {
 	struct alarm_base *base = &alarm_bases[alarm->type];
 
-	start = ktime_add(start, base->gettime());
+	start = ktime_add_safe(start, base->gettime());
 	alarm_start(alarm, start);
 }
 EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
 		overrun++;
 	}
 
-	alarm->node.expires = ktime_add(alarm->node.expires, interval);
+	alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
 	return overrun;
 }
 EXPORT_SYMBOL_GPL(alarm_forward);
@@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
 	/* start the timer */
 	timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+	/*
+	 * Rate limit to the tick as a hot fix to prevent DOS. Will be
+	 * mopped up later.
+	 */
+	if (timr->it.alarm.interval < TICK_NSEC)
+		timr->it.alarm.interval = TICK_NSEC;
+
 	exp = timespec64_to_ktime(new_setting->it_value);
 	/* Convert (if necessary) to absolute time */
 	if (flags != TIMER_ABSTIME) {
 		ktime_t now;
 
 		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
-		exp = ktime_add(now, exp);
+		exp = ktime_add_safe(now, exp);
 	}
 
 	alarm_start(&timr->it.alarm.alarmtimer, exp);
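
The alarmtimer hunks replace ktime_add() with ktime_add_safe() on paths fed by user-supplied relative values; the _safe variant saturates at KTIME_MAX instead of wrapping. Illustrative only, with made-up values:

	ktime_t now  = ktime_get();
	ktime_t huge = KTIME_MAX - 1;			/* absurd relative timeout from userspace */

	ktime_t wrapped = ktime_add(now, huge);		/* can overflow and land in the past */
	ktime_t capped  = ktime_add_safe(now, huge);	/* clamped to KTIME_MAX */
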
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 93621ae..03918a1 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -233,6 +233,9 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+		if (cs == curr_clocksource && cs->tick_stable)
+			cs->tick_stable(cs);
+
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 1370f067..d2a1e6d 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -825,8 +825,10 @@ static void check_thread_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -838,8 +840,10 @@ static void check_thread_timers(struct task_struct *tsk,
 				soft += USEC_PER_SEC;
 				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 			}
-			pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
@@ -936,8 +940,10 @@ static void check_process_timers(struct task_struct *tsk,
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
 			 */
-			pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
@@ -945,8 +951,10 @@ static void check_process_timers(struct task_struct *tsk,
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
-			pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
-				tsk->comm, task_pid_nr(tsk));
+			if (print_fatal_signals) {
+				pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
+					tsk->comm, task_pid_nr(tsk));
+			}
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 			if (soft < hard) {
 				soft++;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 987e496..b398c2e 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 /**
  * tick_broadcast_setup_oneshot - setup the broadcast device
  */
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
 	int cpu = smp_processor_id();
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f738251..be0ac01 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 64c97fc..c7a899c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -150,6 +150,12 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 		touch_softlockup_watchdog_sched();
 		if (is_idle_task(current))
 			ts->idle_jiffies++;
+		/*
+		 * In case the current tick fired too early past its expected
+		 * expiration, make sure we don't bypass the next clock reprogramming
+		 * to the same deadline.
+		 */
+		ts->next_tick = 0;
 	}
 #endif
 	update_process_times(user_mode(regs));
@@ -554,7 +560,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
 
-	sched_clock_idle_wakeup_event(0);
+	sched_clock_idle_wakeup_event();
 }
 
 static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -660,6 +666,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+
+	/*
+	 * Reset to make sure next tick stop doesn't get fooled by past
+	 * cached clock deadline.
+	 */
+	ts->next_tick = 0;
 }
 
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
@@ -701,8 +713,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 */
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
-		tick = 0;
-
 		/*
 		 * Tell the timer code that the base is not idle, i.e. undo
 		 * the effect of get_next_timer_interrupt():
@@ -712,23 +722,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
 		 */
-		if (!ts->tick_stopped)
-			goto out;
-
-		/*
-		 * If, OTOH, we did stop it, but there's a pending (expired)
-		 * timer reprogram the timer hardware to fire now.
-		 *
-		 * We will not restart the tick proper, just prod the timer
-		 * hardware into firing an interrupt to process the pending
-		 * timers. Just like tick_irq_exit() will not restart the tick
-		 * for 'normal' interrupts.
-		 *
-		 * Only once we exit the idle loop will we re-enable the tick,
-		 * see tick_nohz_idle_exit().
-		 */
-		if (delta == 0) {
-			tick_nohz_restart(ts, now);
+		if (!ts->tick_stopped) {
+			tick = 0;
 			goto out;
 		}
 	}
@@ -771,8 +766,16 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	tick = expires;
 
 	/* Skip reprogram of event if its not changed */
-	if (ts->tick_stopped && (expires == dev->next_event))
-		goto out;
+	if (ts->tick_stopped && (expires == ts->next_tick)) {
+		/* Sanity check: make sure clockevent is actually programmed */
+		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
+			goto out;
+
+		WARN_ON_ONCE(1);
+		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
+			    basemono, ts->next_tick, dev->next_event,
+			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
+	}
 
 	/*
 	 * nohz_stop_sched_tick can be called several times before
@@ -782,8 +785,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	 * the scheduler tick in nohz_restart_sched_tick.
 	 */
 	if (!ts->tick_stopped) {
-		nohz_balance_enter_idle(cpu);
-		calc_load_enter_idle();
+		calc_load_nohz_start();
 		cpu_load_update_nohz_start();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -791,6 +793,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
+	ts->next_tick = tick;
+
 	/*
 	 * If the expiration time == KTIME_MAX, then we simply stop
 	 * the tick timer.
@@ -801,12 +805,17 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		goto out;
 	}
 
+	hrtimer_set_expires(&ts->sched_timer, tick);
+
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
-		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(tick, 1);
 out:
-	/* Update the estimated sleep length */
+	/*
+	 * Update the estimated sleep length until the next timer
+	 * (not only the tick).
+	 */
 	ts->sleep_length = ktime_sub(dev->next_event, now);
 	return tick;
 }
@@ -823,7 +832,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	 */
 	timer_clear_idle();
 
-	calc_load_exit_idle();
+	calc_load_nohz_stop();
 	touch_softlockup_watchdog_sched();
 	/*
 	 * Cancel the scheduled timer and restore the tick
@@ -864,6 +873,11 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	if (unlikely(!cpu_online(cpu))) {
 		if (cpu == tick_do_timer_cpu)
 			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		/*
+		 * Make sure the CPU doesn't get fooled by obsolete tick
+		 * deadline if it comes back online later.
+		 */
+		ts->next_tick = 0;
 		return false;
 	}
 
@@ -923,8 +937,10 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 			ts->idle_expires = expires;
 		}
 
-		if (!was_stopped && ts->tick_stopped)
+		if (!was_stopped && ts->tick_stopped) {
 			ts->idle_jiffies = ts->last_jiffies;
+			nohz_balance_enter_idle(cpu);
+		}
 	}
 }
 
@@ -1172,6 +1188,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 	 */
 	if (regs)
 		tick_sched_handle(ts, regs);
+	else
+		ts->next_tick = 0;
 
 	/* No need to reprogram if we are in idle or full dynticks mode */
 	if (unlikely(ts->tick_stopped))
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bf38226..075444e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -27,6 +27,7 @@ enum tick_nohz_mode {
  *			timer is modified for nohz sleeps. This is necessary
  *			to resume the tick timer operation in the timeline
  *			when the CPU returns from nohz sleep.
+ * @next_tick:		Next tick to be fired when in dynticks mode.
  * @tick_stopped:	Indicator that the idle tick has been stopped
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_calls:		Total number of idle calls
@@ -44,6 +45,7 @@ struct tick_sched {
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
 	ktime_t				last_tick;
+	ktime_t				next_tick;
 	int				inidle;
 	int				tick_stopped;
 	unsigned long			idle_jiffies;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc5..b602c48 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	u64 cycle_now, delta;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	/* Go back from cycles -> shifted ns */
 	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval = (interval * clock->mult) >> clock->shift;
+	tk->raw_interval = interval * clock->mult;
 
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
 	return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 				    u32 shift, unsigned int *clock_set)
 {
 	u64 interval = tk->cycle_interval << shift;
-	u64 raw_nsecs;
+	u64 snsec_per_sec;
 
 	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
-	raw_nsecs = (u64)tk->raw_interval << shift;
-	raw_nsecs += tk->raw_time.tv_nsec;
-	if (raw_nsecs >= NSEC_PER_SEC) {
-		u64 raw_secs = raw_nsecs;
-		raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-		tk->raw_time.tv_sec += raw_secs;
+	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+		tk->raw_time.tv_sec++;
 	}
-	tk->raw_time.tv_nsec = raw_nsecs;
+	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
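
The accumulation hunk above keeps raw time as shifted (fixed-point) nanoseconds in tkr_raw.xtime_nsec instead of plain nanoseconds, carrying whole seconds with a subtract loop rather than a per-tick do_div(). A minimal stand-alone sketch of that pattern, using made-up values for the shift and per-tick interval (this is an illustration, not the kernel code itself):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	unsigned int shift = 8;               /* assumed clocksource shift */
	uint64_t xtime_nsec = 0;              /* raw time, shifted nanoseconds */
	uint64_t raw_interval = 123456789ULL; /* assumed ns accumulated per tick */
	uint64_t snsec_per_sec = NSEC_PER_SEC << shift;
	uint64_t tv_sec = 0, tv_nsec;

	for (int tick = 0; tick < 20; tick++) {
		/* add one shifted interval, then carry whole seconds */
		xtime_nsec += raw_interval << shift;
		while (xtime_nsec >= snsec_per_sec) {
			xtime_nsec -= snsec_per_sec;
			tv_sec++;
		}
	}
	/* fold the shifted remainder back into plain nanoseconds */
	tv_nsec = xtime_nsec >> shift;
	printf("%llu.%09llu\n", (unsigned long long)tv_sec,
	       (unsigned long long)tv_nsec);
	return 0;
}

Keeping the remainder in shifted form is what lets the raw clock share the tkr_raw bookkeeping used by the monotonic clock.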
 
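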
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 193c5f5..bc364f8 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
 
 		__blk_add_trace(bt, bio->bi_iter.bi_sector,
 				bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-				BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
 				&rpdu);
 	}
 }
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 	r.sector_from = cpu_to_be64(from);
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
 			sizeof(r), &r);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 74fdfe9..b308be3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4337,9 +4337,6 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
 
 	command = strsep(&next, ":");
 
-	if (WARN_ON_ONCE(!tr))
-		return -EINVAL;
-
 	mutex_lock(&ftrace_cmd_mutex);
 	list_for_each_entry(p, &ftrace_commands, list) {
 		if (strcmp(p->name, command) == 0) {
@@ -5063,7 +5060,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
 	}
 
  out:
-	kfree(fgd->new_hash);
+	free_ftrace_hash(fgd->new_hash);
 	kfree(fgd);
 
 	return ret;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1122f15..091e801 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6881,6 +6881,9 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
 	char *number;
 	int ret;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* hash funcs only work with set_ftrace_filter */
 	if (!enable)
 		return -EINVAL;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a3bddbf..a0910c0 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -654,6 +654,9 @@ ftrace_trace_onoff_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	/* we register both traceon and traceoff to this callback */
 	if (strcmp(cmd, "traceon") == 0)
 		ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
@@ -670,6 +673,9 @@ ftrace_stacktrace_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
 
 	return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd,
@@ -682,6 +688,9 @@ ftrace_dump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = &dump_probe_ops;
 
 	/* Only dump once. */
@@ -695,6 +704,9 @@ ftrace_cpudump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
 	struct ftrace_probe_ops *ops;
 
+	if (!tr)
+		return -ENODEV;
+
 	ops = &cpudump_probe_ops;
 
 	/* Only dump once. */
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c129fca..b53c8d3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -707,20 +707,16 @@ static int create_trace_kprobe(int argc, char **argv)
 		pr_info("Probe point is not specified.\n");
 		return -EINVAL;
 	}
-	if (isdigit(argv[1][0])) {
-		/* an address specified */
-		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
-		if (ret) {
-			pr_info("Failed to parse address.\n");
-			return ret;
-		}
-	} else {
+
+	/* Try to parse an address. If that fails, try to read the
+	 * input as a symbol. */
+	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
 		/* a symbol specified */
 		symbol = argv[1];
 		/* TODO: support .init module functions */
 		ret = traceprobe_split_symbol_offset(symbol, &offset);
 		if (ret) {
-			pr_info("Failed to parse symbol.\n");
+			pr_info("Failed to parse either an address or a symbol.\n");
 			return ret;
 		}
 		if (offset && is_return &&
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 76aa04d..b4a751e 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -409,7 +409,9 @@ static const struct file_operations stack_trace_fops = {
 static int
 stack_trace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+	struct ftrace_ops *ops = inode->i_private;
+
+	return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
 				 inode, file);
 }
 
@@ -476,7 +478,7 @@ static __init int stack_trace_init(void)
 			NULL, &stack_trace_fops);
 
 	trace_create_file("stack_trace_filter", 0444, d_tracer,
-			NULL, &stack_trace_filter_fops);
+			  &trace_ops, &stack_trace_filter_fops);
 
 	if (stack_trace_filter_buf[0])
 		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39..a86688fa 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2864,11 +2864,11 @@ bool flush_work(struct work_struct *work)
 EXPORT_SYMBOL_GPL(flush_work);
 
 struct cwt_wait {
-	wait_queue_t		wait;
+	wait_queue_entry_t		wait;
 	struct work_struct	*work;
 };
 
-static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587eb..9c5d40a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1052,6 +1052,7 @@
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select LOCKDEP
 	help
 	 This feature will check whether any held lock (spinlock, rwlock,
@@ -1067,6 +1068,7 @@
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	select TRACE_IRQFLAGS
 	default n
@@ -1121,6 +1123,7 @@
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
 	select DEBUG_LOCK_ALLOC
 	default n
 	help
@@ -1301,189 +1304,7 @@
 
 	  If unsure, say N.
 
-menu "RCU Debugging"
-
-config PROVE_RCU
-	def_bool PROVE_LOCKING
-
-config PROVE_RCU_REPEATEDLY
-	bool "RCU debugging: don't disable PROVE_RCU on first splat"
-	depends on PROVE_RCU
-	default n
-	help
-	 By itself, PROVE_RCU will disable checking upon issuing the
-	 first warning (or "splat").  This feature prevents such
-	 disabling, allowing multiple RCU-lockdep warnings to be printed
-	 on a single reboot.
-
-	 Say Y to allow multiple RCU-lockdep warnings per boot.
-
-	 Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
-	bool "RCU debugging: sparse-based checks for pointer usage"
-	default n
-	help
-	 This feature enables the __rcu sparse annotation for
-	 RCU-protected pointers.  This annotation will cause sparse
-	 to flag any non-RCU used of annotated pointers.  This can be
-	 helpful when debugging RCU usage.  Please note that this feature
-	 is not intended to enforce code cleanliness; it is instead merely
-	 a debugging aid.
-
-	 Say Y to make sparse flag questionable use of RCU-protected pointers
-
-	 Say N if you are unsure.
-
-config TORTURE_TEST
-	tristate
-	default n
-
-config RCU_PERF_TEST
-	tristate "performance tests for RCU"
-	depends on DEBUG_KERNEL
-	select TORTURE_TEST
-	select SRCU
-	select TASKS_RCU
-	default n
-	help
-	  This option provides a kernel module that runs performance
-	  tests on the RCU infrastructure.  The kernel module may be built
-	  after the fact on the running kernel to be tested, if desired.
-
-	  Say Y here if you want RCU performance tests to be built into
-	  the kernel.
-	  Say M if you want the RCU performance tests to build as a module.
-	  Say N if you are unsure.
-
-config RCU_TORTURE_TEST
-	tristate "torture tests for RCU"
-	depends on DEBUG_KERNEL
-	select TORTURE_TEST
-	select SRCU
-	select TASKS_RCU
-	default n
-	help
-	  This option provides a kernel module that runs torture tests
-	  on the RCU infrastructure.  The kernel module may be built
-	  after the fact on the running kernel to be tested, if desired.
-
-	  Say Y here if you want RCU torture tests to be built into
-	  the kernel.
-	  Say M if you want the RCU torture tests to build as a module.
-	  Say N if you are unsure.
-
-config RCU_TORTURE_TEST_SLOW_PREINIT
-	bool "Slow down RCU grace-period pre-initialization to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period pre-initialization (the
-	  propagation of CPU-hotplug changes up the rcu_node combining
-	  tree) for a few jiffies between initializing each pair of
-	  consecutive rcu_node structures.  This helps to expose races
-	  involving grace-period pre-initialization, in other words, it
-	  makes your kernel less stable.  It can also greatly increase
-	  grace-period latency, especially on systems with large numbers
-	  of CPUs.  This is useful when torture-testing RCU, but in
-	  almost no other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
-	int "How much to slow down RCU grace-period pre-initialization"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_PREINIT
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure pre-initialization step.
-
-config RCU_TORTURE_TEST_SLOW_INIT
-	bool "Slow down RCU grace-period initialization to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period initialization for a few
-	  jiffies between initializing each pair of consecutive
-	  rcu_node structures.	This helps to expose races involving
-	  grace-period initialization, in other words, it makes your
-	  kernel less stable.  It can also greatly increase grace-period
-	  latency, especially on systems with large numbers of CPUs.
-	  This is useful when torture-testing RCU, but in almost no
-	  other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_INIT_DELAY
-	int "How much to slow down RCU grace-period initialization"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_INIT
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure initialization.
-
-config RCU_TORTURE_TEST_SLOW_CLEANUP
-	bool "Slow down RCU grace-period cleanup to expose races"
-	depends on RCU_TORTURE_TEST
-	help
-	  This option delays grace-period cleanup for a few jiffies
-	  between cleaning up each pair of consecutive rcu_node
-	  structures.  This helps to expose races involving grace-period
-	  cleanup, in other words, it makes your kernel less stable.
-	  It can also greatly increase grace-period latency, especially
-	  on systems with large numbers of CPUs.  This is useful when
-	  torture-testing RCU, but in almost no other circumstance.
-
-	  Say Y here if you want your system to crash and hang more often.
-	  Say N if you want a sane system.
-
-config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
-	int "How much to slow down RCU grace-period cleanup"
-	range 0 5
-	default 3
-	depends on RCU_TORTURE_TEST_SLOW_CLEANUP
-	help
-	  This option specifies the number of jiffies to wait between
-	  each rcu_node structure cleanup operation.
-
-config RCU_CPU_STALL_TIMEOUT
-	int "RCU CPU stall timeout in seconds"
-	depends on RCU_STALL_COMMON
-	range 3 300
-	default 21
-	help
-	  If a given RCU grace period extends more than the specified
-	  number of seconds, a CPU stall warning is printed.  If the
-	  RCU grace period persists, additional CPU stall warnings are
-	  printed at more widely spaced intervals.
-
-config RCU_TRACE
-	bool "Enable tracing for RCU"
-	depends on DEBUG_KERNEL
-	default y if TREE_RCU
-	select TRACE_CLOCK
-	help
-	  This option provides tracing in RCU which presents stats
-	  in debugfs for debugging RCU implementation.  It also enables
-	  additional tracepoints for ftrace-style event tracing.
-
-	  Say Y here if you want to enable RCU tracing
-	  Say N if you are unsure.
-
-config RCU_EQS_DEBUG
-	bool "Provide debugging asserts for adding NO_HZ support to an arch"
-	depends on DEBUG_KERNEL
-	help
-	  This option provides consistency checks in RCU's handling of
-	  NO_HZ.  These checks have proven quite helpful in detecting
-	  bugs in arch-specific NO_HZ code.
-
-	  Say N here if you need ultimate kernel/user switch latencies
-	  Say Y if you are unsure
-
-endmenu # "RCU Debugging"
+source "kernel/rcu/Kconfig.debug"
 
 config DEBUG_WQ_FORCE_RR_CPU
 	bool "Force round-robin CPU selection for unbound work items"
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc..07fbe6a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,9 +25,6 @@
 	 earlycpio.o seq_buf.o siphash.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
-CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER
-CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER
-
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 3c6432df..4c0888c 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -23,14 +23,14 @@
  *	the values[M, M+1, ..., N] into the ints array in get_options.
  */
 
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
 {
 	int x, inc_counter, upper_range;
 
 	(*str)++;
 	upper_range = simple_strtol((*str), NULL, 0);
 	inc_counter = upper_range - *pint;
-	for (x = *pint; x < upper_range; x++)
+	for (x = *pint; n && x < upper_range; x++, n--)
 		*pint++ = x;
 	return inc_counter;
 }
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
 			break;
 		if (res == 3) {
 			int range_nums;
-			range_nums = get_range((char **)&str, ints + i);
+			range_nums = get_range((char **)&str, ints + i, nints - i);
 			if (range_nums < 0)
 				break;
 			/*
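
The get_range() change above caps range expansion at the space remaining in the caller's array, so a command-line range such as 1-1000000 can no longer write past the ints[] buffer handed to get_options(). A stand-alone sketch of the bounded expansion (the buffer size here is hypothetical):

#include <stdio.h>

/* Expand [start, upper) into out[], storing at most room entries. */
static int expand_range(int start, int upper, int *out, int room)
{
	int stored = 0;

	for (int x = start; room && x < upper; x++, room--)
		out[stored++] = x;
	return upper - start;	/* how many values the range described */
}

int main(void)
{
	int buf[4];
	int described = expand_range(10, 20, buf, 4);	/* only 4 slots free */

	printf("described=%d, stored at most 4\n", described);
	return 0;
}

As in get_range(), the return value still reports how many values the range described, even when fewer fit in the buffer.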
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 81dedaa..4731a08 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 }
 EXPORT_SYMBOL(cpumask_any_but);
 
+/**
+ * cpumask_next_wrap - helper to implement for_each_cpu_wrap
+ * @n: the cpu prior to the place to search
+ * @mask: the cpumask pointer
+ * @start: the start point of the iteration
+ * @wrap: assume @n crossing @start terminates the iteration
+ *
+ * Returns >= nr_cpu_ids on completion
+ *
+ * Note: the @wrap argument is required for the start condition when
+ * we cannot assume @start is set in @mask.
+ */
+int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+{
+	int next;
+
+again:
+	next = cpumask_next(n, mask);
+
+	if (wrap && n < start && next >= start) {
+		return nr_cpumask_bits;
+
+	} else if (next >= nr_cpumask_bits) {
+		wrap = true;
+		n = -1;
+		goto again;
+	}
+
+	return next;
+}
+EXPORT_SYMBOL(cpumask_next_wrap);
+
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 /**
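
cpumask_next_wrap() above is the helper behind a wrap-around iterator (for_each_cpu_wrap() in the same series): start anywhere in the mask, walk to the end, wrap to the beginning, and stop before crossing the start point again. A small user-space model of the same control flow over an 8-bit mask; next_set() stands in for cpumask_next(), and everything here is illustrative rather than the kernel API:

#include <stdbool.h>
#include <stdio.h>

#define NBITS 8

/* next set bit strictly after n, or NBITS if none */
static int next_set(int n, unsigned int mask)
{
	for (int i = n + 1; i < NBITS; i++)
		if (mask & (1u << i))
			return i;
	return NBITS;
}

/* same shape as cpumask_next_wrap(): wrap once, stop on crossing start */
static int next_wrap(int n, unsigned int mask, int start, bool wrap)
{
	int next;
again:
	next = next_set(n, mask);
	if (wrap && n < start && next >= start)
		return NBITS;
	if (next >= NBITS) {
		wrap = true;
		n = -1;
		goto again;
	}
	return next;
}

int main(void)
{
	unsigned int mask = 0xB5;	/* bits 0, 2, 4, 5, 7 set */
	int start = 4;

	for (int cpu = next_wrap(start - 1, mask, start, false);
	     cpu < NBITS;
	     cpu = next_wrap(cpu, mask, start, true))
		printf("%d ", cpu);	/* prints: 4 5 7 0 2 */
	putchar('\n');
	return 0;
}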
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 74a54b7..9f79547 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -43,7 +43,7 @@ static struct crypto_shash *tfm;
 u32 crc32c(u32 crc, const void *address, unsigned int length)
 {
 	SHASH_DESC_ON_STACK(shash, tfm);
-	u32 *ctx = (u32 *)shash_desc_ctx(shash);
+	u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
 	int err;
 
 	shash->tfm = tfm;
@@ -53,7 +53,9 @@ u32 crc32c(u32 crc, const void *address, unsigned int length)
 	err = crypto_shash_update(shash, address, length);
 	BUG_ON(err);
 
-	return *ctx;
+	ret = *ctx;
+	barrier_data(ctx);
+	return ret;
 }
 
 EXPORT_SYMBOL(crc32c);
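
barrier_data() in the hunk above tells the optimizer that the memory behind the pointer is still observed at that point, so the on-stack hash context stays live until the result has been copied out. A generic illustration of the primitive; the macro body below is the usual GCC-style definition and is an assumption here, not taken from this patch:

#include <stdio.h>
#include <string.h>

/* Typical GCC-style definition: an empty asm that "reads" ptr and clobbers
 * memory, so earlier stores to the pointed-to data must be kept. */
#define barrier_data(ptr) __asm__ __volatile__("" : : "r"(ptr) : "memory")

static int compute(void)
{
	unsigned char scratch[64];
	int result;

	memset(scratch, 0xa5, sizeof(scratch));	/* stand-in for real work */
	result = scratch[0] ^ scratch[63];

	memset(scratch, 0, sizeof(scratch));	/* could otherwise be elided */
	barrier_data(scratch);			/* keep scratch "live" here */
	return result;
}

int main(void)
{
	printf("%d\n", compute());
	return 0;
}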
diff --git a/lib/locking-selftest-rtmutex.h b/lib/locking-selftest-rtmutex.h
new file mode 100644
index 0000000..e3cb839
--- /dev/null
+++ b/lib/locking-selftest-rtmutex.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		RTL
+
+#undef UNLOCK
+#define UNLOCK		RTU
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		RTI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index f3a217e..6f2b135 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
 #include <linux/irqflags.h>
+#include <linux/rtmutex.h>
 
 /*
  * Change this to 1 if you want to see the failure printouts:
@@ -46,6 +47,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
 #define LOCKTYPE_MUTEX	0x4
 #define LOCKTYPE_RWSEM	0x8
 #define LOCKTYPE_WW	0x10
+#define LOCKTYPE_RTMUTEX 0x20
 
 static struct ww_acquire_ctx t, t2;
 static struct ww_mutex o, o2, o3;
@@ -74,6 +76,15 @@ static DECLARE_RWSEM(rwsem_B);
 static DECLARE_RWSEM(rwsem_C);
 static DECLARE_RWSEM(rwsem_D);
 
+#ifdef CONFIG_RT_MUTEXES
+
+static DEFINE_RT_MUTEX(rtmutex_A);
+static DEFINE_RT_MUTEX(rtmutex_B);
+static DEFINE_RT_MUTEX(rtmutex_C);
+static DEFINE_RT_MUTEX(rtmutex_D);
+
+#endif
+
 /*
  * Locks that we initialize dynamically as well so that
  * e.g. X1 and X2 becomes two instances of the same class,
@@ -108,6 +119,17 @@ static DECLARE_RWSEM(rwsem_Y2);
 static DECLARE_RWSEM(rwsem_Z1);
 static DECLARE_RWSEM(rwsem_Z2);
 
+#ifdef CONFIG_RT_MUTEXES
+
+static DEFINE_RT_MUTEX(rtmutex_X1);
+static DEFINE_RT_MUTEX(rtmutex_X2);
+static DEFINE_RT_MUTEX(rtmutex_Y1);
+static DEFINE_RT_MUTEX(rtmutex_Y2);
+static DEFINE_RT_MUTEX(rtmutex_Z1);
+static DEFINE_RT_MUTEX(rtmutex_Z2);
+
+#endif
+
 /*
  * non-inlined runtime initializers, to let separate locks share
  * the same lock-class:
@@ -129,6 +151,17 @@ INIT_CLASS_FUNC(Z)
 
 static void init_shared_classes(void)
 {
+#ifdef CONFIG_RT_MUTEXES
+	static struct lock_class_key rt_X, rt_Y, rt_Z;
+
+	__rt_mutex_init(&rtmutex_X1, __func__, &rt_X);
+	__rt_mutex_init(&rtmutex_X2, __func__, &rt_X);
+	__rt_mutex_init(&rtmutex_Y1, __func__, &rt_Y);
+	__rt_mutex_init(&rtmutex_Y2, __func__, &rt_Y);
+	__rt_mutex_init(&rtmutex_Z1, __func__, &rt_Z);
+	__rt_mutex_init(&rtmutex_Z2, __func__, &rt_Z);
+#endif
+
 	init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
 	init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
 
@@ -193,6 +226,10 @@ static void init_shared_classes(void)
 #define MU(x)			mutex_unlock(&mutex_##x)
 #define MI(x)			mutex_init(&mutex_##x)
 
+#define RTL(x)			rt_mutex_lock(&rtmutex_##x)
+#define RTU(x)			rt_mutex_unlock(&rtmutex_##x)
+#define RTI(x)			rt_mutex_init(&rtmutex_##x)
+
 #define WSL(x)			down_write(&rwsem_##x)
 #define WSU(x)			up_write(&rwsem_##x)
 
@@ -264,6 +301,11 @@ GENERATE_TESTCASE(AA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(AA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(AA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -345,6 +387,11 @@ GENERATE_TESTCASE(ABBA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -373,6 +420,11 @@ GENERATE_TESTCASE(ABBCCA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBCCA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBCCA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -401,6 +453,11 @@ GENERATE_TESTCASE(ABCABC_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCABC_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCABC_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -430,6 +487,11 @@ GENERATE_TESTCASE(ABBCCDDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABBCCDDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABBCCDDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -458,6 +520,11 @@ GENERATE_TESTCASE(ABCDBDDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCDBDDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCDBDDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -486,6 +553,11 @@ GENERATE_TESTCASE(ABCDBCDA_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(ABCDBCDA_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(ABCDBCDA_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -513,33 +585,10 @@ GENERATE_TESTCASE(double_unlock_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(double_unlock_rsem)
 
-#undef E
-
-/*
- * Bad unlock ordering:
- */
-#define E()					\
-						\
-	LOCK(A);				\
-	LOCK(B);				\
-	UNLOCK(A); /* fail */			\
-	UNLOCK(B);
-
-/*
- * 6 testcases:
- */
-#include "locking-selftest-spin.h"
-GENERATE_TESTCASE(bad_unlock_order_spin)
-#include "locking-selftest-wlock.h"
-GENERATE_TESTCASE(bad_unlock_order_wlock)
-#include "locking-selftest-rlock.h"
-GENERATE_TESTCASE(bad_unlock_order_rlock)
-#include "locking-selftest-mutex.h"
-GENERATE_TESTCASE(bad_unlock_order_mutex)
-#include "locking-selftest-wsem.h"
-GENERATE_TESTCASE(bad_unlock_order_wsem)
-#include "locking-selftest-rsem.h"
-GENERATE_TESTCASE(bad_unlock_order_rsem)
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(double_unlock_rtmutex);
+#endif
 
 #undef E
 
@@ -567,6 +616,11 @@ GENERATE_TESTCASE(init_held_wsem)
 #include "locking-selftest-rsem.h"
 GENERATE_TESTCASE(init_held_rsem)
 
+#ifdef CONFIG_RT_MUTEXES
+#include "locking-selftest-rtmutex.h"
+GENERATE_TESTCASE(init_held_rtmutex);
+#endif
+
 #undef E
 
 /*
@@ -916,6 +970,9 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 # define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
 # define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
 # define I_WW(x)	lockdep_reset_lock(&x.dep_map)
+#ifdef CONFIG_RT_MUTEXES
+# define I_RTMUTEX(x)	lockdep_reset_lock(&rtmutex_##x.dep_map)
+#endif
 #else
 # define I_SPINLOCK(x)
 # define I_RWLOCK(x)
@@ -924,12 +981,23 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 # define I_WW(x)
 #endif
 
+#ifndef I_RTMUTEX
+# define I_RTMUTEX(x)
+#endif
+
+#ifdef CONFIG_RT_MUTEXES
+#define I2_RTMUTEX(x)	rt_mutex_init(&rtmutex_##x)
+#else
+#define I2_RTMUTEX(x)
+#endif
+
 #define I1(x)					\
 	do {					\
 		I_SPINLOCK(x);			\
 		I_RWLOCK(x);			\
 		I_MUTEX(x);			\
 		I_RWSEM(x);			\
+		I_RTMUTEX(x);			\
 	} while (0)
 
 #define I2(x)					\
@@ -938,6 +1006,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 		rwlock_init(&rwlock_##x);	\
 		mutex_init(&mutex_##x);		\
 		init_rwsem(&rwsem_##x);		\
+		I2_RTMUTEX(x);			\
 	} while (0)
 
 static void reset_locks(void)
@@ -1013,6 +1082,12 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 	reset_locks();
 }
 
+#ifdef CONFIG_RT_MUTEXES
+#define dotest_rt(fn, e, m)	dotest((fn), (e), (m))
+#else
+#define dotest_rt(fn, e, m)
+#endif
+
 static inline void print_testname(const char *testname)
 {
 	printk("%33s:", testname);
@@ -1050,6 +1125,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)			\
@@ -1060,6 +1136,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, SUCCESS, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 /*
@@ -1073,6 +1150,7 @@ static inline void print_testname(const char *testname)
 	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
 	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
 	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX);	\
 	pr_cont("\n");
 
 #define DO_TESTCASE_2I(desc, name, nr)				\
@@ -1825,7 +1903,6 @@ void locking_selftest(void)
 	DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
 	DO_TESTCASE_6("double unlock", double_unlock);
 	DO_TESTCASE_6("initialize held", init_held);
-	DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
 
 	printk("  --------------------------------------------------------------------------\n");
 	print_testname("recursive read-lock");
diff --git a/lib/refcount.c b/lib/refcount.c
index 9f90678..5d0582a 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -37,6 +37,8 @@
 #include <linux/refcount.h>
 #include <linux/bug.h>
 
+#ifdef CONFIG_REFCOUNT_FULL
+
 /**
  * refcount_add_not_zero - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
@@ -225,6 +227,7 @@ void refcount_dec(refcount_t *r)
 	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
 EXPORT_SYMBOL(refcount_dec);
+#endif /* CONFIG_REFCOUNT_FULL */
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf822..be7b4dd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 	return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
 }
 EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl:		 The SG list
+ * @nents:		 Number of SG entries
+ * @buflen:		 The number of bytes to zero out
+ * @skip:		 Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+		       size_t buflen, off_t skip)
+{
+	unsigned int offset = 0;
+	struct sg_mapping_iter miter;
+	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+	sg_miter_start(&miter, sgl, nents, sg_flags);
+
+	if (!sg_miter_skip(&miter, skip))
+		return false;
+
+	while (offset < buflen && sg_miter_next(&miter)) {
+		unsigned int len;
+
+		len = min(miter.length, buflen - offset);
+		memset(miter.addr, 0, len);
+
+		offset += len;
+	}
+
+	sg_miter_stop(&miter);
+	return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
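
sg_zero_buffer() above walks the SG list with a mapping iterator, skips the first `skip` bytes, then zeroes up to `buflen` bytes and reports how many it cleared. A user-space model of the same skip-then-zero walk over a plain segment array (struct seg and the sizes are made up for illustration):

#include <stdio.h>
#include <string.h>

struct seg { unsigned char *buf; size_t len; };

/* skip `skip` bytes across the segments, then zero up to `buflen` bytes */
static size_t zero_segments(struct seg *segs, unsigned int nsegs,
			    size_t buflen, size_t skip)
{
	size_t zeroed = 0;

	for (unsigned int i = 0; i < nsegs && zeroed < buflen; i++) {
		size_t off, len = segs[i].len;

		if (skip >= len) {		/* whole segment skipped */
			skip -= len;
			continue;
		}
		off = skip;
		skip = 0;
		len = segs[i].len - off;
		if (len > buflen - zeroed)
			len = buflen - zeroed;
		memset(segs[i].buf + off, 0, len);
		zeroed += len;
	}
	return zeroed;
}

int main(void)
{
	unsigned char a[4] = "AAAA", b[4] = "BBBB";
	struct seg segs[] = { { a, 4 }, { b, 4 } };

	/* skip 2 bytes, zero 4: a becomes "AA\0\0", b becomes "\0\0BB" */
	printf("zeroed %zu bytes\n", zero_segments(segs, 2, 4, 2));
	return 0;
}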
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 690d75b..2fb007b 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -28,7 +28,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
 	/*
 	 * It is valid to assume CPU-locality during early bootup:
 	 */
-	if (system_state != SYSTEM_RUNNING)
+	if (system_state < SYSTEM_SCHEDULING)
 		goto out;
 
 	/*
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 889bc31..be88cba 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -4504,6 +4504,44 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } },
 	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 6),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 1),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JMP_JSGE_K: Signed jump: value walk 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_LD_IMM64(R1, -3),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 4),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_JMP_IMM(BPF_JSGE, R1, 0, 1),
+			BPF_EXIT_INSN(),		/* bad exit */
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),	/* good exit */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
 	/* BPF_JMP | BPF_JGT | BPF_K */
 	{
 		"JMP_JGT_K: if (3 > 2) return 1",
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
index 547d312..478c049 100644
--- a/lib/test_uuid.c
+++ b/lib/test_uuid.c
@@ -11,25 +11,25 @@
 
 struct test_uuid_data {
 	const char *uuid;
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 };
 
 static const struct test_uuid_data test_uuid_test_data[] = {
 	{
 		.uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
-		.le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
-		.be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+		.be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
 	},
 	{
 		.uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
-		.le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
-		.be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+		.be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
 	},
 	{
 		.uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
-		.le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
-		.be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+		.be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
 	},
 };
 
@@ -61,28 +61,28 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
 
 static void __init test_uuid_test(const struct test_uuid_data *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 	char buf[48];
 
 	/* LE */
 	total_tests++;
-	if (uuid_le_to_bin(data->uuid, &le))
+	if (guid_parse(data->uuid, &le))
 		test_uuid_failed("conversion", false, false, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_le_cmp(data->le, le)) {
+	if (!guid_equal(&data->le, &le)) {
 		sprintf(buf, "%pUl", &le);
 		test_uuid_failed("cmp", false, false, data->uuid, buf);
 	}
 
 	/* BE */
 	total_tests++;
-	if (uuid_be_to_bin(data->uuid, &be))
+	if (uuid_parse(data->uuid, &be))
 		test_uuid_failed("conversion", false, true, data->uuid, NULL);
 
 	total_tests++;
-	if (uuid_be_cmp(data->be, be)) {
+	if (!uuid_equal(&data->be, &be)) {
 		sprintf(buf, "%pUb", &be);
 		test_uuid_failed("cmp", false, true, data->uuid, buf);
 	}
@@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 
 static void __init test_uuid_wrong(const char *data)
 {
-	uuid_le le;
-	uuid_be be;
+	guid_t le;
+	uuid_t be;
 
 	/* LE */
 	total_tests++;
-	if (!uuid_le_to_bin(data, &le))
+	if (!guid_parse(data, &le))
 		test_uuid_failed("negative", true, false, data, NULL);
 
 	/* BE */
 	total_tests++;
-	if (!uuid_be_to_bin(data, &be))
+	if (!uuid_parse(data, &be))
 		test_uuid_failed("negative", true, true, data, NULL);
 }
 
diff --git a/lib/uuid.c b/lib/uuid.c
index 37687af..680b9fb 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -21,10 +21,13 @@
 #include <linux/uuid.h>
 #include <linux/random.h>
 
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
 /***************************************************************
  * Random UUID interface
@@ -53,21 +56,21 @@ static void __uuid_gen_common(__u8 b[16])
 	b[8] = (b[8] & 0x3F) | 0x80;
 }
 
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
 {
 	__uuid_gen_common(lu->b);
 	/* version 4 : random generation */
 	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
 
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
 {
 	__uuid_gen_common(bu->b);
 	/* version 4 : random generation */
 	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
   * uuid_is_valid - checks if UUID string valid
@@ -97,7 +100,7 @@ bool uuid_is_valid(const char *uuid)
 }
 EXPORT_SYMBOL(uuid_is_valid);
 
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
 {
 	static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
 	unsigned int i;
@@ -115,14 +118,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
 	return 0;
 }
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_le_index);
+	return __uuid_parse(uuid, u->b, guid_index);
 }
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
 
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
 {
-	return __uuid_to_bin(uuid, u->b, uuid_be_index);
+	return __uuid_parse(uuid, u->b, uuid_index);
 }
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
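
The rename above maps the old uuid_le_*/uuid_be_* API onto guid_*/uuid_*: guid_t keeps the little-endian (EFI/SMBIOS-style) byte layout and uuid_t the big-endian RFC layout. A short kernel-style sketch of the new entry points; the caller and its error handling are hypothetical, the calls themselves are the ones exported in this hunk, and the snippet only builds in-tree:

#include <linux/errno.h>
#include <linux/uuid.h>

static int parse_both(const char *str)
{
	guid_t g;	/* was uuid_le */
	uuid_t u;	/* was uuid_be */

	if (guid_parse(str, &g))	/* was uuid_le_to_bin() */
		return -EINVAL;
	if (uuid_parse(str, &u))	/* was uuid_be_to_bin() */
		return -EINVAL;

	/* random generation keeps its old behaviour under the new names */
	guid_gen(&g);			/* was uuid_le_gen() */
	uuid_gen(&u);			/* was uuid_be_gen() */
	return 0;
}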
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2d41de3..9f37d62 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1308,14 +1308,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	char uuid[UUID_STRING_LEN + 1];
 	char *p = uuid;
 	int i;
-	const u8 *index = uuid_be_index;
+	const u8 *index = uuid_index;
 	bool uc = false;
 
 	switch (*(++fmt)) {
 	case 'L':
 		uc = true;		/* fall-through */
 	case 'l':
-		index = uuid_le_index;
+		index = guid_index;
 		break;
 	case 'B':
 		uc = true;
diff --git a/mm/cleancache.c b/mm/cleancache.c
index ba5d8f3..f7b9fdc 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb)
 	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
 	if (cleancache_ops) {
-		pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+		pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
 		if (pool_id < 0)
 			pool_id = CLEANCACHE_NO_POOL;
 	}
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be57..aea58e9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied; this is typically used to
+ * check whether direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+			   loff_t start_byte, loff_t end_byte)
+{
+	pgoff_t index = start_byte >> PAGE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_SHIFT;
+	struct pagevec pvec;
+	bool ret;
+
+	if (end_byte < start_byte)
+		return false;
+
+	if (mapping->nrpages == 0)
+		return false;
+
+	pagevec_init(&pvec, 0);
+	if (!pagevec_lookup(&pvec, mapping, index, 1))
+		return false;
+	ret = (pvec.pages[0]->index <= end);
+	pagevec_release(&pvec);
+	return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 				     loff_t start_byte, loff_t end_byte)
 {
@@ -768,10 +800,10 @@ struct wait_page_key {
 struct wait_page_queue {
 	struct page *page;
 	int bit_nr;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 };
 
-static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
 	struct wait_page_key *key = arg;
 	struct wait_page_queue *wait_page
@@ -834,7 +866,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 		struct page *page, int bit_nr, int state, bool lock)
 {
 	struct wait_page_queue wait_page;
-	wait_queue_t *wait = &wait_page.wait;
+	wait_queue_entry_t *wait = &wait_page.wait;
 	int ret = 0;
 
 	init_wait(wait);
@@ -845,9 +877,9 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	for (;;) {
 		spin_lock_irq(&q->lock);
 
-		if (likely(list_empty(&wait->task_list))) {
+		if (likely(list_empty(&wait->entry))) {
 			if (lock)
-				__add_wait_queue_tail_exclusive(q, wait);
+				__add_wait_queue_entry_tail_exclusive(q, wait);
 			else
 				__add_wait_queue(q, wait);
 			SetPageWaiters(page);
@@ -907,7 +939,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
-void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 {
 	wait_queue_head_t *q = page_waitqueue(page);
 	unsigned long flags;
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		loff_t size;
 
 		size = i_size_read(inode);
-		retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-					iocb->ki_pos + count - 1);
-		if (retval < 0)
-			goto out;
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			if (filemap_range_has_page(mapping, iocb->ki_pos,
+						   iocb->ki_pos + count - 1))
+				return -EAGAIN;
+		} else {
+			retval = filemap_write_and_wait_range(mapping,
+						iocb->ki_pos,
+					        iocb->ki_pos + count - 1);
+			if (retval < 0)
+				goto out;
+		}
 
 		file_accessed(file);
 
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
 	pos = iocb->ki_pos;
 
+	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+		return -EINVAL;
+
 	if (limit != RLIM_INFINITY) {
 		if (iocb->ki_pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	write_len = iov_iter_count(from);
 	end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-	if (written)
-		goto out;
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		/* If there are pages to writeback, return */
+		if (filemap_range_has_page(inode->i_mapping, pos,
+					   pos + iov_iter_count(from)))
+			return -EAGAIN;
+	} else {
+		written = filemap_write_and_wait_range(mapping, pos,
+							pos + write_len - 1);
+		if (written)
+			goto out;
+	}
 
 	/*
 	 * After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/gup.c b/mm/gup.c
index d9e6fdd..576c4df 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
@@ -407,12 +402,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 
 	ret = handle_mm_fault(vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, *flags);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
@@ -723,12 +716,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 	ret = handle_mm_fault(vma, address, fault_flags);
 	major |= ret & VM_FAULT_MAJOR;
 	if (ret & VM_FAULT_ERROR) {
-		if (ret & VM_FAULT_OOM)
-			return -ENOMEM;
-		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
-			return -EHWPOISON;
-		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
-			return -EFAULT;
+		int err = vm_fault_to_errno(ret, 0);
+
+		if (err)
+			return err;
 		BUG();
 	}
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a84909c..88c6167 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1426,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	 */
 	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
 		page = pmd_page(*vmf->pmd);
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
+		put_page(page);
 		goto out;
 	}
 
@@ -1459,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
+		page_nid = -1;
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
-		page_nid = -1;
+		put_page(page);
 		goto out;
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e582887..3eedb18 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 			ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
 			if (ret & VM_FAULT_ERROR) {
+				int err = vm_fault_to_errno(ret, flags);
+
+				if (err)
+					return err;
+
 				remainder = 0;
 				break;
 			}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 945fd1c..df4ebdb 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
 		}
-		cond_resched();
 	}
 }
 
diff --git a/mm/ksm.c b/mm/ksm.c
index d9fc0e456..216184a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 		goto out;
 
 	if (PageTransCompound(page)) {
-		err = split_huge_page(page);
-		if (err)
+		if (split_huge_page(page))
 			goto out_unlock;
 	}
 
diff --git a/mm/memblock.c b/mm/memblock.c
index b049c9b..7b8a5db 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 	}
 }
 
+extern unsigned long __init_memblock
+memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+	struct memblock_region *rgn;
+	unsigned long size = 0;
+	int idx;
+
+	for_each_memblock_type((&memblock.reserved), rgn) {
+		phys_addr_t start, end;
+
+		if (rgn->base + rgn->size < start_addr)
+			continue;
+		if (rgn->base > end_addr)
+			continue;
+
+		start = rgn->base;
+		end = start + rgn->size;
+		size += end - start;
+	}
+
+	return size;
+}
+
 void __init_memblock __memblock_dump_all(void)
 {
 	pr_info("MEMBLOCK configuration:\n");
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9417208..d75b38b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,7 +170,7 @@ struct mem_cgroup_event {
 	 */
 	poll_table pt;
 	wait_queue_head_t *wqh;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	struct work_struct remove;
 };
 
@@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
 	struct mem_cgroup *memcg;
-	wait_queue_t	wait;
+	wait_queue_entry_t	wait;
 };
 
-static int memcg_oom_wake_function(wait_queue_t *wait,
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
 	unsigned mode, int sync, void *arg)
 {
 	struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
@@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	owait.wait.flags = 0;
 	owait.wait.func = memcg_oom_wake_function;
 	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
+	INIT_LIST_HEAD(&owait.wait.entry);
 
 	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
@@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work)
  *
  * Called with wqh->lock held and interrupts disabled.
  */
-static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
+static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
 			    int sync, void *key)
 {
 	struct mem_cgroup_event *event =
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2527dfe..ecc183f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1184,7 +1184,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
 	 * correctly, we save a copy of the page flags at this time.
 	 */
-	page_flags = p->flags;
+	if (PageHuge(p))
+		page_flags = hpage->flags;
+	else
+		page_flags = p->flags;
 
 	/*
 	 * unpoison always clear PG_hwpoison inside page lock
@@ -1595,12 +1598,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	if (ret) {
 		pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
 			pfn, ret, page->flags, &page->flags);
-		/*
-		 * We know that soft_offline_huge_page() tries to migrate
-		 * only one hugepage pointed to by hpage, so we need not
-		 * run through the pagelist here.
-		 */
-		putback_active_hugepage(hpage);
+		if (!list_empty(&pagelist))
+			putback_movable_pages(&pagelist);
 		if (ret > 0)
 			ret = -EIO;
 	} else {
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d72..bb11c47 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2855,40 +2855,6 @@ int do_swap_page(struct vm_fault *vmf)
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
 	 * pte_offset_map() on pmds where a huge pmd might be created
@@ -3029,6 +2991,17 @@ static int __do_fault(struct vm_fault *vmf)
 	return ret;
 }
 
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 static int pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3025,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 map_pte:
 	/*
 	 * If a huge pmd materialized under us just retry later.  Use
-	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
-	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
-	 * in a different thread of this mm, in turn leading to a misleading
-	 * pmd_trans_huge() retval.  All we have to ensure is that it is a
-	 * regular pmd that we can walk with pte_offset_map() and we can do that
-	 * through an atomic read in C, which is what pmd_trans_unstable()
-	 * provides.
+	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
+	 * running immediately after a huge pmd fault in a different thread of
+	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+	 * All we have to ensure is that it is a regular pmd that we can walk
+	 * with pte_offset_map() and we can do that through an atomic read in
+	 * C, which is what pmd_trans_unstable() provides.
 	 */
-	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+	if (pmd_devmap_trans_unstable(vmf->pmd))
 		return VM_FAULT_NOPAGE;
 
+	/*
+	 * At this point we know that our vmf->pmd points to a page of ptes
+	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+	 * for the duration of the fault.  If a racing MADV_DONTNEED runs and
+	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+	 * be valid and we will re-check to make sure the vmf->pte isn't
+	 * pte_none() under vmf->ptl protection when we return to
+	 * alloc_set_pte().
+	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
 	return 0;
@@ -3690,7 +3672,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		vmf->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+		if (pmd_devmap_trans_unstable(vmf->pmd))
 			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge
diff --git a/mm/mempool.c b/mm/mempool.c
index 47a659d..1c02948 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -312,7 +312,7 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
 	void *element;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	gfp_t gfp_temp;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
diff --git a/mm/mlock.c b/mm/mlock.c
index c483c5c..b562b55 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
 	int i;
 	int nr = pagevec_count(pvec);
-	int delta_munlocked;
+	int delta_munlocked = -nr;
 	struct pagevec pvec_putback;
 	int pgrescued = 0;
 
@@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 				continue;
 			else
 				__munlock_isolation_failed(page);
+		} else {
+			delta_munlocked++;
 		}
 
 		/*
@@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 		pagevec_add(&pvec_putback, pvec->pages[i]);
 		pvec->pages[i] = NULL;
 	}
-	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(zone_lru_lock(zone));
 
diff --git a/mm/mmap.c b/mm/mmap.c
index f82741e..a5e3dcd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 	LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm)
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
@@ -941,7 +955,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 * mm->highest_vm_end doesn't need any update
 			 * in remove_next == 1 case.
 			 */
-			VM_WARN_ON(mm->highest_vm_end != end);
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
 	if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
@@ -1798,12 +1812,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
 			return -ENOMEM;
-		if (gap_end >= low_limit && gap_end - gap_start >= length)
+		if (gap_end >= low_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit right subtree if it looks promising */
@@ -1825,8 +1840,8 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
@@ -1890,7 +1905,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
@@ -1903,10 +1918,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
-		if (gap_start <= high_limit && gap_end - gap_start >= length)
+		if (gap_start <= high_limit &&
+		    gap_end > gap_start && gap_end - gap_start >= length)
 			goto found;
 
 		/* Visit left subtree if it looks promising */
@@ -1929,7 +1945,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
@@ -1967,7 +1983,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1994,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2003,7 +2020,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			  const unsigned long len, const unsigned long pgoff,
 			  const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
@@ -2018,9 +2035,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-				(!vma || addr + len <= vma->vm_start))
+				(!vma || addr + len <= vm_start_gap(vma)) &&
+				(!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
 
@@ -2155,21 +2173,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, vma->vm_flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2207,16 +2223,32 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
-	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	/* Guard against exceeding limits of the address space. */
+	address &= PAGE_MASK;
+	if (address >= TASK_SIZE)
 		return -ENOMEM;
+	address += PAGE_SIZE;
+
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+
+	/* Guard against overflow */
+	if (gap_addr < address || gap_addr > TASK_SIZE)
+		gap_addr = TASK_SIZE;
+
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
 
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
@@ -2261,7 +2293,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				if (vma->vm_next)
 					vma_gap_update(vma->vm_next);
 				else
-					mm->highest_vm_end = address;
+					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
@@ -2282,6 +2314,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2289,6 +2323,17 @@ int expand_downwards(struct vm_area_struct *vma,
 	if (error)
 		return error;
 
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
+
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
 		return -ENOMEM;
@@ -2343,28 +2388,25 @@ int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
 
@@ -2386,14 +2428,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
 
@@ -2491,7 +2525,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */
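
The mm/mmap.c hunks above rely on the vm_start_gap()/vm_end_gap() helpers, whose
definitions (in include/linux/mm.h) are outside this excerpt. As a rough sketch of
what they are assumed to do - extend a stack VMA's range by stack_guard_gap in its
growth direction, clamping on overflow:

/* Sketch of the assumed helpers; the real definitions may differ in detail. */
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
	unsigned long vm_start = vma->vm_start;

	if (vma->vm_flags & VM_GROWSDOWN) {
		vm_start -= stack_guard_gap;
		if (vm_start > vma->vm_start)	/* underflowed: clamp to 0 */
			vm_start = 0;
	}
	return vm_start;
}

static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
{
	unsigned long vm_end = vma->vm_end;

	if (vma->vm_flags & VM_GROWSUP) {
		vm_end += stack_guard_gap;
		if (vm_end < vma->vm_end)	/* overflowed: clamp below wrap */
			vm_end = -PAGE_SIZE;
	}
	return vm_end;
}

With the default gap of 256 pages, booting with stack_guard_gap=1 would presumably
restore the previous single-page gap.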
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f9e450c..2302f25 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly;
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static inline void reset_deferred_meminit(pg_data_t *pgdat)
 {
+	unsigned long max_initialise;
+	unsigned long reserved_lowmem;
+
+	/*
+	 * Initialise at least 2G of a node but also take into account that
+	 * two large system hashes can take up 1GB for 0.25TB/node.
+	 */
+	max_initialise = max(2UL << (30 - PAGE_SHIFT),
+		(pgdat->node_spanned_pages >> 8));
+
+	/*
+	 * Compensate for all the memblock reservations (e.g. crash kernel)
+	 * from the initial estimation to make sure we will initialize enough
+	 * memory to boot.
+	 */
+	reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
+			pgdat->node_start_pfn + max_initialise);
+	max_initialise += reserved_lowmem;
+
+	pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
 	pgdat->first_deferred_pfn = ULONG_MAX;
 }
 
@@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
 {
-	unsigned long max_initialise;
-
 	/* Always populate low zones for address-constrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
-	/*
-	 * Initialise at least 2G of a node but also take into account that
-	 * two large system hashes that can take up 1GB for 0.25TB/node.
-	 */
-	max_initialise = max(2UL << (30 - PAGE_SHIFT),
-		(pgdat->node_spanned_pages >> 8));
-
 	(*nr_initialised)++;
-	if ((*nr_initialised > max_initialise) &&
+	if ((*nr_initialised > pgdat->static_init_size) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
 		pgdat->first_deferred_pfn = pfn;
 		return false;
@@ -3870,7 +3881,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto got_pg;
 
 	/* Avoid allocations with no watermarks from looping endlessly */
-	if (test_thread_flag(TIF_MEMDIE))
+	if (test_thread_flag(TIF_MEMDIE) &&
+	    (alloc_flags == ALLOC_NO_WATERMARKS ||
+	     (gfp_mask & __GFP_NOMEMALLOC)))
 		goto nopage;
 
 	/* Retry as long as the OOM killer is making progress */
@@ -6136,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
 
-	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 	pgdat->per_cpu_nodestats = NULL;
@@ -6158,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 		(unsigned long)pgdat->node_mem_map);
 #endif
 
+	reset_deferred_meminit(pgdat);
 	free_area_init_core(pgdat);
 }
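
To make the sizing in reset_deferred_meminit() concrete, a rough worked example
(hypothetical numbers, assuming 4 KiB pages so PAGE_SHIFT == 12):

/* Hypothetical 1 TiB node, 4 KiB pages:
 *   2UL << (30 - PAGE_SHIFT)  = 2UL << 18  =    524288 pages  (2 GiB)
 *   node_spanned_pages        = 2^28       = 268435456 pages  (1 TiB)
 *   node_spanned_pages >> 8   = 2^20       =   1048576 pages  (4 GiB)
 * max_initialise = max(524288, 1048576) = 1048576 pages, i.e. 4 GiB,
 * plus whatever memblock has already reserved in that range (e.g. crashkernel),
 * capped at node_spanned_pages when stored in pgdat->static_init_size.
 */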
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d..2da71e6 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		/*
 		 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
-	if (bio->bi_error) {
+	if (bio->bi_status) {
 		SetPageError(page);
 		ClearPageUptodate(page);
 		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
diff --git a/mm/shmem.c b/mm/shmem.c
index e67d6ba..9100c49 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -75,6 +75,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/rmap.h>
+#include <linux/uuid.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -1902,10 +1903,10 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
  * entry unconditionally - even if something else had already woken the
  * target.
  */
-static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
-	list_del_init(&wait->task_list);
+	list_del_init(&wait->entry);
 	return ret;
 }
 
@@ -2840,7 +2841,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		spin_lock(&inode->i_lock);
 		inode->i_private = NULL;
 		wake_up_all(&shmem_falloc_waitq);
-		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.task_list));
+		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
 		spin_unlock(&inode->i_lock);
 		error = 0;
 		goto out;
@@ -3761,6 +3762,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	sb->s_flags |= MS_POSIXACL;
 #endif
+	uuid_gen(&sb->s_uuid);
 
 	inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
 	if (!inode)
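
Several hunks in this pull (shmem above, and the 9p, bluetooth and datagram changes
below) are mechanical fallout of the wait_queue_t -> wait_queue_entry_t rename, in
which the embedded list head also moved from ->task_list to ->entry. A minimal usage
sketch under that assumption (not taken from this patch set):

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);

static void demo_wait(void)
{
	wait_queue_entry_t wait;		/* was: wait_queue_t */

	init_waitqueue_entry(&wait, current);
	add_wait_queue(&demo_wq, &wait);
	/* ... the entry is now linked through wait.entry (was wait.task_list) ... */
	remove_wait_queue(&demo_wq, &wait);
}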
diff --git a/mm/slub.c b/mm/slub.c
index 57e5156..8addc53 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 		char mbuf[64];
 		char *buf;
 		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+		ssize_t len;
 
 		if (!attr || !attr->store || !attr->show)
 			continue;
@@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 			buf = buffer;
 		}
 
-		attr->show(root_cache, buf);
-		attr->store(s, buf, strlen(buf));
+		len = attr->show(root_cache, buf);
+		if (len > 0)
+			attr->store(s, buf, len);
 	}
 
 	if (buffer)
@@ -5623,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
 	return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+	struct kmem_cache *s =
+		container_of(work, struct kmem_cache, kobj_remove_work);
+
+	if (!s->kobj.state_in_sysfs)
+		/*
+		 * For a memcg cache, this may be called during
+		 * deactivation and again on shutdown.  Remove only once.
+		 * A cache is never shut down before deactivation is
+		 * complete, so no need to worry about synchronization.
+		 */
+		return;
+
+#ifdef CONFIG_MEMCG
+	kset_unregister(s->memcg_kset);
+#endif
+	kobject_uevent(&s->kobj, KOBJ_REMOVE);
+	kobject_del(&s->kobj);
+	kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
@@ -5630,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 	struct kset *kset = cache_kset(s);
 	int unmergeable = slab_unmergeable(s);
 
+	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
 	if (!kset) {
 		kobject_init(&s->kobj, &slab_ktype);
 		return 0;
@@ -5693,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
 		 */
 		return;
 
-	if (!s->kobj.state_in_sysfs)
-		/*
-		 * For a memcg cache, this may be called during
-		 * deactivation and again on shutdown.  Remove only once.
-		 * A cache is never shut down before deactivation is
-		 * complete, so no need to worry about synchronization.
-		 */
-		return;
-
-#ifdef CONFIG_MEMCG
-	kset_unregister(s->memcg_kset);
-#endif
-	kobject_uevent(&s->kobj, KOBJ_REMOVE);
-	kobject_del(&s->kobj);
+	kobject_get(&s->kobj);
+	schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
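
The slub change above defers sysfs teardown to a workqueue and assumes a work_struct
(kobj_remove_work) has been added to struct kmem_cache elsewhere in this series. A
generic sketch of the defer-kobject-removal pattern it uses, with hypothetical
demo_ names:

#include <linux/kobject.h>
#include <linux/workqueue.h>

struct demo_obj {
	struct kobject kobj;
	struct work_struct remove_work;	/* INIT_WORK(..., demo_remove_workfn) at setup */
};

static void demo_remove_workfn(struct work_struct *work)
{
	struct demo_obj *obj = container_of(work, struct demo_obj, remove_work);

	kobject_del(&obj->kobj);	/* tear the sysfs entry down */
	kobject_put(&obj->kobj);	/* drop the reference taken when scheduling */
}

static void demo_schedule_remove(struct demo_obj *obj)
{
	kobject_get(&obj->kobj);	/* keep the object alive until the work runs */
	schedule_work(&obj->remove_work);
}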
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index ac6318a..3405b4e 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type)
 		if (!page)
 			goto not_enough_page;
 		ctrl->map[idx] = page;
+
+		if (!(idx % SWAP_CLUSTER_MAX))
+			cond_resched();
 	}
 	return 0;
 not_enough_page:
diff --git a/mm/util.c b/mm/util.c
index 464df34..26be640 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -357,8 +357,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
 
 	/*
-	 * Make sure that larger requests are not too disruptive - no OOM
-	 * killer and no allocation failure warnings as we have a fallback
+	 * We want to attempt a large physically contiguous block first because
+	 * it is less likely to fragment multiple larger blocks and therefore
+	 * contributes less to long-term fragmentation than the vmalloc fallback.
+	 * However, make sure that larger requests are not too disruptive - no
+	 * OOM killer and no allocation failure warnings as we have a fallback.
 	 */
 	if (size > PAGE_SIZE) {
 		kmalloc_flags |= __GFP_NOWARN;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e..ecc97f7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	if (p4d_none(*p4d))
 		return NULL;
 	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud))
+
+	/*
+	 * Don't dereference bad PUD or PMD (below) entries. This will also
+	 * identify huge mappings, which we may encounter on architectures
+	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
+	 * not [unambiguously] associated with a struct page, so there is
+	 * no correct value to return for them.
+	 */
+	WARN_ON_ONCE(pud_bad(*pud));
+	if (pud_none(*pud) || pud_bad(*pud))
 		return NULL;
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	WARN_ON_ONCE(pmd_bad(*pmd));
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
 		return NULL;
 
 	ptep = pte_offset_map(pmd, addr);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 6063581..ce0618b 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -115,9 +115,9 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
 	unsigned long pressure = 0;
 
 	/*
-	 * reclaimed can be greater than scanned in cases
-	 * like THP, where the scanned is 1 and reclaimed
-	 * could be 512
+	 * reclaimed can be greater than scanned for things such as reclaimed
+	 * slab pages. shrink_node() just adds reclaimed pages without a
+	 * related increment to scanned pages.
 	 */
 	if (reclaimed >= scanned)
 		goto out;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ad39bb..c3c1c6a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3652,7 +3652,7 @@ int kswapd_run(int nid)
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 	if (IS_ERR(pgdat->kswapd)) {
 		/* failure at boot is fatal */
-		BUG_ON(system_state == SYSTEM_BOOTING);
+		BUG_ON(system_state < SYSTEM_RUNNING);
 		pr_err("Failed to start kswapd on node %d\n", nid);
 		ret = PTR_ERR(pgdat->kswapd);
 		pgdat->kswapd = NULL;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 467069b..9649579 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	return 0;
 
 out_free_newdev:
-	free_netdev(new_dev);
+	if (new_dev->reg_state == NETREG_UNINITIALIZED)
+		free_netdev(new_dev);
 	return err;
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 953b672..abc5f40 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -813,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev)
 
 	free_percpu(vlan->vlan_pcpu_stats);
 	vlan->vlan_pcpu_stats = NULL;
-	free_netdev(dev);
 }
 
 void vlan_setup(struct net_device *dev)
@@ -826,7 +825,8 @@ void vlan_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	dev->netdev_ops		= &vlan_netdev_ops;
-	dev->destructor		= vlan_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= vlan_dev_free;
 	dev->ethtool_ops	= &vlan_ethtool_ops;
 
 	dev->min_mtu		= 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 7bc2208..dca3cdd 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,7 @@ enum {
 
 struct p9_poll_wait {
 	struct p9_conn *conn;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	wait_queue_head_t *wait_addr;
 };
 
@@ -522,7 +522,7 @@ static void p9_write_work(struct work_struct *work)
 	clear_bit(Wworksched, &m->wsched);
 }
 
-static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
+static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
 {
 	struct p9_poll_wait *pwait =
 		container_of(wait, struct p9_poll_wait, wait);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 013e970..000ca2f 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1064,8 +1064,9 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 
 		skb_new->protocol = eth_type_trans(skb_new, soft_iface);
 
-		soft_iface->stats.rx_packets++;
-		soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+		batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+		batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+				   skb->len + ETH_HLEN + hdr_size);
 
 		netif_rx(skb_new);
 		batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e1ebe14..ae9f4d3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -987,7 +987,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 				batadv_dbg(BATADV_DBG_BLA, bat_priv,
 					   "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
 					   orig_addr_gw);
-				return NET_RX_DROP;
+				goto free_skb;
 			}
 		}
 
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b25789a..10f7edf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1034,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev)
 	 * netdev and its private data (bat_priv)
 	 */
 	rcu_barrier();
-
-	free_netdev(dev);
 }
 
 /**
@@ -1047,7 +1045,8 @@ static void batadv_softif_init_early(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &batadv_netdev_ops;
-	dev->destructor = batadv_softif_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = batadv_softif_free;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
 	dev->priv_flags |= IFF_NO_QUEUE;
 
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 6089599..ab3b654 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -598,7 +598,7 @@ static void netdev_setup(struct net_device *dev)
 
 	dev->netdev_ops		= &netdev_ops;
 	dev->header_ops		= &header_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static struct device_type bt_type = {
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251f..5c4808b 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,7 +484,7 @@ static int bnep_session(void *arg)
 	struct net_device *dev = s->dev;
 	struct sock *sk = s->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("");
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b66..14f7c81 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,7 +280,7 @@ static int cmtp_session(void *arg)
 	struct cmtp_session *session = arg;
 	struct sock *sk = session->sock->sk;
 	struct sk_buff *skb;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec458..fc31161 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1244,7 +1244,7 @@ static void hidp_session_run(struct hidp_session *session)
 static int hidp_session_thread(void *arg)
 {
 	struct hidp_session *session = arg;
-	wait_queue_t ctrl_wait, intr_wait;
+	wait_queue_entry_t ctrl_wait, intr_wait;
 
 	BT_DBG("session %p", session);
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 430b53e..f0f3447 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -379,7 +379,7 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->needs_free_netdev = true;
 	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 574f788..32bd3ea 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -595,7 +595,7 @@ static int br_afspec(struct net_bridge *br,
 		err = 0;
 		switch (nla_type(attr)) {
 		case IFLA_BRIDGE_VLAN_TUNNEL_INFO:
-			if (!(p->flags & BR_VLAN_TUNNEL))
+			if (!p || !(p->flags & BR_VLAN_TUNNEL))
 				return -EINVAL;
 			err = br_parse_vlan_tunnel_info(attr, &tinfo_curr);
 			if (err)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 08341d2..6f12a52 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -179,6 +179,8 @@ static void br_stp_start(struct net_bridge *br)
 		br_debug(br, "using kernel STP\n");
 
 		/* To start timers on any ports left in blocking */
+		if (br->dev->flags & IFF_UP)
+			mod_timer(&br->hello_timer, jiffies + br->hello_time);
 		br_port_state_selection(br);
 	}
 
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c98b3e5..60b6fe2 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg)
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
 
-		if (br->stp_enabled != BR_USER_STP)
+		if (br->stp_enabled == BR_KERNEL_STP)
 			mod_timer(&br->hello_timer,
 				  round_jiffies(jiffies + br->hello_time));
 	}
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 5929309..db85230 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
 		return -EINVAL;
+	if (ebt_invalid_target(info->target))
+		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9ec0c9f..9c6e619 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1373,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name,
 	strlcpy(name, _name, sizeof(name));
 	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
 	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
-	    xt_data_to_user(um + entrysize, data, usersize, datasize))
+	    xt_data_to_user(um + entrysize, data, usersize, datasize,
+			    XT_ALIGN(datasize)))
 		return -EFAULT;
 
 	return 0;
@@ -1658,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 		if (match->compat_to_user(cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, m->data, match->usersize, msize))
+		if (xt_data_to_user(cm->data, m->data, match->usersize, msize,
+				    COMPAT_XT_ALIGN(msize)))
 			return -EFAULT;
 	}
 
@@ -1687,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 		if (target->compat_to_user(cm->data, t->data))
 			return -EFAULT;
 	} else {
-		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize))
+		if (xt_data_to_user(cm->data, t->data, target->usersize, tsize,
+				    COMPAT_XT_ALIGN(tsize)))
 			return -EFAULT;
 	}
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index adcad344..21f18ea 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -754,6 +754,10 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	lock_sock(sk);
 
+	err = -EINVAL;
+	if (addr_len < offsetofend(struct sockaddr, sa_family))
+		goto out;
+
 	err = -EAFNOSUPPORT;
 	if (uaddr->sa_family != AF_CAIF)
 		goto out;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 59ce1fc..71b6ab2 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
 {
 	struct sk_buff *skb;
 
-	if (likely(in_interrupt()))
-		skb = alloc_skb(len + pfx, GFP_ATOMIC);
-	else
-		skb = alloc_skb(len + pfx, GFP_KERNEL);
-
+	skb = alloc_skb(len + pfx, GFP_ATOMIC);
 	if (unlikely(skb == NULL))
 		return NULL;
 
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 1816fc9..fe3c53e 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev)
 {
 	struct chnl_net *priv = netdev_priv(dev);
 	caif_free_client(&priv->chnl);
-	free_netdev(dev);
 }
 
 static void ipcaif_net_setup(struct net_device *dev)
 {
 	struct chnl_net *priv;
 	dev->netdev_ops = &netdev_ops;
-	dev->destructor = chnl_net_destructor;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = chnl_net_destructor;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
 	dev->mtu = GPRS_PDP_MTU;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index b6406fe..88edac0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -872,8 +872,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 
 static int can_pernet_init(struct net *net)
 {
-	net->can.can_rcvlists_lock =
-		__SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+	spin_lock_init(&net->can.can_rcvlists_lock);
 	net->can.can_rx_alldev_list =
 		kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
 
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2034fb92..8757fb8 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	struct timespec validity;
 	void *tp, *tpend;
 	void **ptp;
-	struct ceph_crypto_key new_session_key;
+	struct ceph_crypto_key new_session_key = { 0 };
 	struct ceph_buffer *new_ticket_blob;
 	unsigned long new_expires, new_renew_after;
 	u64 new_secret_id;
@@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	dout(" ticket blob is %d bytes\n", dlen);
 	ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad);
 	blob_struct_v = ceph_decode_8(ptp);
+	if (blob_struct_v != 1)
+		goto bad;
+
 	new_secret_id = ceph_decode_64(ptp);
 	ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend);
 	if (ret)
@@ -234,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	     type, ceph_entity_type_name(type), th->secret_id,
 	     (int)th->ticket_blob->vec.iov_len);
 	xi->have_keys |= th->service;
-
-out:
-	return ret;
+	return 0;
 
 bad:
 	ret = -EINVAL;
-	goto out;
+out:
+	ceph_crypto_key_destroy(&new_session_key);
+	return ret;
 }
 
 static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4fd0283..47e94b5 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = {
 module_param_cb(supported_features, &param_ops_supported_features, NULL,
 		S_IRUGO);
 
-/*
- * find filename portion of a path (/foo/bar/baz -> baz)
- */
-const char *ceph_file_part(const char *s, int len)
-{
-	const char *e = s + len;
-
-	while (e != s && *(e-1) != '/')
-		e--;
-	return e;
-}
-EXPORT_SYMBOL(ceph_file_part);
-
 const char *ceph_msg_type_name(int type)
 {
 	switch (type) {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5766a6c..588a919 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
  * Returns true if the result moves the cursor on to the next piece
  * of the data item.
  */
-static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
-				size_t bytes)
+static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+				  size_t bytes)
 {
 	bool new_piece;
 
@@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 		new_piece = true;
 	}
 	cursor->need_crc = new_piece;
-
-	return new_piece;
 }
 
 static size_t sizeof_footer(struct ceph_connection *con)
@@ -1577,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con)
 		size_t page_offset;
 		size_t length;
 		bool last_piece;
-		bool need_crc;
 		int ret;
 
 		page = ceph_msg_data_next(cursor, &page_offset, &length,
@@ -1592,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 		}
 		if (do_datacrc && cursor->need_crc)
 			crc = ceph_crc32c_page(crc, page, page_offset, length);
-		need_crc = ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 
 	dout("%s %p msg %p done\n", __func__, con, msg);
@@ -2231,10 +2228,18 @@ static void process_ack(struct ceph_connection *con)
 	struct ceph_msg *m;
 	u64 ack = le64_to_cpu(con->in_temp_ack);
 	u64 seq;
+	bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ);
+	struct list_head *list = reconnect ? &con->out_queue : &con->out_sent;
 
-	while (!list_empty(&con->out_sent)) {
-		m = list_first_entry(&con->out_sent, struct ceph_msg,
-				     list_head);
+	/*
+	 * In the reconnect case, con_fault() has requeued messages
+	 * in out_sent. We should clean up old messages according to
+	 * the reconnect seq.
+	 */
+	while (!list_empty(list)) {
+		m = list_first_entry(list, struct ceph_msg, list_head);
+		if (reconnect && m->needs_out_seq)
+			break;
 		seq = le64_to_cpu(m->hdr.seq);
 		if (seq > ack)
 			break;
@@ -2243,6 +2248,7 @@ static void process_ack(struct ceph_connection *con)
 		m->ack_stamp = jiffies;
 		ceph_msg_remove(m);
 	}
+
 	prepare_read_tag(con);
 }
 
@@ -2299,7 +2305,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
 		if (do_datacrc)
 			crc = ceph_crc32c_page(crc, page, page_offset, ret);
-		(void) ceph_msg_data_advance(cursor, (size_t)ret);
+		ceph_msg_data_advance(cursor, (size_t)ret);
 	}
 	if (do_datacrc)
 		con->in_data_crc = crc;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 29a0ef3..250f11f 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	int i, err = -EINVAL;
 	struct ceph_fsid fsid;
 	u32 epoch, num_mon;
-	u16 version;
 	u32 len;
 
 	ceph_decode_32_safe(&p, end, len, bad);
 	ceph_decode_need(&p, end, len, bad);
 
 	dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
-
-	ceph_decode_16_safe(&p, end, version, bad);
+	p += sizeof(u16);  /* skip version */
 
 	ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index ffe9e90..55e3a47 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		u32 yes;
 		struct crush_rule *r;
 
+		err = -EINVAL;
 		ceph_decode_32_safe(p, end, yes, bad);
 		if (!yes) {
 			dout("crush_decode NO rule %d off %x %p to %p\n",
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2..3467882 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -68,7 +68,7 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
-static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
+static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
 				  void *key)
 {
 	unsigned long bits = (unsigned long)key;
diff --git a/net/core/dev.c b/net/core/dev.c
index fca407b..416137c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1253,8 +1253,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 	if (!new_ifalias)
 		return -ENOMEM;
 	dev->ifalias = new_ifalias;
+	memcpy(dev->ifalias, alias, len);
+	dev->ifalias[len] = 0;
 
-	strlcpy(dev->ifalias, alias, len+1);
 	return len;
 }
 
@@ -4766,6 +4767,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
@@ -4779,13 +4787,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 		break;
 
 	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
-			skb_dst_drop(skb);
-			secpath_reset(skb);
-			kmem_cache_free(skbuff_head_cache, skb);
-		} else {
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
 			__kfree_skb(skb);
-		}
 		break;
 
 	case GRO_HELD:
@@ -4857,10 +4862,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
 		break;
 
 	case GRO_DROP:
-	case GRO_MERGED_FREE:
 		napi_reuse_skb(napi, skb);
 		break;
 
+	case GRO_MERGED_FREE:
+		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+			napi_skb_free_stolen_head(skb);
+		else
+			napi_reuse_skb(napi, skb);
+		break;
+
 	case GRO_MERGED:
 	case GRO_CONSUMED:
 		break;
@@ -4948,6 +4959,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__skb_gro_checksum_complete);
 
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+	while (remsd) {
+		struct softnet_data *next = remsd->rps_ipi_next;
+
+		if (cpu_online(remsd->cpu))
+			smp_call_function_single_async(remsd->cpu, &remsd->csd);
+		remsd = next;
+	}
+#endif
+}
+
 /*
  * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
@@ -4963,14 +4987,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 		local_irq_enable();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
-		while (remsd) {
-			struct softnet_data *next = remsd->rps_ipi_next;
-
-			if (cpu_online(remsd->cpu))
-				smp_call_function_single_async(remsd->cpu,
-							   &remsd->csd);
-			remsd = next;
-		}
+		net_rps_send_ipi(remsd);
 	} else
 #endif
 		local_irq_enable();
@@ -5199,8 +5216,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 	if (rc == BUSY_POLL_BUDGET)
 		__napi_schedule(napi);
 	local_bh_enable();
-	if (local_softirq_pending())
-		do_softirq();
 }
 
 void napi_busy_loop(unsigned int napi_id,
@@ -7501,6 +7516,8 @@ int register_netdevice(struct net_device *dev)
 err_uninit:
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7708,8 +7725,10 @@ void netdev_run_todo(void)
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
-		if (dev->destructor)
-			dev->destructor(dev);
+		if (dev->priv_destructor)
+			dev->priv_destructor(dev);
+		if (dev->needs_free_netdev)
+			free_netdev(dev);
 
 		/* Report a network device has been unregistered */
 		rtnl_lock();
@@ -7774,9 +7793,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 	} else {
 		netdev_stats_to_stats64(storage, &dev->stats);
 	}
-	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
-	storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
-	storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+	storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+	storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+	storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
@@ -8192,7 +8211,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
 	struct sk_buff **list_skb;
 	struct sk_buff *skb;
 	unsigned int cpu;
-	struct softnet_data *sd, *oldsd;
+	struct softnet_data *sd, *oldsd, *remsd = NULL;
 
 	local_irq_disable();
 	cpu = smp_processor_id();
@@ -8233,6 +8252,13 @@ static int dev_cpu_dead(unsigned int oldcpu)
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+#ifdef CONFIG_RPS
+	remsd = oldsd->rps_ipi_list;
+	oldsd->rps_ipi_list = NULL;
+#endif
+	/* send out pending IPI's on offline CPU */
+	net_rps_send_ipi(remsd);
+
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
 		netif_rx_ni(skb);
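
The register_netdevice() and netdev_run_todo() hunks above split the old
dev->destructor into two pieces; the matching struct net_device members are assumed
to be added in include/linux/netdevice.h by this same series:

	bool needs_free_netdev;				/* core calls free_netdev() */
	void (*priv_destructor)(struct net_device *);	/* driver-private teardown */

Drivers convert as in the vlan, batman-adv, caif and hsr hunks earlier in this pull;
roughly (hypothetical demo_ names):

static void demo_setup(struct net_device *dev)
{
	dev->netdev_ops        = &demo_netdev_ops;	/* hypothetical ops table */
	dev->needs_free_netdev = true;			/* was: dev->destructor = free_netdev */
	dev->priv_destructor   = demo_priv_free;	/* frees only private state */
}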
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..27fad31 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -410,6 +410,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (cmd == SIOCGIFNAME)
 		return dev_ifname(net, (struct ifreq __user *)arg);
 
+	/*
+	 * Take care of Wireless Extensions. Unfortunately struct iwreq
+	 * isn't a proper subset of struct ifreq (it's 8 bytes shorter)
+	 * so we need to treat it specially, otherwise applications may
+	 * fault if the struct they're passing happens to land at the
+	 * end of a mapped page.
+	 */
+	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+		struct iwreq iwr;
+
+		if (copy_from_user(&iwr, arg, sizeof(iwr)))
+			return -EFAULT;
+
+		return wext_handle_ioctl(net, &iwr, cmd, arg);
+	}
+
 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 		return -EFAULT;
 
@@ -559,9 +575,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 				ret = -EFAULT;
 			return ret;
 		}
-		/* Take care of Wireless Extensions */
-		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
-			return wext_handle_ioctl(net, &ifr, cmd, arg);
 		return -ENOTTY;
 	}
 }
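
A rough illustration of the size mismatch the new comment in dev_ioctl() refers to
(64-bit, assuming the usual uapi definitions):

/* Hypothetical sizes, x86-64:
 *	sizeof(struct ifreq) == 40	(16-byte name + 24-byte union, dominated by ifmap)
 *	sizeof(struct iwreq) == 32	(16-byte name + 16-byte union iwreq_data)
 * copy_from_user() of sizeof(struct ifreq) from a pointer that really refers to a
 * struct iwreq can therefore read 8 bytes past the end of the user buffer, which
 * faults if the structure ends exactly at a page boundary.
 */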
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b0b87a2..a0adfc3 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1680,8 +1680,10 @@ static int devlink_dpipe_tables_fill(struct genl_info *info,
 
 	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
 			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
@@ -2098,8 +2100,10 @@ static int devlink_dpipe_headers_fill(struct genl_info *info,
 
 	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
 			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (devlink_nl_put_handle(skb, devlink))
 		goto nla_put_failure;
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503..13ba4a0 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard_out);
 
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
 	/* This initializer is needed to force linker to place this variable
 	 * into const section. Otherwise it might end into bss section.
 	 * We really want to avoid false sharing on this variable, and catch
 	 * any writes on it.
 	 */
-	[RTAX_MAX] = 0xdeadbeef,
+	.refcnt = ATOMIC_INIT(1),
 };
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	if (dev)
 		dev_hold(dev);
 	dst->ops = ops;
-	dst_init_metrics(dst, dst_default_metrics, true);
+	dst_init_metrics(dst, dst_default_metrics.metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
 	dst->from = NULL;
@@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release);
 
 u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
-	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+	struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
 
 	if (p) {
-		u32 *old_p = __DST_METRICS_PTR(old);
+		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+		atomic_set(&p->refcnt, 1);
+		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
 
 		new = (unsigned long) p;
 		prev = cmpxchg(&dst->_metrics, old, new);
 
 		if (prev != old) {
 			kfree(p);
-			p = __DST_METRICS_PTR(prev);
+			p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
 			if (prev & DST_METRICS_READ_ONLY)
 				p = NULL;
+		} else if (prev & DST_METRICS_REFCOUNTED) {
+			if (atomic_dec_and_test(&old_p->refcnt))
+				kfree(old_p);
 		}
 	}
-	return p;
+	BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+	return (u32 *)p;
 }
 EXPORT_SYMBOL(dst_cow_metrics_generic);
 
@@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
 {
 	unsigned long prev, new;
 
-	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
 	prev = cmpxchg(&dst->_metrics, old, new);
 	if (prev == old)
 		kfree(__DST_METRICS_PTR(old));
@@ -464,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 		spin_lock_bh(&dst_garbage.lock);
 		dst = dst_garbage.list;
 		dst_garbage.list = NULL;
+		/* The code in dst_ifdown places a hold on the loopback device.
+		 * If the gc entry processing is set to expire after a lengthy
+		 * interval, this hold can cause netdev_wait_allrefs() to hang
+		 * out and wait for a long time -- until the loopback
+		 * interface is released.  If we're really unlucky, it'll emit
+		 * pr_emerg messages to console too.  Reset the interval here,
+		 * so dst cleanups occur in a more timely fashion.
+		 */
+		if (dst_garbage.timer_inc > DST_GC_INC) {
+			dst_garbage.timer_inc = DST_GC_INC;
+			dst_garbage.timer_expires = DST_GC_MIN;
+			mod_delayed_work(system_wq, &dst_gc_work,
+					 dst_garbage.timer_expires);
+		}
 		spin_unlock_bh(&dst_garbage.lock);
 
 		if (last)
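
dst_cow_metrics_generic() above now hands out a refcounted struct dst_metrics
instead of a bare u32 array; the type and the DST_METRICS_REFCOUNTED flag are
assumed to come from an include/net/dst.h change outside this excerpt. A sketch of
the assumed layout (details may differ):

struct dst_metrics {
	u32		metrics[RTAX_MAX];	/* must stay at offset 0, see the BUILD_BUG_ON above */
	atomic_t	refcnt;
};

#define DST_METRICS_READ_ONLY	0x1UL
#define DST_METRICS_REFCOUNTED	0x2UL
#define DST_METRICS_FLAGS	(DST_METRICS_READ_ONLY | DST_METRICS_REFCOUNTED)
#define __DST_METRICS_PTR(Y)	((u32 *)((Y) & ~DST_METRICS_FLAGS))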
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f21c4d3..3bba291 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -568,7 +568,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
-	struct fib_rule *rule, *tmp;
+	struct fib_rule *rule, *r;
 	struct nlattr *tb[FRA_MAX+1];
 	struct fib_kuid_range range;
 	int err = -EINVAL;
@@ -668,16 +668,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		/*
 		 * Check if this rule is a target to any of them. If so,
+		 * adjust to the next one with the same preference or
 		 * disable them. As this operation is eventually very
-		 * expensive, it is only performed if goto rules have
-		 * actually been added.
+		 * expensive, it is only performed if goto rules, other than
+		 * the current rule if it is itself a goto rule, have actually been added.
 		 */
 		if (ops->nr_goto_rules > 0) {
-			list_for_each_entry(tmp, &ops->rules_list, list) {
-				if (rtnl_dereference(tmp->ctarget) == rule) {
-					RCU_INIT_POINTER(tmp->ctarget, NULL);
+			struct fib_rule *n;
+
+			n = list_next_entry(rule, list);
+			if (&n->list == &ops->rules_list || n->pref != rule->pref)
+				n = NULL;
+			list_for_each_entry(r, &ops->rules_list, list) {
+				if (rtnl_dereference(r->ctarget) != rule)
+					continue;
+				rcu_assign_pointer(r->ctarget, n);
+				if (!n)
 					ops->unresolved_rules++;
-				}
 			}
 		}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index a253a61..a6bb95f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2281,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_change_head ||
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
+	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd..26bbfab 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,6 +315,25 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	goto out;
 }
 
+static int __net_init net_defaults_init_net(struct net *net)
+{
+	net->core.sysctl_somaxconn = SOMAXCONN;
+	return 0;
+}
+
+static struct pernet_operations net_defaults_ops = {
+	.init = net_defaults_init_net,
+};
+
+static __init int net_defaults_init(void)
+{
+	if (register_pernet_subsys(&net_defaults_ops))
+		panic("Cannot initialize net default settings");
+
+	return 0;
+}
+
+core_initcall(net_defaults_init);
 
 #ifdef CONFIG_NET_NS
 static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 49a279a..467a2f4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -931,6 +931,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
 	       + nla_total_size(4) /* IFLA_LINK_NETNSID */
+	       + nla_total_size(4) /* IFLA_GROUP */
 	       + nla_total_size(ext_filter_mask
 			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1124,6 +1125,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	struct ifla_vf_mac vf_mac;
 	struct ifla_vf_info ivi;
 
+	memset(&ivi, 0, sizeof(ivi));
+
 	/* Not all SR-IOV capable drivers support the
 	 * spoofcheck and "RSS query enable" query.  Preset to
 	 * -1 so the user space tool can detect that the driver
@@ -1132,7 +1135,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	ivi.spoofchk = -1;
 	ivi.rss_query_en = -1;
 	ivi.trusted = -1;
-	memset(ivi.mac, 0, sizeof(ivi.mac));
 	/* The default value for VF link state is "auto"
 	 * IFLA_VF_LINK_STATE_AUTO which equals zero
 	 */
@@ -1467,6 +1469,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINK_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_PROTO_DOWN]	= { .type = NLA_U8 },
 	[IFLA_XDP]		= { .type = NLA_NESTED },
+	[IFLA_GROUP]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -3231,8 +3234,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int err = 0;
 	int fidx = 0;
 
-	if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
-			IFLA_MAX, ifla_policy, NULL) == 0) {
+	err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
+			  IFLA_MAX, ifla_policy, NULL);
+	if (err < 0) {
+		return -EINVAL;
+	} else if (err == 0) {
 		if (tb[IFLA_MASTER])
 			br_idx = nla_get_u32(tb[IFLA_MASTER]);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e8..b1be7c0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3754,8 +3754,11 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 
 	spin_lock_irqsave(&q->lock, flags);
 	skb = __skb_dequeue(q);
-	if (skb && (skb_next = skb_peek(q)))
+	if (skb && (skb_next = skb_peek(q))) {
 		icmp_next = is_icmp_err_skb(skb_next);
+		if (icmp_next)
+			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+	}
 	spin_unlock_irqrestore(&q->lock, flags);
 
 	if (is_icmp_err_skb(skb) && !icmp_next)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ea23254..b7cd9aa 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -479,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
 {
 	struct ctl_table *tbl;
 
-	net->core.sysctl_somaxconn = SOMAXCONN;
-
 	tbl = netns_core_table;
 	if (!net_eq(net, &init_net)) {
 		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 4b9518a..6f95612 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt)
 	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
-static inline void dnrt_drop(struct dn_route *rt)
-{
-	dst_release(&rt->dst);
-	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
 static void dn_dst_check_expire(unsigned long dummy)
 {
 	int i;
@@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops)
 			}
 			*rtp = rt->dst.dn_next;
 			rt->dst.dn_next = NULL;
-			dnrt_drop(rt);
+			dnrt_free(rt);
 			break;
 		}
 		spin_unlock_bh(&dn_rt_hash_table[i].lock);
@@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
 			dst_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
-			dnrt_drop(rt);
+			dst_free(&rt->dst);
 			*rp = rth;
 			return 0;
 		}
@@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy)
 		for(; rt; rt = next) {
 			next = rcu_dereference_raw(rt->dst.dn_next);
 			RCU_INIT_POINTER(rt->dst.dn_next, NULL);
-			dst_free((struct dst_entry *)rt);
+			dnrt_free(rt);
 		}
 
 nothing_to_declare:
@@ -1187,7 +1181,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
+	rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
 	if (rt == NULL)
 		goto e_nobufs;
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 1ed81ac..aa8ffec 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -102,7 +102,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
-	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+	if (skb->len < sizeof(*nlh) ||
+	    nlh->nlmsg_len < sizeof(*nlh) ||
+	    skb->len < nlh->nlmsg_len)
 		return;
 
 	if (!netlink_capable(skb, CAP_NET_ADMIN))
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 26130ae..90038d4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -223,6 +223,53 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds)
+{
+	int i, ret = 0;
+
+	/* Suspend slave network devices */
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
+			continue;
+
+		ret = dsa_slave_suspend(ds->ports[i].netdev);
+		if (ret)
+			return ret;
+	}
+
+	if (ds->ops->suspend)
+		ret = ds->ops->suspend(ds);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
+
+int dsa_switch_resume(struct dsa_switch *ds)
+{
+	int i, ret = 0;
+
+	if (ds->ops->resume)
+		ret = ds->ops->resume(ds);
+
+	if (ret)
+		return ret;
+
+	/* Resume slave network devices */
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_port_initialized(ds, i))
+			continue;
+
+		ret = dsa_slave_resume(ds->ports[i].netdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
+
 static struct packet_type dsa_pack_type __read_mostly = {
 	.type	= cpu_to_be16(ETH_P_XDSA),
 	.func	= dsa_switch_rcv,
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 033b3bf..7796580 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -484,8 +484,10 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
 		dsa_ds_unapply(dst, ds);
 	}
 
-	if (dst->cpu_switch)
+	if (dst->cpu_switch) {
 		dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+		dst->cpu_switch = NULL;
+	}
 
 	pr_info("DSA: tree %d unapplied\n", dst->tree);
 	dst->applied = false;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index ad345c8..7281098 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -289,53 +289,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 	dsa_switch_unregister_notifier(ds);
 }
 
-#ifdef CONFIG_PM_SLEEP
-int dsa_switch_suspend(struct dsa_switch *ds)
-{
-	int i, ret = 0;
-
-	/* Suspend slave network devices */
-	for (i = 0; i < ds->num_ports; i++) {
-		if (!dsa_is_port_initialized(ds, i))
-			continue;
-
-		ret = dsa_slave_suspend(ds->ports[i].netdev);
-		if (ret)
-			return ret;
-	}
-
-	if (ds->ops->suspend)
-		ret = ds->ops->suspend(ds);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_suspend);
-
-int dsa_switch_resume(struct dsa_switch *ds)
-{
-	int i, ret = 0;
-
-	if (ds->ops->resume)
-		ret = ds->ops->resume(ds);
-
-	if (ret)
-		return ret;
-
-	/* Resume slave network devices */
-	for (i = 0; i < ds->num_ports; i++) {
-		if (!dsa_is_port_initialized(ds, i))
-			continue;
-
-		ret = dsa_slave_resume(ds->ports[i].netdev);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_switch_resume);
-#endif
-
 /* platform driver init and cleanup *****************************************/
 static int dev_is_class(struct device *dev, void *class)
 {
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c73160f..0a0a392 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
 	del_timer_sync(&hsr->announce_timer);
 
 	synchronize_rcu();
-	free_netdev(hsr_dev);
 }
 
 static const struct net_device_ops hsr_device_ops = {
@@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev)
 	SET_NETDEV_DEVTYPE(dev, &hsr_type);
 	dev->priv_flags |= IFF_NO_QUEUE;
 
-	dev->destructor = hsr_dev_destroy;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = hsr_dev_destroy;
 
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 4ebe2aa..04b5450 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -324,8 +324,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame,
 	unsigned long irqflags;
 
 	frame->is_supervision = is_supervision_frame(port->hsr, skb);
-	frame->node_src = hsr_get_node(&port->hsr->node_db, skb,
-				       frame->is_supervision);
+	frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
 	if (frame->node_src == NULL)
 		return -1; /* Unknown node and !is_supervision, or no mem */
 
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 7ea9258..284a9b8 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -158,9 +158,10 @@ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 
 /* Get the hsr_node from which 'skb' was sent.
  */
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup)
 {
+	struct list_head *node_db = &port->hsr->node_db;
 	struct hsr_node *node;
 	struct ethhdr *ethhdr;
 	u16 seq_out;
@@ -186,7 +187,11 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
 		 */
 		seq_out = hsr_get_skb_sequence_nr(skb) - 1;
 	} else {
-		WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
+		/* this is also called for frames from the master port and
+		 * so warn only for non-master ports
+		 */
+		if (port->type != HSR_PT_MASTER)
+			WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
 		seq_out = HSR_SEQNR_START;
 	}
 
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 438b40f..4e04f0e 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -18,7 +18,7 @@ struct hsr_node;
 
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 			      u16 seq_out);
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup);
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
 			  struct hsr_port *port);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index d7efbf0..0a866f33 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev)
 
 	ldev->netdev_ops	= &lowpan_netdev_ops;
 	ldev->header_ops	= &lowpan_header_ops;
-	ldev->destructor	= free_netdev;
+	ldev->needs_free_netdev	= true;
 	ldev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f3dad16..58925b6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1043,7 +1043,7 @@ static struct inet_protosw inetsw_array[] =
 		.type =       SOCK_DGRAM,
 		.protocol =   IPPROTO_ICMP,
 		.prot =       &ping_prot,
-		.ops =        &inet_dgram_ops,
+		.ops =        &inet_sockraw_ops,
 		.flags =      INET_PROTOSW_REUSE,
        },
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d54345a..e9f3386 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(arp_xmit);
 
+static bool arp_is_garp(struct net *net, struct net_device *dev,
+			int *addr_type, __be16 ar_op,
+			__be32 sip, __be32 tip,
+			unsigned char *sha, unsigned char *tha)
+{
+	bool is_garp = tip == sip;
+
+	/* Gratuitous ARP _replies_ also require target hwaddr to be
+	 * the same as source.
+	 */
+	if (is_garp && ar_op == htons(ARPOP_REPLY))
+		is_garp =
+			/* IPv4 over IEEE 1394 doesn't provide target
+			 * hardware address field in its ARP payload.
+			 */
+			tha &&
+			!memcmp(tha, sha, dev->addr_len);
+
+	if (is_garp) {
+		*addr_type = inet_addr_type_dev_table(net, dev, sip);
+		if (*addr_type != RTN_UNICAST)
+			is_garp = false;
+	}
+	return is_garp;
+}
+
 /*
  *	Process an arp request.
  */
@@ -837,29 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IN_DEV_ARP_ACCEPT(in_dev)) {
-		unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
+	addr_type = -1;
+	if (n || IN_DEV_ARP_ACCEPT(in_dev)) {
+		is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op,
+				      sip, tip, sha, tha);
+	}
 
+	if (IN_DEV_ARP_ACCEPT(in_dev)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
-		is_garp = tip == sip && addr_type == RTN_UNICAST;
-
-		/* Unsolicited ARP _replies_ also require target hwaddr to be
-		 * the same as source.
-		 */
-		if (is_garp && arp->ar_op == htons(ARPOP_REPLY))
-			is_garp =
-				/* IPv4 over IEEE 1394 doesn't provide target
-				 * hardware address field in its ARP payload.
-				 */
-				tha &&
-				!memcmp(tha, sha, dev->addr_len);
-
 		if (!n &&
-		    ((arp->ar_op == htons(ARPOP_REPLY)  &&
-				addr_type == RTN_UNICAST) || is_garp))
+		    (is_garp ||
+		     (arp->ar_op == htons(ARPOP_REPLY) &&
+		      (addr_type == RTN_UNICAST ||
+		       (addr_type < 0 &&
+			/* postpone the calculation until as late as possible */
+			inet_addr_type_dev_table(net, dev, sip) ==
+				RTN_UNICAST)))))
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 65cc02b..93322f8 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -248,6 +248,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	u8 *tail;
 	u8 *vaddr;
 	int nfrags;
+	int esph_offset;
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
@@ -313,11 +314,13 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	}
 
 cow:
+	esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
 	nfrags = skb_cow_data(skb, tailen, &trailer);
 	if (nfrags < 0)
 		goto out;
 	tail = skb_tail_pointer(trailer);
-	esp->esph = ip_esp_hdr(skb);
+	esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
 
 skip_cow:
 	esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449dd..ad9ad4a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 static void free_fib_info_rcu(struct rcu_head *head)
 {
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
+	struct dst_metrics *m;
 
 	change_nexthops(fi) {
 		if (nexthop_nh->nh_dev)
@@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
 		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
 	} endfor_nexthops(fi);
 
-	if (fi->fib_metrics != (u32 *) dst_default_metrics)
-		kfree(fi->fib_metrics);
+	m = fi->fib_metrics;
+	if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+		kfree(m);
 	kfree(fi);
 }
 
@@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			return -EINVAL;
-		fi->fib_metrics[type - 1] = val;
+		fi->fib_metrics->metrics[type - 1] = val;
 	}
 
 	if (ecn_ca)
-		fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+		fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
 
 	return 0;
 }
@@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		goto failure;
 	fib_info_cnt++;
 	if (cfg->fc_mx) {
-		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
 		if (!fi->fib_metrics)
 			goto failure;
+		atomic_set(&fi->fib_metrics->refcnt, 1);
 	} else
-		fi->fib_metrics = (u32 *) dst_default_metrics;
+		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
 
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
@@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	if (fi->fib_priority &&
 	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
 		goto nla_put_failure;
-	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
 		goto nla_put_failure;
 
 	if (fi->fib_prefsrc &&
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 43318b5..9144fa7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -657,8 +657,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	/* Needed by both icmp_global_allow and icmp_xmit_lock */
 	local_bh_disable();
 
-	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
-	if (!icmpv4_global_allow(net, type, code))
+	/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless the
+	 * incoming dev is loopback.  If the outgoing dev is not loopback,
+	 * the per-peer ratelimit still applies (in icmpv4_xrlim_allow).
+	 */
+	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+	      !icmpv4_global_allow(net, type, code))
 		goto out_bh_enable;
 
 	sk = icmp_xmit_lock(net);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44fd86d..ec9a396 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
+	spin_lock_init(&pmc->lock);
 	spin_lock_bh(&im->lock);
 	pmc->interface = im->interface;
 	in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
-	struct ip_sf_list *psf, *nextpsf;
+	struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
 
-	for (psf = pmc->tomb; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
-	}
+	spin_lock_bh(&pmc->lock);
+	tomb = pmc->tomb;
 	pmc->tomb = NULL;
-	for (psf = pmc->sources; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
-	}
+	sources = pmc->sources;
 	pmc->sources = NULL;
 	pmc->sfmode = MCAST_EXCLUDE;
 	pmc->sfcount[MCAST_INCLUDE] = 0;
 	pmc->sfcount[MCAST_EXCLUDE] = 1;
+	spin_unlock_bh(&pmc->lock);
+
+	for (psf = tomb; psf; psf = nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
+	for (psf = sources; psf; psf = nextpsf) {
+		nextpsf = psf->sf_next;
+		kfree(psf);
+	}
 }
 
 /* Join a multicast group
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7a3fd25..532b36e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
 	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index b878ecb..129d1a3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -446,6 +446,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	return 0;
 
 drop:
+	if (tun_dst)
+		dst_release((struct dst_entry *)tun_dst);
 	kfree_skb(skb);
 	return 0;
 }
@@ -967,7 +969,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
 	gro_cells_destroy(&tunnel->gro_cells);
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1155,7 +1156,8 @@ int ip_tunnel_init(struct net_device *dev)
 	struct iphdr *iph = &tunnel->parms.iph;
 	int err;
 
-	dev->destructor	= ip_tunnel_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip_tunnel_dev_free;
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 551de4d..8ae425c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-			  struct sk_buff *skb, struct mfc_cache *cache,
-			  int local);
+			  struct net_device *dev, struct sk_buff *skb,
+			  struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
@@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 
 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else {
-			ip_mr_forward(net, mrt, skb, c, 0);
+			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
 		}
 	}
 }
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 
 /* Queue a packet for resolution. It gets locked cache entry! */
 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
-				 struct sk_buff *skb)
+				 struct sk_buff *skb, struct net_device *dev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct mfc_cache *c;
@@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 		kfree_skb(skb);
 		err = -ENOBUFS;
 	} else {
+		if (dev) {
+			skb->dev = dev;
+			skb->skb_iif = dev->ifindex;
+		}
 		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 		err = 0;
 	}
@@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-			  struct sk_buff *skb, struct mfc_cache *cache,
-			  int local)
+			  struct net_device *dev, struct sk_buff *skb,
+			  struct mfc_cache *cache, int local)
 {
-	int true_vifi = ipmr_find_vif(mrt, skb->dev);
+	int true_vifi = ipmr_find_vif(mrt, dev);
 	int psend = -1;
 	int vif, ct;
 
@@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 	}
 
 	/* Wrong interface: drop packet and (maybe) send PIM assert. */
-	if (mrt->vif_table[vif].dev != skb->dev) {
-		struct net_device *mdev;
-
-		mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-		if (mdev == skb->dev)
-			goto forward;
-
+	if (mrt->vif_table[vif].dev != dev) {
 		if (rt_is_output_route(skb_rtable(skb))) {
 			/* It is our own packet, looped back.
 			 * Very complicated situation...
@@ -2053,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb)
 		read_lock(&mrt_lock);
 		vif = ipmr_find_vif(mrt, dev);
 		if (vif >= 0) {
-			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+			int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
 			read_unlock(&mrt_lock);
 
 			return err2;
@@ -2064,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	ip_mr_forward(net, mrt, skb, cache, local);
+	ip_mr_forward(net, mrt, dev, skb, cache, local);
 	read_unlock(&mrt_lock);
 
 	if (local)
@@ -2238,7 +2236,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		iph->saddr = saddr;
 		iph->daddr = daddr;
 		iph->version = 0;
-		err = ipmr_cache_unresolved(mrt, vif, skb2);
+		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
 		read_unlock(&mrt_lock);
 		rcu_read_unlock();
 		return err;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9ee..6883b3d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt)
 
 static void ipv4_dst_destroy(struct dst_entry *dst)
 {
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *) dst;
 
+	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+		kfree(p);
+
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 			rt->rt_uses_gateway = 1;
 		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+		if (fi->fib_metrics != &dst_default_metrics) {
+			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+			atomic_inc(&fi->fib_metrics->refcnt);
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e4c76d..40aca78 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
 	if (tp->fastopen_req)
 		return -EALREADY; /* Another Fast Open is in progress */
@@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 		}
 	}
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
-	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+	err = __inet_stream_connect(sk->sk_socket, uaddr,
 				    msg->msg_namelen, flags, 1);
 	/* fastopen_req could already be freed in __inet_stream_connect
 	 * if the connection times out or gets rst
@@ -2320,9 +2323,15 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
+	/* Initialize rcv_mss to TCP_MIN_MSS to avoid a division-by-zero
+	 * issue in __tcp_select_window().
+	 */
+	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
 	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
+	dst_release(sk->sk_rx_dst);
+	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 
 	/* Clean up fastopen related fields */
@@ -2374,9 +2383,10 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
 	return 0;
 }
 
-static int tcp_repair_options_est(struct tcp_sock *tp,
+static int tcp_repair_options_est(struct sock *sk,
 		struct tcp_repair_opt __user *optbuf, unsigned int len)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_repair_opt opt;
 
 	while (len >= sizeof(opt)) {
@@ -2389,6 +2399,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
 		switch (opt.opt_code) {
 		case TCPOPT_MSS:
 			tp->rx_opt.mss_clamp = opt.opt_val;
+			tcp_mtup_init(sk);
 			break;
 		case TCPOPT_WINDOW:
 			{
@@ -2548,7 +2559,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		if (!tp->repair)
 			err = -EINVAL;
 		else if (sk->sk_state == TCP_ESTABLISHED)
-			err = tcp_repair_options_est(tp,
+			err = tcp_repair_options_est(sk,
 					(struct tcp_repair_opt __user *)optval,
 					optlen);
 		else
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6e3c512..324c9bc 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -180,6 +180,7 @@ void tcp_init_congestion_control(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
+	tcp_sk(sk)->prior_ssthresh = 0;
 	if (icsk->icsk_ca_ops->init)
 		icsk->icsk_ca_ops->init(sk);
 	if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a4fb1e..1d2dbac 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -332,9 +332,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
 				   unsigned long delay)
 {
-	if (!delayed_work_pending(&ifp->dad_work))
-		in6_ifa_hold(ifp);
-	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
+	in6_ifa_hold(ifp);
+	if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay))
+		in6_ifa_put(ifp);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -3369,6 +3369,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_changeupper_info *info;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct net *net = dev_net(dev);
 	int run_pending = 0;
 	int err;
 
@@ -3384,7 +3385,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	case NETDEV_CHANGEMTU:
 		/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
 		if (dev->mtu < IPV6_MIN_MTU) {
-			addrconf_ifdown(dev, 1);
+			addrconf_ifdown(dev, dev != net->loopback_dev);
 			break;
 		}
 
@@ -3500,7 +3501,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			 * IPV6_MIN_MTU stop IPv6 on this interface.
 			 */
 			if (dev->mtu < IPV6_MIN_MTU)
-				addrconf_ifdown(dev, 1);
+				addrconf_ifdown(dev, dev != net->loopback_dev);
 		}
 		break;
 
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 37ac9de..8d772fe 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1319,7 +1319,7 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 	struct ipv6hdr *ip6_hdr;
 	struct ipv6_opt_hdr *hop;
 	unsigned char buf[CALIPSO_MAX_BUFFER];
-	int len_delta, new_end, pad;
+	int len_delta, new_end, pad, payload;
 	unsigned int start, end;
 
 	ip6_hdr = ipv6_hdr(skb);
@@ -1346,6 +1346,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 	if (ret_val < 0)
 		return ret_val;
 
+	ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */
+
 	if (len_delta) {
 		if (len_delta > 0)
 			skb_push(skb, len_delta);
@@ -1355,6 +1357,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
 			sizeof(*ip6_hdr) + start);
 		skb_reset_network_header(skb);
 		ip6_hdr = ipv6_hdr(skb);
+		payload = ntohs(ip6_hdr->payload_len);
+		ip6_hdr->payload_len = htons(payload + len_delta);
 	}
 
 	hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e011122..5c786f5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -250,8 +250,14 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 	 */
 
 	err = ip6_datagram_dst_update(sk, true);
-	if (err)
+	if (err) {
+		/* Reset daddr and dport so that udp_v6_early_demux()
+		 * fails to find this socket
+		 */
+		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+		inet->inet_dport = 0;
 		goto out;
+	}
 
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d950d43..f02f131 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -30,6 +30,25 @@
 #include <net/ipv6.h>
 #include <linux/icmpv6.h>
 
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+	int off = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr;
+
+	if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+		return offsetof(struct ipv6hdr, nexthdr);
+
+	while (off < nhlen) {
+		exthdr = (void *)ipv6_hdr + off;
+		if (exthdr->nexthdr == NEXTHDR_ESP)
+			return off;
+
+		off += ipv6_optlen(exthdr);
+	}
+
+	return 0;
+}
+
 static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 					 struct sk_buff *skb)
 {
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 	struct xfrm_state *x;
 	__be32 seq;
 	__be32 spi;
+	int nhoff;
 	int err;
 
 	skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 
 	xo->flags |= XFRM_GRO;
 
+	nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+	if (!nhoff)
+		goto out;
+
+	IP6CB(skb)->nhoff = nhoff;
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index eea23b5..ec849d8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,7 +32,6 @@ struct fib6_rule {
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags, pol_lookup_t lookup)
 {
-	struct rt6_info *rt;
 	struct fib_lookup_arg arg = {
 		.lookup_ptr = lookup,
 		.flags = FIB_LOOKUP_NOREF,
@@ -44,21 +43,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	fib_rules_lookup(net->ipv6.fib6_rules_ops,
 			 flowi6_to_flowi(fl6), flags, &arg);
 
-	rt = arg.result;
+	if (arg.result)
+		return arg.result;
 
-	if (!rt) {
-		dst_hold(&net->ipv6.ip6_null_entry->dst);
-		return &net->ipv6.ip6_null_entry->dst;
-	}
-
-	if (rt->rt6i_flags & RTF_REJECT &&
-	    rt->dst.error == -EAGAIN) {
-		ip6_rt_put(rt);
-		rt = net->ipv6.ip6_null_entry;
-		dst_hold(&rt->dst);
-	}
-
-	return &rt->dst;
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -121,7 +110,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			flp6->saddr = saddr;
 		}
 		err = rt->dst.error;
-		goto out;
+		if (err != -EAGAIN)
+			goto out;
 	}
 again:
 	ip6_rt_put(rt);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 230b5aa..8d7b113 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -491,7 +491,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	local_bh_disable();
 
 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
-	if (!icmpv6_global_allow(type))
+	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
 		goto out_bh_enable;
 
 	mip6_addr_swap(skb);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 2fd5ca1..77f7f8c 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -62,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
 {
 	u32 *v = (u32 *)loc.v32;
 
+	__ila_hash_secret_init();
 	return jhash_2words(v[0], v[1], hashrnd);
 }
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c6..e6b78ba 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -289,8 +289,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	struct rt6_info *rt;
 
 	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
-	if (rt->rt6i_flags & RTF_REJECT &&
-	    rt->dst.error == -EAGAIN) {
+	if (rt->dst.error == -EAGAIN) {
 		ip6_rt_put(rt);
 		rt = net->ipv6.ip6_null_entry;
 		dst_hold(&rt->dst);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8d128ba..64eea396 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -537,11 +537,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					  & IPV6_TCLASS_MASK;
+		dsfield = ipv4_get_dsfield(iph);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 		fl6.flowi6_mark = skb->mark;
 	else
@@ -598,9 +597,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 
-	dsfield = ipv6_get_dsfield(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+		dsfield = ipv6_get_dsfield(ipv6h);
+	else
+		dsfield = ip6_tclass(t->parms.flowinfo);
+
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 		fl6.flowlabel |= ip6_flowlabel(ipv6h);
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -990,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ip6gre_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6gre_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->type = ARPHRD_IP6GRE;
 
@@ -1147,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net)
 	return 0;
 
 err_reg_dev:
-	ip6gre_dev_free(ign->fb_tunnel_dev);
+	free_netdev(ign->fb_tunnel_dev);
 err_alloc_dev:
 	return err;
 }
@@ -1299,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &ip6gre_tap_netdev_ops;
-	dev->destructor = ip6gre_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6gre_dev_free;
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 280268f..cdb3728 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -116,8 +116,10 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 
 		if (udpfrag) {
 			int err = ip6_find_1stfragopt(skb, &prevhdr);
-			if (err < 0)
+			if (err < 0) {
+				kfree_skb_list(segs);
 				return ERR_PTR(err);
+			}
 			fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
 			fptr->frag_off = htons(offset);
 			if (skb->next)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d4a31be..1699acb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1390,7 +1390,7 @@ static int __ip6_append_data(struct sock *sk,
 	 */
 
 	cork->length += length;
-	if ((((length + fragheaderlen) > mtu) ||
+	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
 	     (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
@@ -1466,6 +1466,11 @@ static int __ip6_append_data(struct sock *sk,
 			 */
 			alloclen += sizeof(struct frag_hdr);
 
+			copy = datalen - transhdrlen - fraggap;
+			if (copy < 0) {
+				err = -EINVAL;
+				goto error;
+			}
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
 						alloclen + hh_len,
@@ -1515,13 +1520,9 @@ static int __ip6_append_data(struct sock *sk,
 				data += fraggap;
 				pskb_trim_unique(skb_prev, maxfraglen);
 			}
-			copy = datalen - transhdrlen - fraggap;
-
-			if (copy < 0) {
-				err = -EINVAL;
-				kfree_skb(skb);
-				goto error;
-			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+			if (copy > 0 &&
+			    getfrag(from, data + transhdrlen, offset,
+				    copy, fraggap, skb) < 0) {
 				err = -EFAULT;
 				kfree_skb(skb);
 				goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6eb2ae5..8c6c3c8 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev)
 	gro_cells_destroy(&t->gro_cells);
 	dst_cache_destroy(&t->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int ip6_tnl_create2(struct net_device *dev)
@@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	return t;
 
 failed_free:
-	ip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return ERR_PTR(err);
 }
@@ -859,6 +858,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 	return 0;
 
 drop:
+	if (tun_dst)
+		dst_release((struct dst_entry *)tun_dst);
 	kfree_skb(skb);
 	return 0;
 }
@@ -1095,6 +1096,9 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
 	if (!dst) {
 route_lookup:
+		/* add dsfield to flowlabel for route lookup */
+		fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);
+
 		dst = ip6_route_output(net, NULL, fl6);
 
 		if (dst->error)
@@ -1196,7 +1200,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+	ip6_flow_hdr(ipv6h, dsfield,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
 	ipv6h->hop_limit = hop_limit;
 	ipv6h->nexthdr = proto;
@@ -1231,8 +1235,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
-	dsfield = ipv4_get_dsfield(iph);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1246,6 +1248,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield = key->tos;
 	} else {
 		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 			encap_limit = t->parms.encap_limit;
@@ -1254,8 +1257,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPIP;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					 & IPV6_TCLASS_MASK;
+			dsfield = ipv4_get_dsfield(iph);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 			fl6.flowi6_mark = skb->mark;
 		else
@@ -1267,6 +1271,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1300,8 +1306,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	dsfield = ipv6_get_dsfield(ipv6h);
-
 	if (t->parms.collect_md) {
 		struct ip_tunnel_info *tun_info;
 		const struct ip_tunnel_key *key;
@@ -1315,6 +1319,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
+		dsfield = key->tos;
 	} else {
 		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 		/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
@@ -1337,7 +1342,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		fl6.flowi6_proto = IPPROTO_IPV6;
 
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+			dsfield = ipv6_get_dsfield(ipv6h);
+		else
+			dsfield = ip6_tclass(t->parms.flowinfo);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 			fl6.flowlabel |= ip6_flowlabel(ipv6h);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -1351,6 +1358,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+
 	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1769,7 +1778,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 static void ip6_tnl_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &ip6_tnl_netdev_ops;
-	dev->destructor = ip6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->flags |= IFF_NOARP;
@@ -2216,7 +2226,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	return 0;
 
 err_register:
-	ip6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d67ef56..837ea1e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 static void vti6_dev_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static int vti6_tnl_create2(struct net_device *dev)
@@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
 	return t;
 
 failed_free:
-	vti6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = {
 static void vti6_dev_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &vti6_netdev_ops;
-	dev->destructor = vti6_dev_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = vti6_dev_free;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
@@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net)
 	return 0;
 
 err_register:
-	vti6_dev_free(ip6n->fb_tnl_dev);
+	free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
 	return err;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 374997d..2ecb39b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
 	dev->flags		= IFF_NOARP;
 	dev->netdev_ops		= &reg_vif_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9b522fa..ac826dd 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -192,7 +192,7 @@ static struct inet_protosw pingv6_protosw = {
 	.type =      SOCK_DGRAM,
 	.protocol =  IPPROTO_ICMPV6,
 	.prot =      &pingv6_prot,
-	.ops =       &inet6_dgram_ops,
+	.ops =       &inet6_sockraw_ops,
 	.flags =     INET_PROTOSW_REUSE,
 };
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index cc8e3ae..e88bcb8 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
 	u64 buff64[SNMP_MIB_MAX];
 	int i;
 
-	memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+	memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
 
 	snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
 	for (i = 0; itemlist[i].name; i++)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f992d9..60be012 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1338,7 +1338,7 @@ void raw6_proc_exit(void)
 #endif	/* CONFIG_PROC_FS */
 
 /* Same as inet6_dgram_ops, sans udp_poll.  */
-static const struct proto_ops inet6_sockraw_ops = {
+const struct proto_ops inet6_sockraw_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
 	.release	   = inet6_release,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc61b0b..322bd62 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
 	if ((rt->dst.dev == dev || !dev) &&
 	    rt != adn->net->ipv6.ip6_null_entry &&
 	    (rt->rt6i_nsiblings == 0 ||
+	     (dev && netdev_unregistering(dev)) ||
 	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
 		return -1;
 
@@ -3721,7 +3722,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
-	 } else if (event == NETDEV_UNREGISTER) {
+	 } else if (event == NETDEV_UNREGISTER &&
+		    dev->reg_state != NETREG_UNREGISTERED) {
+		/* NETDEV_UNREGISTER can be fired multiple times by
+		 * netdev_wait_allrefs(), so make sure we only do this once.
+		 */
 		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 61e5902..f8ad158 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	return nt;
 
 failed_free:
-	ipip6_dev_free(dev);
+	free_netdev(dev);
 failed:
 	return NULL;
 }
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 	 * we try harder to allocate.
 	 */
 	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+		kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
 		NULL;
 
 	rcu_read_lock();
@@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev)
 
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 #define SIT_FEATURES (NETIF_F_SG	   | \
@@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
 	dev->netdev_ops		= &ipip6_netdev_ops;
-	dev->destructor		= ipip6_dev_free;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= ipip6_dev_free;
 
 	dev->type		= ARPHRD_SIT;
 	dev->hard_header_len	= LL_MAX_HEADER + t_hlen;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 06ec39b..75703fd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -879,7 +879,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 	struct sock *sk;
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+		if (sk->sk_state == TCP_ESTABLISHED &&
+		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 08a807b..3ef5d91 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 		return 1;
 #endif
 
-	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	if (xo && (xo->flags & XFRM_GRO)) {
 		skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 0e01590..07d3657 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
 	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 7a92c0f..9ad07a9 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -30,6 +30,8 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	if (hdr_len < 0)
+		return hdr_len;
 	skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
 	skb_set_network_header(skb, -x->props.header_len);
 	skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 74d09f9..3be8528 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops		= &irlan_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 	dev->min_mtu		= 0;
 	dev->max_mtu		= ETH_MAX_MTU;
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c1950bb..b1432b6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1157,6 +1157,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			goto out;
 	}
 
+	err = -ENOBUFS;
 	key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
@@ -1168,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		if (key)
 			keysize = (key->sadb_key_bits + 7) / 8;
 		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
-		if (!x->aalg)
+		if (!x->aalg) {
+			err = -ENOMEM;
 			goto out;
+		}
 		strcpy(x->aalg->alg_name, a->name);
 		x->aalg->alg_key_len = 0;
 		if (key) {
@@ -1188,8 +1191,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 				goto out;
 			}
 			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
-			if (!x->calg)
+			if (!x->calg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->calg->alg_name, a->name);
 			x->props.calgo = sa->sadb_sa_encrypt;
 		} else {
@@ -1203,8 +1208,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 			if (key)
 				keysize = (key->sadb_key_bits + 7) / 8;
 			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
-			if (!x->ealg)
+			if (!x->ealg) {
+				err = -ENOMEM;
 				goto out;
+			}
 			strcpy(x->ealg->alg_name, a->name);
 			x->ealg->alg_key_len = 0;
 			if (key) {
@@ -1249,8 +1256,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		struct xfrm_encap_tmpl *natt;
 
 		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
-		if (!x->encap)
+		if (!x->encap) {
+			err = -ENOMEM;
 			goto out;
+		}
 
 		natt = x->encap;
 		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2755,6 +2764,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+	if (!err)
+		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3285,7 +3296,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len) {
+		    sec_ctx->sadb_x_sec_len*8) {
 			*dir = -EINVAL;
 			goto out;
 		}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8b21af7..4de2ec9 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -114,12 +114,13 @@ static void l2tp_eth_get_stats64(struct net_device *dev,
 {
 	struct l2tp_eth *priv = netdev_priv(dev);
 
-	stats->tx_bytes   = atomic_long_read(&priv->tx_bytes);
-	stats->tx_packets = atomic_long_read(&priv->tx_packets);
-	stats->tx_dropped = atomic_long_read(&priv->tx_dropped);
-	stats->rx_bytes   = atomic_long_read(&priv->rx_bytes);
-	stats->rx_packets = atomic_long_read(&priv->rx_packets);
-	stats->rx_errors  = atomic_long_read(&priv->rx_errors);
+	stats->tx_bytes   = (unsigned long) atomic_long_read(&priv->tx_bytes);
+	stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
+	stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
+	stats->rx_bytes   = (unsigned long) atomic_long_read(&priv->rx_bytes);
+	stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
+	stats->rx_errors  = (unsigned long) atomic_long_read(&priv->rx_errors);
+
 }
 
 static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -141,7 +142,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
 	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
 	dev->features		|= NETIF_F_LLTX;
 	dev->netdev_ops		= &l2tp_eth_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8364fe5..c38d16f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	int rc = -EINVAL;
 
 	dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
+
+	lock_sock(sk);
 	if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
 		goto out;
 	rc = -EAFNOSUPPORT;
@@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 out_put:
 	llc_sap_put(sap);
 out:
+	release_sock(sk);
 	return rc;
 }
 
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 60e2a62..cf2392b2 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -7,7 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -741,7 +741,47 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 	ieee80211_agg_start_txq(sta, tid, true);
 }
 
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+			      struct tid_ampdu_tx *tid_tx)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+
+	if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
+		return;
+
+	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
+		ieee80211_agg_tx_operational(local, sta, tid);
+}
+
+static struct tid_ampdu_tx *
+ieee80211_lookup_tid_tx(struct ieee80211_sub_if_data *sdata,
+			const u8 *ra, u16 tid, struct sta_info **sta)
+{
+	struct tid_ampdu_tx *tid_tx;
+
+	if (tid >= IEEE80211_NUM_TIDS) {
+		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
+		       tid, IEEE80211_NUM_TIDS);
+		return NULL;
+	}
+
+	*sta = sta_info_get_bss(sdata, ra);
+	if (!*sta) {
+		ht_dbg(sdata, "Could not find station: %pM\n", ra);
+		return NULL;
+	}
+
+	tid_tx = rcu_dereference((*sta)->ampdu_mlme.tid_tx[tid]);
+
+	if (WARN_ON(!tid_tx))
+		ht_dbg(sdata, "addBA was not requested!\n");
+
+	return tid_tx;
+}
+
+void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
+				      const u8 *ra, u16 tid)
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
@@ -750,57 +790,15 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
 
 	trace_api_start_tx_ba_cb(sdata, ra, tid);
 
-	if (tid >= IEEE80211_NUM_TIDS) {
-		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
-		       tid, IEEE80211_NUM_TIDS);
-		return;
-	}
+	rcu_read_lock();
+	tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+	if (!tid_tx)
+		goto out;
 
-	mutex_lock(&local->sta_mtx);
-	sta = sta_info_get_bss(sdata, ra);
-	if (!sta) {
-		mutex_unlock(&local->sta_mtx);
-		ht_dbg(sdata, "Could not find station: %pM\n", ra);
-		return;
-	}
-
-	mutex_lock(&sta->ampdu_mlme.mtx);
-	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
-
-	if (WARN_ON(!tid_tx)) {
-		ht_dbg(sdata, "addBA was not requested!\n");
-		goto unlock;
-	}
-
-	if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
-		goto unlock;
-
-	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
-		ieee80211_agg_tx_operational(local, sta, tid);
-
- unlock:
-	mutex_unlock(&sta->ampdu_mlme.mtx);
-	mutex_unlock(&local->sta_mtx);
-}
-
-void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
-				      const u8 *ra, u16 tid)
-{
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
-
-	if (unlikely(!skb))
-		return;
-
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
-
-	skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START;
-	skb_queue_tail(&sdata->skb_queue, skb);
-	ieee80211_queue_work(&local->hw, &sdata->work);
+	set_bit(HT_AGG_STATE_START_CB, &tid_tx->state);
+	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
 
@@ -860,37 +858,18 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
 
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+			     struct tid_ampdu_tx *tid_tx)
 {
-	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	struct tid_ampdu_tx *tid_tx;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	bool send_delba = false;
 
-	trace_api_stop_tx_ba_cb(sdata, ra, tid);
+	ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n",
+	       sta->sta.addr, tid);
 
-	if (tid >= IEEE80211_NUM_TIDS) {
-		ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
-		       tid, IEEE80211_NUM_TIDS);
-		return;
-	}
-
-	ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid);
-
-	mutex_lock(&local->sta_mtx);
-
-	sta = sta_info_get_bss(sdata, ra);
-	if (!sta) {
-		ht_dbg(sdata, "Could not find station: %pM\n", ra);
-		goto unlock;
-	}
-
-	mutex_lock(&sta->ampdu_mlme.mtx);
 	spin_lock_bh(&sta->lock);
-	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
-	if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+	if (!test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
 		ht_dbg(sdata,
 		       "unexpected callback to A-MPDU stop for %pM tid %d\n",
 		       sta->sta.addr, tid);
@@ -906,12 +885,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 	spin_unlock_bh(&sta->lock);
 
 	if (send_delba)
-		ieee80211_send_delba(sdata, ra, tid,
+		ieee80211_send_delba(sdata, sta->sta.addr, tid,
 			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
-
-	mutex_unlock(&sta->ampdu_mlme.mtx);
- unlock:
-	mutex_unlock(&local->sta_mtx);
 }
 
 void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
@@ -919,19 +894,20 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_ra_tid *ra_tid;
-	struct sk_buff *skb = dev_alloc_skb(0);
+	struct sta_info *sta;
+	struct tid_ampdu_tx *tid_tx;
 
-	if (unlikely(!skb))
-		return;
+	trace_api_stop_tx_ba_cb(sdata, ra, tid);
 
-	ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
-	memcpy(&ra_tid->ra, ra, ETH_ALEN);
-	ra_tid->tid = tid;
+	rcu_read_lock();
+	tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+	if (!tid_tx)
+		goto out;
 
-	skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP;
-	skb_queue_tail(&sdata->skb_queue, skb);
-	ieee80211_queue_work(&local->hw, &sdata->work);
+	set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state);
+	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6c2e606..4a388fe 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -902,6 +902,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	default:
 		return -EINVAL;
 	}
+	sdata->u.ap.req_smps = sdata->smps_mode;
+
 	sdata->needed_rx_chains = sdata->local->rx_chains;
 
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f4a5287..6ca5442 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -7,6 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
+ * Copyright 2017	Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -289,8 +290,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 {
 	int i;
 
-	cancel_work_sync(&sta->ampdu_mlme.work);
-
 	for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
 		__ieee80211_stop_tx_ba_session(sta, i, reason);
 		__ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
@@ -298,6 +297,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					       reason != AGG_STOP_DESTROY_STA &&
 					       reason != AGG_STOP_PEER_REQUEST);
 	}
+
+	/* stopping might queue the work again, so only cancel it afterwards */
+	cancel_work_sync(&sta->ampdu_mlme.work);
 }
 
 void ieee80211_ba_session_work(struct work_struct *work)
@@ -352,10 +354,16 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		spin_unlock_bh(&sta->lock);
 
 		tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
-		if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
-						 &tid_tx->state))
+		if (!tid_tx)
+			continue;
+
+		if (test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state))
+			ieee80211_start_tx_ba_cb(sta, tid, tid_tx);
+		if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))
 			___ieee80211_stop_tx_ba_session(sta, tid,
 							AGG_STOP_LOCAL_REQUEST);
+		if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+			ieee80211_stop_tx_ba_cb(sta, tid, tid_tx);
 	}
 	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f8f6c14..5e002f6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1036,8 +1036,6 @@ struct ieee80211_rx_agg {
 
 enum sdata_queue_type {
 	IEEE80211_SDATA_QUEUE_TYPE_FRAME	= 0,
-	IEEE80211_SDATA_QUEUE_AGG_START		= 1,
-	IEEE80211_SDATA_QUEUE_AGG_STOP		= 2,
 	IEEE80211_SDATA_QUEUE_RX_AGG_START	= 3,
 	IEEE80211_SDATA_QUEUE_RX_AGG_STOP	= 4,
 };
@@ -1427,12 +1425,6 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
 	return local->hw.wiphy->bands[band];
 }
 
-/* this struct represents 802.11n's RA/TID combination */
-struct ieee80211_ra_tid {
-	u8 ra[ETH_ALEN];
-	u16 tid;
-};
-
 /* this struct holds the value parsing from channel switch IE  */
 struct ieee80211_csa_ie {
 	struct cfg80211_chan_def chandef;
@@ -1539,7 +1531,7 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
 		return true;
 	/* can't handle non-legacy preamble yet */
 	if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
-	    status->encoding != RX_ENC_LEGACY)
+	    status->encoding == RX_ENC_LEGACY)
 		return true;
 	return false;
 }
@@ -1794,8 +1786,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				   enum ieee80211_agg_stop_reason reason);
 int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
 				    enum ieee80211_agg_stop_reason reason);
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+			      struct tid_ampdu_tx *tid_tx);
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+			     struct tid_ampdu_tx *tid_tx);
 void ieee80211_ba_session_work(struct work_struct *work);
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
 void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3bd5b81..f5f5015 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1213,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 static void ieee80211_if_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
-	free_netdev(dev);
 }
 
 static void ieee80211_if_setup(struct net_device *dev)
@@ -1221,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev)
 	ether_setup(dev);
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->netdev_ops = &ieee80211_dataif_ops;
-	dev->destructor = ieee80211_if_free;
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ieee80211_if_free;
 }
 
 static void ieee80211_if_setup_no_queue(struct net_device *dev)
@@ -1237,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work)
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct sta_info *sta;
-	struct ieee80211_ra_tid *ra_tid;
 	struct ieee80211_rx_agg *rx_agg;
 
 	if (!ieee80211_sdata_running(sdata))
@@ -1253,15 +1252,7 @@ static void ieee80211_iface_work(struct work_struct *work)
 	while ((skb = skb_dequeue(&sdata->skb_queue))) {
 		struct ieee80211_mgmt *mgmt = (void *)skb->data;
 
-		if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) {
-			ra_tid = (void *)&skb->cb;
-			ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra,
-						 ra_tid->tid);
-		} else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) {
-			ra_tid = (void *)&skb->cb;
-			ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra,
-						ra_tid->tid);
-		} else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
+		if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
 			rx_agg = (void *)&skb->cb;
 			mutex_lock(&local->sta_mtx);
 			sta = sta_info_get_bss(sdata, rx_agg->addr);
@@ -1825,6 +1816,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		ret = dev_alloc_name(ndev, ndev->name);
 		if (ret < 0) {
 			ieee80211_if_free(ndev);
+			free_netdev(ndev);
 			return ret;
 		}
 
@@ -1914,7 +1906,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			ieee80211_if_free(ndev);
+			free_netdev(ndev);
 			return ret;
 		}
 	}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0ea9712..cc8e6ea 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -601,7 +601,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	struct ieee80211_channel *chan;
-	u32 rate_flags, rates = 0;
+	u32 rates = 0;
 
 	sdata_assert_lock(sdata);
 
@@ -612,7 +612,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 	chan = chanctx_conf->def.chan;
-	rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
 	rcu_read_unlock();
 	sband = local->hw.wiphy->bands[chan->band];
 	shift = ieee80211_vif_get_shift(&sdata->vif);
@@ -636,9 +635,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		 */
 		rates_len = 0;
 		for (i = 0; i < sband->n_bitrates; i++) {
-			if ((rate_flags & sband->bitrates[i].flags)
-			    != rate_flags)
-				continue;
 			rates |= BIT(i);
 			rates_len++;
 		}
@@ -2818,7 +2814,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 				u32 *rates, u32 *basic_rates,
 				bool *have_higher_than_11mbit,
 				int *min_rate, int *min_rate_index,
-				int shift, u32 rate_flags)
+				int shift)
 {
 	int i, j;
 
@@ -2846,8 +2842,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 			int brate;
 
 			br = &sband->bitrates[j];
-			if ((rate_flags & br->flags) != rate_flags)
-				continue;
 
 			brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
 			if (brate == rate) {
@@ -4398,40 +4392,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 			return -ENOMEM;
 	}
 
-	if (new_sta || override) {
-		err = ieee80211_prep_channel(sdata, cbss);
-		if (err) {
-			if (new_sta)
-				sta_info_free(local, new_sta);
-			return -EINVAL;
-		}
-	}
-
+	/*
+	 * Set up the information for the new channel before setting the
+	 * new channel. We can't - completely race-free - change the basic
+	 * rates bitmap and the channel (sband) that it refers to, but if
+	 * we set it up beforehand, we at least avoid calling into the driver's
+	 * bss_info_changed() method with invalid information (since we do
+	 * call that from changing the channel - only for IDLE and perhaps
+	 * some others, but ...).
+	 *
+	 * So to avoid that, just set up all the new information before the
+	 * channel, but tell the driver to apply it only afterwards, since
+	 * it might need the new channel for that.
+	 */
 	if (new_sta) {
 		u32 rates = 0, basic_rates = 0;
 		bool have_higher_than_11mbit;
 		int min_rate = INT_MAX, min_rate_index = -1;
-		struct ieee80211_chanctx_conf *chanctx_conf;
 		const struct cfg80211_bss_ies *ies;
 		int shift = ieee80211_vif_get_shift(&sdata->vif);
-		u32 rate_flags;
-
-		rcu_read_lock();
-		chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-		if (WARN_ON(!chanctx_conf)) {
-			rcu_read_unlock();
-			sta_info_free(local, new_sta);
-			return -EINVAL;
-		}
-		rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
-		rcu_read_unlock();
 
 		ieee80211_get_rates(sband, bss->supp_rates,
 				    bss->supp_rates_len,
 				    &rates, &basic_rates,
 				    &have_higher_than_11mbit,
 				    &min_rate, &min_rate_index,
-				    shift, rate_flags);
+				    shift);
 
 		/*
 		 * This used to be a workaround for basic rates missing
@@ -4489,8 +4475,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 			sdata->vif.bss_conf.sync_dtim_count = 0;
 		}
 		rcu_read_unlock();
+	}
 
-		/* tell driver about BSSID, basic rates and timing */
+	if (new_sta || override) {
+		err = ieee80211_prep_channel(sdata, cbss);
+		if (err) {
+			if (new_sta)
+				sta_info_free(local, new_sta);
+			return -EINVAL;
+		}
+	}
+
+	if (new_sta) {
+		/*
+		 * Tell the driver about BSSID, basic rates and timing;
+		 * this was set up above, before setting the channel.
+		 */
 		ieee80211_bss_info_change_notify(sdata,
 			BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES |
 			BSS_CHANGED_BEACON_INT);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 35f4c7d..3674fe3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1613,12 +1613,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 */
 	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
+	    !ieee80211_is_back_req(hdr->frame_control) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
-	    /* PM bit is only checked in frames where it isn't reserved,
+	    /*
+	     * The PM bit is only checked in frames where it isn't reserved;
 	     * in AP mode it's reserved in non-bufferable management frames
 	     * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+	     * BAR frames should be ignored as specified in
+	     * IEEE 802.11-2012 10.2.1.2.
 	     */
 	    (!ieee80211_is_mgmt(hdr->frame_control) ||
 	     ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
@@ -2492,7 +2496,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		if (is_multicast_ether_addr(hdr->addr1)) {
 			mpp_addr = hdr->addr3;
 			proxied_addr = mesh_hdr->eaddr1;
-		} else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) {
+		} else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
+			    MESH_FLAGS_AE_A5_A6) {
 			/* has_a4 already checked in ieee80211_rx_mesh_check */
 			mpp_addr = hdr->addr4;
 			proxied_addr = mesh_hdr->eaddr2;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 7cdf7a8..403e3cc 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2155,7 +2155,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			struct ieee80211_sta_rx_stats *cpurxs;
 
 			cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
-			sinfo->rx_packets += cpurxs->dropped;
+			sinfo->rx_dropped_misc += cpurxs->dropped;
 		}
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5609cac..ea0747d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -116,6 +116,8 @@ enum ieee80211_sta_info_flags {
 #define HT_AGG_STATE_STOPPING		3
 #define HT_AGG_STATE_WANT_START		4
 #define HT_AGG_STATE_WANT_STOP		5
+#define HT_AGG_STATE_START_CB		6
+#define HT_AGG_STATE_STOP_CB		7
 
 enum ieee80211_agg_stop_reason {
 	AGG_STOP_DECLINED,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index c1ef22d..cc19614 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -17,6 +17,7 @@
 #include <asm/unaligned.h>
 #include <net/mac80211.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 
 #include "ieee80211_i.h"
 #include "michael.h"
@@ -153,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 	data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
 	key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
 	michael_mic(key, hdr, data, data_len, mic);
-	if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
+	if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN))
 		goto mic_fail;
 
 	/* remove Michael MIC from payload */
@@ -1048,7 +1049,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
 		bip_aad(skb, aad);
 		ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad,
 				   skb->data + 24, skb->len - 24, mic);
-		if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_cmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
@@ -1098,7 +1099,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
 		bip_aad(skb, aad);
 		ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad,
 				       skb->data + 24, skb->len - 24, mic);
-		if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_cmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
@@ -1202,7 +1203,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
 		if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
 				       skb->data + 24, skb->len - 24,
 				       mic) < 0 ||
-		    memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+		    crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
 			key->u.aes_gmac.icverrors++;
 			return RX_DROP_UNUSABLE;
 		}
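The memcmp() -> crypto_memneq() conversions above close a potential timing side channel: memcmp() may return as soon as it finds the first differing byte, so the time taken to reject a forged MIC can leak how many leading bytes were correct. crypto_memneq() compares in constant time. As a rough stand-alone sketch of the idea (this is not the kernel's implementation, which lives in crypto/memneq.c, and the function name here is invented):

#include <stddef.h>
#include <stdio.h>

/* Illustrative constant-time "not equal": the running time depends only on
 * the length compared, never on where the first mismatch occurs. */
static int ct_memneq(const void *a, const void *b, size_t len)
{
	const unsigned char *pa = a, *pb = b;
	unsigned char diff = 0;
	size_t i;

	for (i = 0; i < len; i++)
		diff |= pa[i] ^ pb[i];	/* accumulate differences, never branch early */

	return diff != 0;		/* non-zero iff the buffers differ */
}

int main(void)
{
	unsigned char mic[8]      = { 1, 2, 3, 4, 5, 6, 7, 8 };
	unsigned char received[8] = { 1, 2, 3, 4, 5, 6, 7, 9 };

	printf("mismatch: %d\n", ct_memneq(mic, received, sizeof(mic)));
	return 0;
}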
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 06019db..bd88a9b 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev)
 	struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
 
 	mac802154_llsec_destroy(&sdata->sec);
-
-	free_netdev(dev);
 }
 
 static void ieee802154_if_setup(struct net_device *dev)
@@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 					sdata->dev->dev_addr);
 
 		sdata->dev->header_ops = &mac802154_header_ops;
-		sdata->dev->destructor = mac802154_wpan_free;
+		sdata->dev->needs_free_netdev = true;
+		sdata->dev->priv_destructor = mac802154_wpan_free;
 		sdata->dev->netdev_ops = &mac802154_wpan_ops;
 		sdata->dev->ml_priv = &mac802154_mlme_wpan;
 		wpan_dev->promiscuous_mode = false;
@@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 
 		break;
 	case NL802154_IFTYPE_MONITOR:
-		sdata->dev->destructor = free_netdev;
+		sdata->dev->needs_free_netdev = true;
 		sdata->dev->netdev_ops = &mac802154_monitor_ops;
 		wpan_dev->promiscuous_mode = true;
 		break;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 257ec66..7b05fd1 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1418,7 +1418,7 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags)
 				continue;
 			alive++;
 			nh_flags &= ~flags;
-			WRITE_ONCE(nh->nh_flags, flags);
+			WRITE_ONCE(nh->nh_flags, nh_flags);
 		} endfor_nexthops(rt);
 
 		WRITE_ONCE(rt->rt_nhn_alive, alive);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d2d7bdf..ad99c1c 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 {
 	unsigned int verdict = NF_DROP;
 
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		pr_err("shouldn't reach here, because the box is on the "
-		       "half connection in the tun/dr module.\n");
-	}
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		goto ignore_cp;
 
 	/* Ensure the checksum is correct */
 	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 		ip_vs_notrack(skb);
 	else
 		ip_vs_update_conntrack(skb, cp, 0);
+
+ignore_cp:
 	verdict = NF_ACCEPT;
 
 out:
@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 	 */
 	cp = pp->conn_out_get(ipvs, af, skb, &iph);
 
-	if (likely(cp))
+	if (likely(cp)) {
+		if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+			goto ignore_cp;
 		return handle_response(af, skb, pd, cp, &iph, hooknum);
+	}
 
 	/* Check for real-server-started requests */
 	if (atomic_read(&ipvs->conn_out_counter)) {
@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
 			}
 		}
 	}
+
+out:
 	IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
 		      "ip_vs_out: packet continues traversal as normal");
 	return NF_ACCEPT;
+
+ignore_cp:
+	__ip_vs_conn_put(cp);
+	goto out;
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a60efa..7f6100c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 #endif
 	if (h != NULL && !try_module_get(h->me))
 		h = NULL;
+	if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) {
+		module_put(h->me);
+		h = NULL;
+	}
 
 	rcu_read_unlock();
 
@@ -181,6 +185,13 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
 
+void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
+{
+	refcount_dec(&helper->refcnt);
+	module_put(helper->me);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
+
 struct nf_conn_help *
 nf_ct_helper_ext_add(struct nf_conn *ct,
 		     struct nf_conntrack_helper *helper, gfp_t gfp)
@@ -417,6 +428,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 			}
 		}
 	}
+	refcount_set(&me->refcnt, 1);
 	hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
 	nf_ct_helper_count++;
 out:
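With the hunks above, a conntrack helper is now protected by its own reference count in addition to the module reference: nf_conntrack_helper_try_module_get() only succeeds if refcount_inc_not_zero() observes a non-zero count, and the new nf_conntrack_helper_put() drops both references. A minimal userspace sketch of the inc-not-zero semantics, using C11 atomics rather than the kernel's refcount_t (function and variable names are invented for the example):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only if the object is not already on its way out. */
static bool ref_inc_not_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	while (old != 0) {
		/* on failure, 'old' is reloaded with the current value */
		if (atomic_compare_exchange_weak(ref, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* refcount hit zero, caller must bail out */
}

int main(void)
{
	atomic_int refcnt = 1;		/* as done by refcount_set(&me->refcnt, 1) */

	printf("got ref: %d\n", ref_inc_not_zero(&refcnt));	/* 1 */
	atomic_store(&refcnt, 0);	/* pretend the helper went away */
	printf("got ref: %d\n", ref_inc_not_zero(&refcnt));	/* 0 */
	return 0;
}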
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dcf561b..a8be9b7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,6 +45,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -888,8 +890,13 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 out:
 	local_bh_enable();
-	if (last)
+	if (last) {
+		/* nf ct hash resize happened, now clear the leftover. */
+		if ((struct nf_conn *)cb->args[1] == last)
+			cb->args[1] = 0;
+
 		nf_ct_put(last);
+	}
 
 	while (i) {
 		i--;
@@ -1007,9 +1014,8 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple,
-		      enum ctattr_type type, u_int8_t l3num,
-		      struct nf_conntrack_zone *zone)
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -1828,6 +1834,8 @@ ctnetlink_create_conntrack(struct net *net,
 	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 	nf_ct_labels_ext_add(ct);
+	nfct_seqadj_ext_add(ct);
+	nfct_synproxy_ext_add(ct);
 
 	/* we must add conntrack extensions before confirmation. */
 	ct->status |= IPS_CONFIRMED;
@@ -2447,7 +2455,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = {
 
 static int ctnetlink_exp_dump_tuple(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    enum ctattr_expect type)
+				    u32 type)
 {
 	struct nlattr *nest_parms;
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 13875d5..1c5b14a 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -512,16 +512,19 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
 		      u8 pf, unsigned int hooknum)
 {
 	const struct sctphdr *sh;
-	struct sctphdr _sctph;
 	const char *logmsg;
 
-	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
-	if (!sh) {
+	if (skb->len < dataoff + sizeof(struct sctphdr)) {
 		logmsg = "nf_ct_sctp: short packet ";
 		goto out_invalid;
 	}
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    skb->ip_summed == CHECKSUM_NONE) {
+		if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+			logmsg = "nf_ct_sctp: failed to read header ";
+			goto out_invalid;
+		}
+		sh = (const struct sctphdr *)(skb->data + dataoff);
 		if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
 			logmsg = "nf_ct_sctp: bad CRC ";
 			goto out_invalid;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b48d6b5..6c72922 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -409,6 +409,10 @@ nf_nat_setup_info(struct nf_conn *ct,
 {
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
+	/* Can't set up NAT info for a confirmed ct. */
+	if (nf_ct_is_confirmed(ct))
+		return NF_ACCEPT;
+
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
@@ -562,7 +566,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
 	 * will delete the entry from an already-freed table.
 	 */
-	ct->status &= ~IPS_NAT_DONE_MASK;
+	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
 	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
 			nf_nat_bysource_params);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5592250..da314be 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3367,35 +3367,50 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
 	return nf_tables_fill_setelem(args->skb, set, elem);
 }
 
+struct nft_set_dump_ctx {
+	const struct nft_set	*set;
+	struct nft_ctx		ctx;
+};
+
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
-	u8 genmask = nft_genmask_cur(net);
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
-	struct nft_ctx ctx;
-	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
+	bool set_found = false;
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 portid, seq;
-	int event, err;
+	int event;
 
-	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
-			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy,
-			  NULL);
-	if (err < 0)
-		return err;
+	rcu_read_lock();
+	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+		if (afi != dump_ctx->ctx.afi)
+			continue;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
-					 (void *)nla, genmask);
-	if (err < 0)
-		return err;
+		list_for_each_entry_rcu(table, &afi->tables, list) {
+			if (table != dump_ctx->ctx.table)
+				continue;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+			list_for_each_entry_rcu(set, &table->sets, list) {
+				if (set == dump_ctx->set) {
+					set_found = true;
+					break;
+				}
+			}
+			break;
+		}
+		break;
+	}
+
+	if (!set_found) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
 
 	event  = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
 	portid = NETLINK_CB(cb->skb).portid;
@@ -3407,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 		goto nla_put_failure;
 
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = ctx.afi->family;
+	nfmsg->nfgen_family = afi->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
+	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
 
-	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
 		goto nla_put_failure;
@@ -3422,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 
 	args.cb			= cb;
 	args.skb		= skb;
-	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.genmask	= nft_genmask_cur(net);
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
-	set->ops->walk(&ctx, set, &args.iter);
+	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+	rcu_read_unlock();
 
 	nla_nest_end(skb, nest);
 	nlmsg_end(skb, nlh);
@@ -3441,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 
 nla_put_failure:
+	rcu_read_unlock();
 	return -ENOSPC;
 }
 
+static int nf_tables_dump_set_done(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
@@ -3465,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_set,
+			.done = nf_tables_dump_set_done,
 		};
+		struct nft_set_dump_ctx *dump_ctx;
+
+		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+		if (!dump_ctx)
+			return -ENOMEM;
+
+		dump_ctx->set = set;
+		dump_ctx->ctx = ctx;
+
+		c.data = dump_ctx;
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 	return -EOPNOTSUPP;
@@ -3593,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 {
 	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
 
-	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+	nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
 	if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
 		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
@@ -3604,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
+/* Only called from the commit path; nft_set_elem_deactivate() already deals
+ * with the refcounting from the preparation phase.
+ */
+static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+	kfree(elem);
+}
+
 static int nft_setelem_parse_flags(const struct nft_set *set,
 				   const struct nlattr *attr, u32 *flags)
 {
@@ -3815,9 +3861,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_uninit(&data, d2.type);
+		nft_data_release(&data, d2.type);
 err2:
-	nft_data_uninit(&elem.key.val, d1.type);
+	nft_data_release(&elem.key.val, d1.type);
 err1:
 	return err;
 }
@@ -3862,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	return err;
 }
 
+/**
+ *	nft_data_hold - hold a nft_data item
+ *
+ *	@data: struct nft_data to hold
+ *	@type: type of data
+ *
+ *	Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
+ *	NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
+ *	NFT_GOTO verdicts. This function must be called on active data objects
+ *	from the second phase of the commit protocol.
+ */
+static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+{
+	if (type == NFT_DATA_VERDICT) {
+		switch (data->verdict.code) {
+		case NFT_JUMP:
+		case NFT_GOTO:
+			data->verdict.chain->use++;
+			break;
+		}
+	}
+}
+
+static void nft_set_elem_activate(const struct net *net,
+				  const struct nft_set *set,
+				  struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_hold(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use++;
+}
+
+static void nft_set_elem_deactivate(const struct net *net,
+				    const struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_release(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
+		(*nft_set_ext_obj(ext))->use--;
+}
+
 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
@@ -3927,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 	elem.priv = priv;
 
+	nft_set_elem_deactivate(ctx->net, set, &elem);
+
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -3936,7 +4031,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 err3:
 	kfree(elem.priv);
 err2:
-	nft_data_uninit(&elem.key.val, desc.type);
+	nft_data_release(&elem.key.val, desc.type);
 err1:
 	return err;
 }
@@ -4743,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_DELSETELEM:
-		nft_set_elem_destroy(nft_trans_elem_set(trans),
-				     nft_trans_elem(trans).priv, true);
+		nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
+					   nft_trans_elem(trans).priv);
 		break;
 	case NFT_MSG_DELOBJ:
 		nft_obj_destroy(nft_trans_obj(trans));
@@ -4979,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
+			nft_set_elem_activate(net, te->set, &te->elem);
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
@@ -5464,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx,
 EXPORT_SYMBOL_GPL(nft_data_init);
 
 /**
- *	nft_data_uninit - release a nft_data item
+ *	nft_data_release - release a nft_data item
  *
  *	@data: struct nft_data to release
  *	@type: type of data
@@ -5472,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init);
  *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
  *	all others need to be released by calling this function.
  */
-void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+void nft_data_release(const struct nft_data *data, enum nft_data_types type)
 {
 	if (type < NFT_DATA_VERDICT)
 		return;
@@ -5483,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
 		WARN_ON(1);
 	}
 }
-EXPORT_SYMBOL_GPL(nft_data_uninit);
+EXPORT_SYMBOL_GPL(nft_data_release);
 
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 950bf6e..be678a3 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -686,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
+	ret = -ENOENT;
 	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
 		cur = &nlcth->helper;
 		j++;
@@ -699,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		     tuple.dst.protonum != cur->tuple.dst.protonum))
 			continue;
 
-		found = true;
-		nf_conntrack_helper_unregister(cur);
-		kfree(cur->expect_policy);
+		if (refcount_dec_if_one(&cur->refcnt)) {
+			found = true;
+			nf_conntrack_helper_unregister(cur);
+			kfree(cur->expect_policy);
 
-		list_del(&nlcth->list);
-		kfree(nlcth);
+			list_del(&nlcth->list);
+			kfree(nlcth);
+		} else {
+			ret = -EBUSY;
+		}
 	}
 
 	/* Make sure we return success if we flush and there are no helpers */
-	return (found || j == 0) ? 0 : -ENOENT;
+	return (found || j == 0) ? 0 : ret;
 }
 
 static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 877d9ac..fff8073 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 			    tb[NFTA_BITWISE_MASK]);
 	if (err < 0)
 		return err;
-	if (d1.len != priv->len)
-		return -EINVAL;
+	if (d1.len != priv->len) {
+		err = -EINVAL;
+		goto err1;
+	}
 
 	err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
 			    tb[NFTA_BITWISE_XOR]);
 	if (err < 0)
-		return err;
-	if (d2.len != priv->len)
-		return -EINVAL;
+		goto err1;
+	if (d2.len != priv->len) {
+		err = -EINVAL;
+		goto err2;
+	}
 
 	return 0;
+err2:
+	nft_data_release(&priv->xor, d2.type);
+err1:
+	nft_data_release(&priv->mask, d1.type);
+	return err;
 }
 
 static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 2b96eff..c2945eb 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 	if (err < 0)
 		return ERR_PTR(err);
 
+	if (desc.type != NFT_DATA_VALUE) {
+		err = -EINVAL;
+		goto err1;
+	}
+
 	if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
 		return &nft_cmp_fast_ops;
-	else
-		return &nft_cmp_ops;
+
+	return &nft_cmp_ops;
+err1:
+	nft_data_release(&data, desc.type);
+	return ERR_PTR(-EINVAL);
 }
 
 struct nft_expr_type nft_cmp_type __read_mostly = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a34ceb3..1678e9e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -826,9 +826,9 @@ static void nft_ct_helper_obj_destroy(struct nft_object *obj)
 	struct nft_ct_helper_obj *priv = nft_obj_data(obj);
 
 	if (priv->helper4)
-		module_put(priv->helper4->me);
+		nf_conntrack_helper_put(priv->helper4);
 	if (priv->helper6)
-		module_put(priv->helper6->me);
+		nf_conntrack_helper_put(priv->helper6);
 }
 
 static void nft_ct_helper_obj_eval(struct nft_object *obj,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 728baf8..4717d77 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
 	return 0;
 
 err1:
-	nft_data_uninit(&priv->data, desc.type);
+	nft_data_release(&priv->data, desc.type);
 	return err;
 }
 
@@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
 				  const struct nft_expr *expr)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
-	return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+
+	return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
 static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 9edc74e..cedb96c 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
 	priv->len = desc_from.len;
 	return 0;
 err2:
-	nft_data_uninit(&priv->data_to, desc_to.type);
+	nft_data_release(&priv->data_to, desc_to.type);
 err1:
-	nft_data_uninit(&priv->data_from, desc_from.type);
+	nft_data_release(&priv->data_from, desc_from.type);
 	return err;
 }
 
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 8ec086b..3d3a6df 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	int err;
 
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
 	iter->err = err;
 	if (err)
 		return;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e97e2fb..fbdbaa0 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -116,17 +116,17 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 		else if (d > 0)
 			p = &parent->rb_right;
 		else {
-			if (nft_set_elem_active(&rbe->ext, genmask)) {
-				if (nft_rbtree_interval_end(rbe) &&
-				    !nft_rbtree_interval_end(new))
-					p = &parent->rb_left;
-				else if (!nft_rbtree_interval_end(rbe) &&
-					 nft_rbtree_interval_end(new))
-					p = &parent->rb_right;
-				else {
-					*ext = &rbe->ext;
-					return -EEXIST;
-				}
+			if (nft_rbtree_interval_end(rbe) &&
+			    !nft_rbtree_interval_end(new)) {
+				p = &parent->rb_left;
+			} else if (!nft_rbtree_interval_end(rbe) &&
+				   nft_rbtree_interval_end(new)) {
+				p = &parent->rb_right;
+			} else if (nft_set_elem_active(&rbe->ext, genmask)) {
+				*ext = &rbe->ext;
+				return -EEXIST;
+			} else {
+				p = &parent->rb_left;
 			}
 		}
 	}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8876b7d..1770c1d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -283,28 +283,30 @@ static int xt_obj_to_user(u16 __user *psize, u16 size,
 		       &U->u.user.revision, K->u.kernel.TYPE->revision)
 
 int xt_data_to_user(void __user *dst, const void *src,
-		    int usersize, int size)
+		    int usersize, int size, int aligned_size)
 {
 	usersize = usersize ? : size;
 	if (copy_to_user(dst, src, usersize))
 		return -EFAULT;
-	if (usersize != size && clear_user(dst + usersize, size - usersize))
+	if (usersize != aligned_size &&
+	    clear_user(dst + usersize, aligned_size - usersize))
 		return -EFAULT;
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_data_to_user);
 
-#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE)				\
+#define XT_DATA_TO_USER(U, K, TYPE)					\
 	xt_data_to_user(U->data, K->data,				\
 			K->u.kernel.TYPE->usersize,			\
-			C_SIZE ? : K->u.kernel.TYPE->TYPE##size)
+			K->u.kernel.TYPE->TYPE##size,			\
+			XT_ALIGN(K->u.kernel.TYPE->TYPE##size))
 
 int xt_match_to_user(const struct xt_entry_match *m,
 		     struct xt_entry_match __user *u)
 {
 	return XT_OBJ_TO_USER(u, m, match, 0) ||
-	       XT_DATA_TO_USER(u, m, match, 0);
+	       XT_DATA_TO_USER(u, m, match);
 }
 EXPORT_SYMBOL_GPL(xt_match_to_user);
 
@@ -312,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t,
 		      struct xt_entry_target __user *u)
 {
 	return XT_OBJ_TO_USER(u, t, target, 0) ||
-	       XT_DATA_TO_USER(u, t, target, 0);
+	       XT_DATA_TO_USER(u, t, target);
 }
 EXPORT_SYMBOL_GPL(xt_target_to_user);
 
@@ -611,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
+#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE)			\
+	xt_data_to_user(U->data, K->data,				\
+			K->u.kernel.TYPE->usersize,			\
+			C_SIZE,						\
+			COMPAT_XT_ALIGN(C_SIZE))
+
 int xt_compat_match_to_user(const struct xt_entry_match *m,
 			    void __user **dstptr, unsigned int *size)
 {
@@ -626,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
 		if (match->compat_to_user((void __user *)cm->data, m->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
+		if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm)))
 			return -EFAULT;
 	}
 
@@ -972,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t,
 		if (target->compat_to_user((void __user *)ct->data, t->data))
 			return -EFAULT;
 	} else {
-		if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
+		if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct)))
 			return -EFAULT;
 	}
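xt_data_to_user() now distinguishes the number of bytes that carry real data (usersize) from the aligned slot reserved for them in the user-visible blob (XT_ALIGN()/COMPAT_XT_ALIGN() of the match/target size): only usersize bytes are copied out, and everything up to the aligned size is cleared so the padding never exposes stale kernel memory. A tiny userspace illustration of that copy-then-clear step (all names below are invented for the example):

#include <stdio.h>
#include <string.h>

/* Copy 'usersize' meaningful bytes, then zero the remainder of the aligned
 * slot so the destination never carries leftover data in the padding. */
static void data_to_user(unsigned char *dst, const unsigned char *src,
			 size_t usersize, size_t aligned_size)
{
	memcpy(dst, src, usersize);
	if (usersize != aligned_size)
		memset(dst + usersize, 0, aligned_size - usersize);
}

int main(void)
{
	unsigned char src[8] = "secret!";	/* 7 bytes + NUL */
	unsigned char dst[8];
	size_t i;

	memset(dst, 0xff, sizeof(dst));		/* pretend dst holds stale data */
	data_to_user(dst, src, 5, 8);		/* copy 5 bytes, clear the 3 padding bytes */

	for (i = 0; i < sizeof(dst); i++)
		printf("%02x ", dst[i]);
	printf("\n");
	return 0;
}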
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index bb7ad82..623ef37 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -96,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 
 	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
 	if (help == NULL) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -263,7 +263,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 err4:
 	help = nfct_help(ct);
 	if (help)
-		module_put(help->helper->me);
+		nf_conntrack_helper_put(help->helper);
 err3:
 	nf_ct_tmpl_free(ct);
 err2:
@@ -346,7 +346,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
 	if (ct) {
 		help = nfct_help(ct);
 		if (help)
-			module_put(help->helper->me);
+			nf_conntrack_helper_put(help->helper);
 
 		nf_ct_netns_put(par->net, par->family);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ee841f0..7586d44 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -62,6 +62,7 @@
 #include <asm/cacheflush.h>
 #include <linux/hash.h>
 #include <linux/genetlink.h>
+#include <linux/net_namespace.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -1415,7 +1416,8 @@ static void do_one_broadcast(struct sock *sk,
 		goto out;
 	}
 	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
-	NETLINK_CB(p->skb2).nsid_is_set = true;
+	if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
+		NETLINK_CB(p->skb2).nsid_is_set = true;
 	val = netlink_broadcast_deliver(sk, p->skb2);
 	if (val < 0) {
 		netlink_overrun(sk);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index bf602e3..08679eb 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1123,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 
 	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
 	if (!help) {
-		module_put(helper->me);
+		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
 	}
 
@@ -1584,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a)
 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 {
 	if (ct_info->helper)
-		module_put(ct_info->helper->me);
+		nf_conntrack_helper_put(ct_info->helper);
 	if (ct_info->ct)
 		nf_ct_tmpl_free(ct_info->ct);
 }
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 89193a6..04a3128 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev)
 	struct vport *vport = ovs_internal_dev_get_vport(dev);
 
 	ovs_vport_free(vport);
-	free_netdev(dev);
 }
 
 static void
@@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev)
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
 			      IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
-	netdev->destructor = internal_dev_destructor;
+	netdev->needs_free_netdev = true;
+	netdev->priv_destructor = internal_dev_destructor;
 	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->rtnl_link_ops = &internal_dev_link_ops;
 
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 21c28b5..2c93379 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev)
 	dev->tx_queue_len	= 10;
 
 	dev->netdev_ops		= &gprs_netdev_ops;
-	dev->destructor		= free_netdev;
+	dev->needs_free_netdev	= true;
 }
 
 /*
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 0a4e284..5436922 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -217,7 +217,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 				       unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, n_parts, loop, tmp;
+	unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen;
 
 	/* there must be at least one name, and at least #names+1 length
 	 * words */
@@ -247,16 +247,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 		toklen -= 4;
 		if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX)
 			return -EINVAL;
-		if (tmp > toklen)
+		paddedlen = (tmp + 3) & ~3;
+		if (paddedlen > toklen)
 			return -EINVAL;
 		princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL);
 		if (!princ->name_parts[loop])
 			return -ENOMEM;
 		memcpy(princ->name_parts[loop], xdr, tmp);
 		princ->name_parts[loop][tmp] = 0;
-		tmp = (tmp + 3) & ~3;
-		toklen -= tmp;
-		xdr += tmp >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	if (toklen < 4)
@@ -265,16 +265,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
 	toklen -= 4;
 	if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX)
 		return -EINVAL;
-	if (tmp > toklen)
+	paddedlen = (tmp + 3) & ~3;
+	if (paddedlen > toklen)
 		return -EINVAL;
 	princ->realm = kmalloc(tmp + 1, GFP_KERNEL);
 	if (!princ->realm)
 		return -ENOMEM;
 	memcpy(princ->realm, xdr, tmp);
 	princ->realm[tmp] = 0;
-	tmp = (tmp + 3) & ~3;
-	toklen -= tmp;
-	xdr += tmp >> 2;
+	toklen -= paddedlen;
+	xdr += paddedlen >> 2;
 
 	_debug("%s/...@%s", princ->name_parts[0], princ->realm);
 
@@ -293,7 +293,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
 					 unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, len;
+	unsigned int toklen = *_toklen, len, paddedlen;
 
 	/* there must be at least one tag and one length word */
 	if (toklen <= 8)
@@ -307,15 +307,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
 	toklen -= 8;
 	if (len > max_data_size)
 		return -EINVAL;
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > toklen)
+		return -EINVAL;
 	td->data_len = len;
 
 	if (len > 0) {
 		td->data = kmemdup(xdr, len, GFP_KERNEL);
 		if (!td->data)
 			return -ENOMEM;
-		len = (len + 3) & ~3;
-		toklen -= len;
-		xdr += len >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	_debug("tag %x len %x", td->tag, td->data_len);
@@ -387,7 +389,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 				    const __be32 **_xdr, unsigned int *_toklen)
 {
 	const __be32 *xdr = *_xdr;
-	unsigned int toklen = *_toklen, len;
+	unsigned int toklen = *_toklen, len, paddedlen;
 
 	/* there must be at least one length word */
 	if (toklen <= 4)
@@ -399,6 +401,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 	toklen -= 4;
 	if (len > AFSTOKEN_K5_TIX_MAX)
 		return -EINVAL;
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > toklen)
+		return -EINVAL;
 	*_tktlen = len;
 
 	_debug("ticket len %u", len);
@@ -407,9 +412,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 		*_ticket = kmemdup(xdr, len, GFP_KERNEL);
 		if (!*_ticket)
 			return -ENOMEM;
-		len = (len + 3) & ~3;
-		toklen -= len;
-		xdr += len >> 2;
+		toklen -= paddedlen;
+		xdr += paddedlen >> 2;
 	}
 
 	*_xdr = xdr;
@@ -552,7 +556,7 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 {
 	const __be32 *xdr = prep->data, *token;
 	const char *cp;
-	unsigned int len, tmp, loop, ntoken, toklen, sec_ix;
+	unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
 	size_t datalen = prep->datalen;
 	int ret;
 
@@ -578,22 +582,21 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 	if (len < 1 || len > AFSTOKEN_CELL_MAX)
 		goto not_xdr;
 	datalen -= 4;
-	tmp = (len + 3) & ~3;
-	if (tmp > datalen)
+	paddedlen = (len + 3) & ~3;
+	if (paddedlen > datalen)
 		goto not_xdr;
 
 	cp = (const char *) xdr;
 	for (loop = 0; loop < len; loop++)
 		if (!isprint(cp[loop]))
 			goto not_xdr;
-	if (len < tmp)
-		for (; loop < tmp; loop++)
-			if (cp[loop])
-				goto not_xdr;
+	for (; loop < paddedlen; loop++)
+		if (cp[loop])
+			goto not_xdr;
 	_debug("cellname: [%u/%u] '%*.*s'",
-	       len, tmp, len, len, (const char *) xdr);
-	datalen -= tmp;
-	xdr += tmp >> 2;
+	       len, paddedlen, len, len, (const char *) xdr);
+	datalen -= paddedlen;
+	xdr += paddedlen >> 2;
 
 	/* get the token count */
 	if (datalen < 12)
@@ -614,10 +617,11 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 		sec_ix = ntohl(*xdr);
 		datalen -= 4;
 		_debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
-		if (toklen < 20 || toklen > datalen)
+		paddedlen = (toklen + 3) & ~3;
+		if (toklen < 20 || toklen > datalen || paddedlen > datalen)
 			goto not_xdr;
-		datalen -= (toklen + 3) & ~3;
-		xdr += (toklen + 3) >> 2;
+		datalen -= paddedlen;
+		xdr += paddedlen >> 2;
 
 	} while (--loop > 0);
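Each of the rxrpc key-parsing fixes above follows the same pattern: compute the 4-byte padded length of an XDR item up front and check it against the remaining token length before allocating or advancing, rather than checking only the unpadded length and subtracting the padded one afterwards, which could walk past the end of the buffer. A small stand-alone C illustration of the check (the function and names are invented; the caller is assumed to have already range-checked itemlen, as the kernel does against the AFSTOKEN_*_MAX limits):

#include <stdio.h>

/* Validate and consume one length-prefixed, 4-byte padded XDR item.
 * Returns the number of bytes consumed, or -1 if the item would overrun. */
static int consume_padded(unsigned int buflen, unsigned int itemlen)
{
	unsigned int paddedlen = (itemlen + 3) & ~3u;	/* round up to a multiple of 4 */

	if (paddedlen > buflen)		/* check the padded size before consuming */
		return -1;

	/* ... copy the itemlen payload bytes and skip the padding here ... */
	return (int)paddedlen;
}

int main(void)
{
	printf("%d\n", consume_padded(8, 5));	/* 8: 5 bytes rounded up to 8 */
	printf("%d\n", consume_padded(8, 7));	/* 8 */
	printf("%d\n", consume_padded(8, 9));	/* -1: would overrun the buffer */
	return 0;
}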
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 164b5ac..7dc5892 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -94,8 +94,10 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
 		k++;
 	}
 
-	if (n)
+	if (n) {
+		err = -EINVAL;
 		goto err_out;
+	}
 
 	return keys_ex;
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f42008b..b062bc8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -132,21 +132,21 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
-	spin_lock_bh(&police->tcf_lock);
 	if (est) {
 		err = gen_replace_estimator(&police->tcf_bstats, NULL,
 					    &police->tcf_rate_est,
 					    &police->tcf_lock,
 					    NULL, est);
 		if (err)
-			goto failure_unlock;
+			goto failure;
 	} else if (tb[TCA_POLICE_AVRATE] &&
 		   (ret == ACT_P_CREATED ||
 		    !gen_estimator_active(&police->tcf_rate_est))) {
 		err = -EINVAL;
-		goto failure_unlock;
+		goto failure;
 	}
 
+	spin_lock_bh(&police->tcf_lock);
 	/* No failure allowed after this point */
 	police->tcfp_mtu = parm->mtu;
 	if (police->tcfp_mtu == 0) {
@@ -192,8 +192,6 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 
 	return ret;
 
-failure_unlock:
-	spin_unlock_bh(&police->tcf_lock);
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index dee469f..51859b8 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -203,7 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 
 	*arg = (unsigned long) head;
 	rcu_assign_pointer(tp->root, new);
-	call_rcu(&head->rcu, mall_destroy_rcu);
 	return 0;
 
 err_replace_hw_filter:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e88342f..cfdbfa1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 		return sch;
 	}
 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
-	ops->destroy(sch);
+	if (ops->destroy)
+		ops->destroy(sch);
 err_out3:
 	dev_put(dev);
 	kfree((char *) sch - sch->padded);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a9708da..9523828 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc,
 
 		asoc->ctsn_ack_point = asoc->next_tsn - 1;
 		asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
-		if (!asoc->stream) {
+
+		if (sctp_state(asoc, COOKIE_WAIT)) {
+			sctp_stream_free(asoc->stream);
 			asoc->stream = new->stream;
 			new->stream = NULL;
 		}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8c58923..3dcd0ec 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -275,6 +275,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 		if (sctp_sk(sk)->bind_hash)
 			sctp_put_port(sk);
 
+		sctp_sk(sk)->ep = NULL;
 		sock_put(sk);
 	}
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0e06a27..ba9ad32 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 			     struct sctp_association **app,
 			     struct sctp_transport **tpp)
 {
+	struct sctp_init_chunk *chunkhdr, _chunkhdr;
 	union sctp_addr saddr;
 	union sctp_addr daddr;
 	struct sctp_af *af;
 	struct sock *sk = NULL;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport = NULL;
-	struct sctp_init_chunk *chunkhdr;
 	__u32 vtag = ntohl(sctphdr->vtag);
-	int len = skb->len - ((void *)sctphdr - (void *)skb->data);
 
 	*app = NULL; *tpp = NULL;
 
@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 	 * discard the packet.
 	 */
 	if (vtag == 0) {
-		chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
-		if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
-			  + sizeof(__be32) ||
+		/* chunk header + first 4 octets of the init header */
+		chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
+					      sizeof(struct sctphdr),
+					      sizeof(struct sctp_chunkhdr) +
+					      sizeof(__be32), &_chunkhdr);
+		if (!chunkhdr ||
 		    chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
-		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+		    ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
 			goto out;
-		}
+
 	} else if (vtag != asoc->c.peer_vtag) {
 		goto out;
 	}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 048954e..9a64721 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -278,7 +278,6 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
 
 static int sctp_sock_dump(struct sock *sk, void *p)
 {
-	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_comm_param *commp = p;
 	struct sk_buff *skb = commp->skb;
 	struct netlink_callback *cb = commp->cb;
@@ -287,7 +286,9 @@ static int sctp_sock_dump(struct sock *sk, void *p)
 	int err = 0;
 
 	lock_sock(sk);
-	list_for_each_entry(assoc, &ep->asocs, asocs) {
+	if (!sctp_sk(sk)->ep)
+		goto release;
+	list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
 		if (cb->args[4] < cb->args[1])
 			goto next;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8a08f13..92e332e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2454,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * stream sequence number shall be set to 0.
 	 */
 
-	/* Allocate storage for the negotiated streams if it is not a temporary
-	 * association.
-	 */
-	if (!asoc->temp) {
-		if (sctp_stream_init(asoc, gfp))
-			goto clean_up;
+	if (sctp_stream_init(asoc, gfp))
+		goto clean_up;
 
-		if (sctp_assoc_set_id(asoc, gfp))
-			goto clean_up;
-	}
+	if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
+		goto clean_up;
 
 	/* ADDIP Section 4.1 ASCONF Chunk Procedures
 	 *
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f5e6cf..f863b55 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 		}
 	}
 
+	/* Set temp so that it won't be added to the hashtable */
+	new_asoc->temp = 1;
+
 	/* Compare the tie_tag in cookie with the verification tag of
 	 * current association.
 	 */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f16c8d9..3a8318e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4622,13 +4622,13 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
 
 	for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
 	     hash++, head++) {
-		read_lock(&head->lock);
+		read_lock_bh(&head->lock);
 		sctp_for_each_hentry(epb, &head->chain) {
 			err = cb(sctp_ep(epb), p);
 			if (err)
 				break;
 		}
-		read_unlock(&head->lock);
+		read_unlock_bh(&head->lock);
 	}
 
 	return err;
@@ -4666,9 +4666,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
 	if (err)
 		return err;
 
-	sctp_transport_get_idx(net, &hti, pos);
-	obj = sctp_transport_get_next(net, &hti);
-	for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) {
+	obj = sctp_transport_get_idx(net, &hti, pos + 1);
+	for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
 		struct sctp_transport *transport = obj;
 
 		if (!sctp_transport_hold(transport))
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 24fedd4b..03f6b58 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -119,11 +119,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 
 	for (i = 0; i < (reqs << 1); i++) {
 		rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
-		if (!rqst) {
-			pr_err("RPC:       %s: Failed to create bc rpc_rqst\n",
-			       __func__);
+		if (!rqst)
 			goto out_free;
-		}
+
 		dprintk("RPC:       %s: new rqst %p\n", __func__, rqst);
 
 		rqst->rq_xprt = &r_xprt->rx_xprt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 16aff8d..d5b54c0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2432,7 +2432,12 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	case -ENETUNREACH:
 	case -EADDRINUSE:
 	case -ENOBUFS:
-		/* retry with existing socket, after a delay */
+		/*
+		 * xs_tcp_force_close() wakes tasks with -EIO.
+		 * We need to wake them first to ensure the
+		 * correct error code.
+		 */
+		xprt_wake_pending_tasks(xprt, status);
 		xs_tcp_force_close(xprt);
 		goto out;
 	}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 312ef7d..ab30876 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -508,7 +508,7 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 	}
 
 	if (skb_cloned(_skb) &&
-	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
 		goto exit;
 
 	/* Now reverse the concerned fields */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6a7fe76..c77ced0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -343,7 +343,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
  * are still connected to it and there's no way to inform "a polling
  * implementation" that it should let go of a certain wait queue
  *
- * In order to propagate a wake up, a wait_queue_t of the client
+ * In order to propagate a wake up, a wait_queue_entry_t of the client
  * socket is enqueued on the peer_wait queue of the server socket
  * whose wake function does a wake_up on the ordinary client socket
  * wait queue. This connection is established whenever a write (or
@@ -352,7 +352,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
  * was relayed.
  */
 
-static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 				      void *key)
 {
 	struct unix_sock *u;
@@ -999,7 +999,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct path path = { };
 
 	err = -EINVAL;
-	if (sunaddr->sun_family != AF_UNIX)
+	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
+	    sunaddr->sun_family != AF_UNIX)
 		goto out;
 
 	if (addr_len == sizeof(short)) {
@@ -1110,6 +1111,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	unsigned int hash;
 	int err;
 
+	err = -EINVAL;
+	if (alen < offsetofend(struct sockaddr, sa_family))
+		goto out;
+
 	if (addr->sa_family != AF_UNSPEC) {
 		err = unix_mkname(sunaddr, alen, &hash);
 		if (err < 0)
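unix_bind() and unix_dgram_connect() now verify that the caller-supplied address length at least covers the address-family field before reading it. A hedged userspace sketch of the same check; offsetofend() is a kernel macro, re-created here only for the example, and the helper name is invented:

#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Kernel-style offsetofend(): offset of the first byte past MEMBER. */
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* Reject addresses too short to even contain the family field. */
static int check_unix_addr(const struct sockaddr_un *addr, socklen_t addr_len)
{
	if (addr_len < offsetofend(struct sockaddr_un, sun_family))
		return -1;		/* can't safely read sun_family */
	if (addr->sun_family != AF_UNIX)
		return -1;
	return 0;
}

int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };

	printf("%d\n", check_unix_addr(&addr, sizeof(addr)));	/*  0 */
	printf("%d\n", check_unix_addr(&addr, 1));		/* -1 */
	return 0;
}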
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 6f7f675..dfc8c51e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	long timeout;
 	int err;
 	struct vsock_transport_send_notify_data send_data;
-
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	sk = sock->sk;
 	vsk = vsock_sk(sk);
@@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (err < 0)
 		goto out;
 
-
 	while (total_written < len) {
 		ssize_t written;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		add_wait_queue(sk_sleep(sk), &wait);
 		while (vsock_stream_has_space(vsk) == 0 &&
 		       sk->sk_err == 0 &&
 		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Don't wait for non-blocking sockets. */
 			if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			err = transport->notify_send_pre_block(vsk, &send_data);
 			if (err < 0) {
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
 
 			release_sock(sk);
-			timeout = schedule_timeout(timeout);
+			timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			} else if (timeout == 0) {
 				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
+				remove_wait_queue(sk_sleep(sk), &wait);
 				goto out_err;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk_sleep(sk), &wait);
+		remove_wait_queue(sk_sleep(sk), &wait);
 
 		/* These checks occur both as part of and after the loop
 		 * conditional since we need to check before and after
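
The vsock conversion above moves from prepare_to_wait()/schedule_timeout() to the wait_woken() idiom: the entry is queued once outside the loop and wakeups are latched in the entry itself, so a wakeup arriving between the condition check and the sleep is not lost. A condensed sketch (not taken from the patch), with condition_met() as a placeholder predicate:

DEFINE_WAIT_FUNC(wait, woken_wake_function);
long timeout = sk->sk_sndtimeo;
int err = 0;

add_wait_queue(sk_sleep(sk), &wait);
while (!condition_met(sk)) {
	if (timeout == 0) {
		err = -EAGAIN;		/* non-blocking caller or timed out */
		break;
	}
	release_sock(sk);
	timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
	lock_sock(sk);
	if (signal_pending(current)) {
		err = sock_intr_errno(timeout);
		break;
	}
}
remove_wait_queue(sk_sleep(sk), &wait);
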
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 14d5f0c..9f0901f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -322,9 +322,9 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid)
 {
 	struct cfg80211_sched_scan_request *pos;
 
-	ASSERT_RTNL();
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
 
-	list_for_each_entry(pos, &rdev->sched_scan_req_list, list) {
+	list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) {
 		if (pos->reqid == reqid)
 			return pos;
 	}
@@ -398,13 +398,13 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
 	trace_cfg80211_sched_scan_results(wiphy, reqid);
 	/* ignore if we're not scanning */
 
-	rtnl_lock();
+	rcu_read_lock();
 	request = cfg80211_find_sched_scan_req(rdev, reqid);
 	if (request) {
 		request->report_results = true;
 		queue_work(cfg80211_wq, &rdev->sched_scan_res_wk);
 	}
-	rtnl_unlock();
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_results);
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7198373..4992f10 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	if (iftype == NL80211_IFTYPE_MESH_POINT)
 		skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
 
+	mesh_flags &= MESH_FLAGS_AE;
+
 	switch (hdr->frame_control &
 		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 	case cpu_to_le16(IEEE80211_FCTL_TODS):
@@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 			     iftype != NL80211_IFTYPE_STATION))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6) {
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_dest, 2 * ETH_ALEN);
@@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 		     ether_addr_equal(tmp.h_source, addr)))
 			return -1;
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
-			if (mesh_flags & MESH_FLAGS_AE_A5_A6)
+			if (mesh_flags == MESH_FLAGS_AE_A5_A6)
 				return -1;
-			if (mesh_flags & MESH_FLAGS_AE_A4)
+			if (mesh_flags == MESH_FLAGS_AE_A4)
 				skb_copy_bits(skb, hdrlen +
 					offsetof(struct ieee80211s_hdr, eaddr1),
 					tmp.h_source, ETH_ALEN);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 1a4db67..6cdb054 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -914,13 +914,12 @@ int call_commit_handler(struct net_device *dev)
  * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+static int wireless_process_ioctl(struct net *net, struct iwreq *iwr,
 				  unsigned int cmd,
 				  struct iw_request_info *info,
 				  wext_ioctl_func standard,
 				  wext_ioctl_func private)
 {
-	struct iwreq *iwr = (struct iwreq *) ifr;
 	struct net_device *dev;
 	iw_handler	handler;
 
@@ -928,7 +927,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 	 * The copy_to/from_user() of ifr is also dealt with in there */
 
 	/* Make sure the device exist */
-	if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+	if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL)
 		return -ENODEV;
 
 	/* A bunch of special cases, then the generic case...
@@ -957,9 +956,6 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
 		else if (private)
 			return private(dev, iwr, cmd, info, handler);
 	}
-	/* Old driver API : call driver ioctl handler */
-	if (dev->netdev_ops->ndo_do_ioctl)
-		return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
 	return -EOPNOTSUPP;
 }
 
@@ -977,7 +973,7 @@ static int wext_permission_check(unsigned int cmd)
 }
 
 /* entry point from dev ioctl */
-static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
+static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr,
 			       unsigned int cmd, struct iw_request_info *info,
 			       wext_ioctl_func standard,
 			       wext_ioctl_func private)
@@ -987,9 +983,9 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
 	if (ret)
 		return ret;
 
-	dev_load(net, ifr->ifr_name);
+	dev_load(net, iwr->ifr_name);
 	rtnl_lock();
-	ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private);
+	ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private);
 	rtnl_unlock();
 
 	return ret;
@@ -1039,18 +1035,18 @@ static int ioctl_standard_call(struct net_device *	dev,
 }
 
 
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
 		      void __user *arg)
 {
 	struct iw_request_info info = { .cmd = cmd, .flags = 0 };
 	int ret;
 
-	ret = wext_ioctl_dispatch(net, ifr, cmd, &info,
+	ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
 				  ioctl_standard_call,
 				  ioctl_private_call);
 	if (ret >= 0 &&
 	    IW_IS_GET(cmd) &&
-	    copy_to_user(arg, ifr, sizeof(struct iwreq)))
+	    copy_to_user(arg, iwr, sizeof(struct iwreq)))
 		return -EFAULT;
 
 	return ret;
@@ -1107,7 +1103,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 	info.cmd = cmd;
 	info.flags = IW_REQUEST_FLAG_COMPAT;
 
-	ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info,
+	ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
 				  compat_standard_call,
 				  compat_private_call);
 
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index abf81b3..55b2ac3 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,8 +4,7 @@
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
 		      xfrm_input.o xfrm_output.o \
-		      xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+		      xfrm_sysctl.o xfrm_replay.o xfrm_device.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8ec8a3f..5aba036 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -22,6 +22,7 @@
 #include <net/xfrm.h>
 #include <linux/notifier.h>
 
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	int err;
@@ -137,6 +138,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	return true;
 }
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
 
 int xfrm_dev_register(struct net_device *dev)
 {
@@ -170,7 +172,7 @@ static int xfrm_dev_feat_change(struct net_device *dev)
 
 static int xfrm_dev_down(struct net_device *dev)
 {
-	if (dev->hw_features & NETIF_F_HW_ESP)
+	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
 	xfrm_garbage_collect(dev_net(dev));
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b00a1d5..643a18f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-	if (cnt)
-		xfrm_garbage_collect(net);
-
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1797,43 +1793,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	goto out;
 }
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
-{
-	if (!*target) {
-		*target = kmalloc(size, GFP_ATOMIC);
-		if (!*target)
-			return -ENOMEM;
-	}
-
-	memcpy(*target, src, size);
-	return 0;
-}
-#endif
-
-static int xfrm_dst_update_parent(struct dst_entry *dst,
-				  const struct xfrm_selector *sel)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
-				   sel, sizeof(*sel));
-#else
-	return 0;
-#endif
-}
-
-static int xfrm_dst_update_origin(struct dst_entry *dst,
-				  const struct flowi *fl)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
-	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
-#else
-	return 0;
-#endif
-}
-
 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 				struct xfrm_policy **pols,
 				int *num_pols, int *num_xfrms)
@@ -1905,16 +1864,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
 	xdst = (struct xfrm_dst *)dst;
 	xdst->num_xfrms = err;
-	if (num_pols > 1)
-		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-	else
-		err = xfrm_dst_update_origin(dst, fl);
-	if (unlikely(err)) {
-		dst_free(dst);
-		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-		return ERR_PTR(err);
-	}
-
 	xdst->num_pols = num_pols;
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fc3c5aa..2e291bc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1383,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
 	x->km.seq = orig->km.seq;
+	x->replay = orig->replay;
+	x->preplay = orig->preplay;
 
 	return x;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 38614df..86116e9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
+	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index ce753a4..c583a1e 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -14,7 +14,15 @@
 include scripts/Kbuild.include
 
 srcdir        := $(srctree)/$(obj)
-subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs       := $(patsubst $(srcdir)/%/,%,\
+		 $(filter-out $(srcdir)/,\
+		 $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
 # caller may set destination dir (when installing to asm/)
 _dst          := $(if $(dst),$(dst),$(obj))
 
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4b9569f..c7e4d73 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5533,23 +5533,6 @@
 			}
 		}
 
-# Check for expedited grace periods that interrupt non-idle non-nohz
-# online CPUs.  These expedited can therefore degrade real-time response
-# if used carelessly, and should be avoided where not absolutely
-# needed.  It is always OK to use synchronize_rcu_expedited() and
-# synchronize_sched_expedited() at boot time (before real-time applications
-# start) and in error situations where real-time response is compromised in
-# any case.  Note that synchronize_srcu_expedited() does -not- interrupt
-# other CPUs, so don't warn on uses of synchronize_srcu_expedited().
-# Of course, nothing comes for free, and srcu_read_lock() and
-# srcu_read_unlock() do contain full memory barriers in payment for
-# synchronize_srcu_expedited() non-interruption properties.
-		if ($line =~ /\b(synchronize_rcu_expedited|synchronize_sched_expedited)\(/) {
-			WARN("EXPEDITED_RCU_GRACE_PERIOD",
-			     "expedited RCU grace periods should be avoided where they can degrade real-time response\n" . $herecurr);
-
-		}
-
 # check of hardware specific defines
 		if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
 			CHK("ARCH_DEFINES",
diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index f9b92ec..5afd109 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py
@@ -23,10 +23,11 @@
         super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA)
 
     def invoke(self, arg, from_tty):
-        log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16)
-        log_first_idx = int(gdb.parse_and_eval("log_first_idx"))
-        log_next_idx = int(gdb.parse_and_eval("log_next_idx"))
-        log_buf_len = int(gdb.parse_and_eval("log_buf_len"))
+        log_buf_addr = int(str(gdb.parse_and_eval(
+            "'printk.c'::log_buf")).split()[0], 16)
+        log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
+        log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
+        log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
 
         inf = gdb.inferiors()[0]
         start = log_buf_addr + log_first_idx
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h
index 3bffdca..b724a02 100644
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
 int yylex(void);
 int yyparse(void);
 
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
 
 /*----------------------------------------------------------------------*/
 #define xmalloc(size) ({ void *__ptr = malloc(size);		\
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 90a091b..eb81446 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -196,7 +196,7 @@
 
 # Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
 PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
 $(obj)/dochecklxdialog:
 	$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
 
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index a9bc533..0031147 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
 static int items_num;
 static int global_exit;
 /* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
 };
 
 static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
 	{
 		.key_str = "F1",
 		.func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
 	index = (index + items_num) % items_num;
 	while (true) {
 		char *str = k_menu_items[index].str;
-		if (strcasestr(str, match_str) != 0)
+		if (strcasestr(str, match_str) != NULL)
 			return index;
 		if (flag == FIND_NEXT_MATCH_UP ||
 		    flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
-	struct menu *submenu = 0;
+	struct menu *submenu = NULL;
 	const char *prompt = menu_get_prompt(menu);
 	struct symbol *sym;
 	int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
 static void conf_choice(struct menu *menu)
 {
 	const char *prompt = _(menu_get_prompt(menu));
-	struct menu *child = 0;
+	struct menu *child = NULL;
 	struct symbol *active;
 	int selected_index = 0;
 	int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
 	}
 }
 
-void setup_windows(void)
+static void setup_windows(void)
 {
 	int lines, columns;
 
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 4b2f44c..a64b1c3 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
 	mkattrn(FUNCTION_TEXT, A_REVERSE);
 }
 
-void set_colors()
+void set_colors(void)
 {
 	start_color();
 	use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
 	int lines = 0;
 
 	if (!text)
-		return 0;
+		return NULL;
 
 	for (i = 0; text[i] != '\0' && lines < line_no; i++)
 		if (text[i] == '\n')
diff --git a/scripts/tags.sh b/scripts/tags.sh
index d661f2f..d23dcbf 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -106,6 +106,7 @@
 		case "$i" in
 			*.[cS])
 				j=${i/\.[cS]/\.o}
+				j="${j#$tree}"
 				if [ -e $j ]; then
 					echo $i
 				fi
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index d7f282d..1d32cd2 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -164,7 +164,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
 	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
-		crypto_shash_update(desc, inode->i_sb->s_uuid,
+		crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0],
 				    sizeof(inode->i_sb->s_uuid));
 	crypto_shash_final(desc, digest);
 }
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 3ab1067..6f885fa 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -61,7 +61,7 @@ struct ima_rule_entry {
 	enum ima_hooks func;
 	int mask;
 	unsigned long fsmagic;
-	u8 fsuuid[16];
+	uuid_t fsuuid;
 	kuid_t uid;
 	kuid_t fowner;
 	bool (*uid_op)(kuid_t, kuid_t);    /* Handlers for operators       */
@@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 	    && rule->fsmagic != inode->i_sb->s_magic)
 		return false;
 	if ((rule->flags & IMA_FSUUID) &&
-	    memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+	    !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid))
 		return false;
 	if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
 		return false;
@@ -711,14 +711,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 		case Opt_fsuuid:
 			ima_log_string(ab, "fsuuid", args[0].from);
 
-			if (memchr_inv(entry->fsuuid, 0x00,
-				       sizeof(entry->fsuuid))) {
+			if (!uuid_is_null(&entry->fsuuid)) {
 				result = -EINVAL;
 				break;
 			}
 
-			result = blk_part_pack_uuid(args[0].from,
-						    entry->fsuuid);
+			result = uuid_parse(args[0].from, &entry->fsuuid);
 			if (!result)
 				entry->flags |= IMA_FSUUID;
 			break;
@@ -1087,7 +1085,7 @@ int ima_policy_show(struct seq_file *m, void *v)
 	}
 
 	if (entry->flags & IMA_FSUUID) {
-		seq_printf(m, "fsuuid=%pU", entry->fsuuid);
+		seq_printf(m, "fsuuid=%pU", &entry->fsuuid);
 		seq_puts(m, " ");
 	}
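
The fsuuid handling above leans on the uuid_t helpers from <linux/uuid.h>. A short illustrative fragment (assumptions, not patch content), with rule and sb standing in for the real structures:

/* Parse the textual fsuuid= option into the rule; returns 0 on success. */
if (uuid_parse(arg, &rule->fsuuid))
	return -EINVAL;

/* Match a superblock against the rule with a type-safe comparison. */
if (!uuid_is_null(&rule->fsuuid) && uuid_equal(&rule->fsuuid, &sb->s_uuid))
	pr_debug("fsuuid matched %pU\n", &rule->fsuuid);
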
 
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index 6fd95f7..a7a23b5 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -20,6 +20,10 @@
 
 	  If you are unsure as to whether this is required, answer N.
 
+config KEYS_COMPAT
+	def_bool y
+	depends on COMPAT && KEYS
+
 config PERSISTENT_KEYRINGS
 	bool "Enable register of persistent per-UID keyrings"
 	depends on KEYS
@@ -89,9 +93,9 @@
 config KEY_DH_OPERATIONS
        bool "Diffie-Hellman operations on retained keys"
        depends on KEYS
-       select MPILIB
        select CRYPTO
        select CRYPTO_HASH
+       select CRYPTO_DH
        help
 	 This option provides support for calculating Diffie-Hellman
 	 public keys and shared secrets using values stored as keys
diff --git a/security/keys/dh.c b/security/keys/dh.c
index e603bd9..4755d4b 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -8,34 +8,17 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/mpi.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/scatterlist.h>
 #include <linux/crypto.h>
 #include <crypto/hash.h>
+#include <crypto/kpp.h>
+#include <crypto/dh.h>
 #include <keys/user-type.h>
 #include "internal.h"
 
-/*
- * Public key or shared secret generation function [RFC2631 sec 2.1.1]
- *
- * ya = g^xa mod p;
- * or
- * ZZ = yb^xa mod p;
- *
- * where xa is the local private key, ya is the local public key, g is
- * the generator, p is the prime, yb is the remote public key, and ZZ
- * is the shared secret.
- *
- * Both are the same calculation, so g or yb are the "base" and ya or
- * ZZ are the "result".
- */
-static int do_dh(MPI result, MPI base, MPI xa, MPI p)
-{
-	return mpi_powm(result, base, xa, p);
-}
-
-static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
+static ssize_t dh_data_from_key(key_serial_t keyid, void **data)
 {
 	struct key *key;
 	key_ref_t key_ref;
@@ -56,19 +39,17 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
 		status = key_validate(key);
 		if (status == 0) {
 			const struct user_key_payload *payload;
+			uint8_t *duplicate;
 
 			payload = user_key_payload_locked(key);
 
-			if (maxlen == 0) {
-				*mpi = NULL;
+			duplicate = kmemdup(payload->data, payload->datalen,
+					    GFP_KERNEL);
+			if (duplicate) {
+				*data = duplicate;
 				ret = payload->datalen;
-			} else if (payload->datalen <= maxlen) {
-				*mpi = mpi_read_raw_data(payload->data,
-							 payload->datalen);
-				if (*mpi)
-					ret = payload->datalen;
 			} else {
-				ret = -EINVAL;
+				ret = -ENOMEM;
 			}
 		}
 		up_read(&key->sem);
@@ -79,6 +60,29 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi)
 	return ret;
 }
 
+static void dh_free_data(struct dh *dh)
+{
+	kzfree(dh->key);
+	kzfree(dh->p);
+	kzfree(dh->g);
+}
+
+struct dh_completion {
+	struct completion completion;
+	int err;
+};
+
+static void dh_crypto_done(struct crypto_async_request *req, int err)
+{
+	struct dh_completion *compl = req->data;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	compl->err = err;
+	complete(&compl->completion);
+}
+
 struct kdf_sdesc {
 	struct shash_desc shash;
 	char ctx[];
@@ -89,6 +93,7 @@ static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname)
 	struct crypto_shash *tfm;
 	struct kdf_sdesc *sdesc;
 	int size;
+	int err;
 
 	/* allocate synchronous hash */
 	tfm = crypto_alloc_shash(hashname, 0, 0);
@@ -97,16 +102,25 @@ static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname)
 		return PTR_ERR(tfm);
 	}
 
+	err = -EINVAL;
+	if (crypto_shash_digestsize(tfm) == 0)
+		goto out_free_tfm;
+
+	err = -ENOMEM;
 	size = sizeof(struct shash_desc) + crypto_shash_descsize(tfm);
 	sdesc = kmalloc(size, GFP_KERNEL);
 	if (!sdesc)
-		return -ENOMEM;
+		goto out_free_tfm;
 	sdesc->shash.tfm = tfm;
 	sdesc->shash.flags = 0x0;
 
 	*sdesc_ret = sdesc;
 
 	return 0;
+
+out_free_tfm:
+	crypto_free_shash(tfm);
+	return err;
 }
 
 static void kdf_dealloc(struct kdf_sdesc *sdesc)
@@ -120,14 +134,6 @@ static void kdf_dealloc(struct kdf_sdesc *sdesc)
 	kzfree(sdesc);
 }
 
-/* convert 32 bit integer into its string representation */
-static inline void crypto_kw_cpu_to_be32(u32 val, u8 *buf)
-{
-	__be32 *a = (__be32 *)buf;
-
-	*a = cpu_to_be32(val);
-}
-
 /*
  * Implementation of the KDF in counter mode according to SP800-108 section 5.1
  * as well as SP800-56A section 5.8.1 (Single-step KDF).
@@ -138,25 +144,39 @@ static inline void crypto_kw_cpu_to_be32(u32 val, u8 *buf)
  * 5.8.1.2).
  */
 static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen,
-		   u8 *dst, unsigned int dlen)
+		   u8 *dst, unsigned int dlen, unsigned int zlen)
 {
 	struct shash_desc *desc = &sdesc->shash;
 	unsigned int h = crypto_shash_digestsize(desc->tfm);
 	int err = 0;
 	u8 *dst_orig = dst;
-	u32 i = 1;
-	u8 iteration[sizeof(u32)];
+	__be32 counter = cpu_to_be32(1);
 
 	while (dlen) {
 		err = crypto_shash_init(desc);
 		if (err)
 			goto err;
 
-		crypto_kw_cpu_to_be32(i, iteration);
-		err = crypto_shash_update(desc, iteration, sizeof(u32));
+		err = crypto_shash_update(desc, (u8 *)&counter, sizeof(__be32));
 		if (err)
 			goto err;
 
+		if (zlen && h) {
+			u8 tmpbuffer[h];
+			size_t chunk = min_t(size_t, zlen, h);
+			memset(tmpbuffer, 0, chunk);
+
+			do {
+				err = crypto_shash_update(desc, tmpbuffer,
+							  chunk);
+				if (err)
+					goto err;
+
+				zlen -= chunk;
+				chunk = min_t(size_t, zlen, h);
+			} while (zlen);
+		}
+
 		if (src && slen) {
 			err = crypto_shash_update(desc, src, slen);
 			if (err)
@@ -179,7 +199,7 @@ static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen,
 
 			dlen -= h;
 			dst += h;
-			i++;
+			counter = cpu_to_be32(be32_to_cpu(counter) + 1);
 		}
 	}
 
@@ -192,7 +212,7 @@ static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen,
 
 static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc,
 				 char __user *buffer, size_t buflen,
-				 uint8_t *kbuf, size_t kbuflen)
+				 uint8_t *kbuf, size_t kbuflen, size_t lzero)
 {
 	uint8_t *outbuf = NULL;
 	int ret;
@@ -203,7 +223,7 @@ static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc,
 		goto err;
 	}
 
-	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen);
+	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen, lzero);
 	if (ret)
 		goto err;
 
@@ -221,21 +241,26 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 			 struct keyctl_kdf_params *kdfcopy)
 {
 	long ret;
-	MPI base, private, prime, result;
-	unsigned nbytes;
+	ssize_t dlen;
+	int secretlen;
+	int outlen;
 	struct keyctl_dh_params pcopy;
-	uint8_t *kbuf;
-	ssize_t keylen;
-	size_t resultlen;
+	struct dh dh_inputs;
+	struct scatterlist outsg;
+	struct dh_completion compl;
+	struct crypto_kpp *tfm;
+	struct kpp_request *req;
+	uint8_t *secret;
+	uint8_t *outbuf;
 	struct kdf_sdesc *sdesc = NULL;
 
 	if (!params || (!buffer && buflen)) {
 		ret = -EINVAL;
-		goto out;
+		goto out1;
 	}
 	if (copy_from_user(&pcopy, params, sizeof(pcopy)) != 0) {
 		ret = -EFAULT;
-		goto out;
+		goto out1;
 	}
 
 	if (kdfcopy) {
@@ -244,104 +269,147 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 		if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN ||
 		    kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) {
 			ret = -EMSGSIZE;
-			goto out;
+			goto out1;
 		}
 
 		/* get KDF name string */
 		hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME);
 		if (IS_ERR(hashname)) {
 			ret = PTR_ERR(hashname);
-			goto out;
+			goto out1;
 		}
 
 		/* allocate KDF from the kernel crypto API */
 		ret = kdf_alloc(&sdesc, hashname);
 		kfree(hashname);
 		if (ret)
-			goto out;
+			goto out1;
 	}
 
-	/*
-	 * If the caller requests postprocessing with a KDF, allow an
-	 * arbitrary output buffer size since the KDF ensures proper truncation.
-	 */
-	keylen = mpi_from_key(pcopy.prime, kdfcopy ? SIZE_MAX : buflen, &prime);
-	if (keylen < 0 || !prime) {
-		/* buflen == 0 may be used to query the required buffer size,
-		 * which is the prime key length.
-		 */
-		ret = keylen;
-		goto out;
+	memset(&dh_inputs, 0, sizeof(dh_inputs));
+
+	dlen = dh_data_from_key(pcopy.prime, &dh_inputs.p);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out1;
 	}
+	dh_inputs.p_size = dlen;
 
-	/* The result is never longer than the prime */
-	resultlen = keylen;
-
-	keylen = mpi_from_key(pcopy.base, SIZE_MAX, &base);
-	if (keylen < 0 || !base) {
-		ret = keylen;
-		goto error1;
+	dlen = dh_data_from_key(pcopy.base, &dh_inputs.g);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out2;
 	}
+	dh_inputs.g_size = dlen;
 
-	keylen = mpi_from_key(pcopy.private, SIZE_MAX, &private);
-	if (keylen < 0 || !private) {
-		ret = keylen;
-		goto error2;
+	dlen = dh_data_from_key(pcopy.private, &dh_inputs.key);
+	if (dlen < 0) {
+		ret = dlen;
+		goto out2;
 	}
+	dh_inputs.key_size = dlen;
 
-	result = mpi_alloc(0);
-	if (!result) {
+	secretlen = crypto_dh_key_len(&dh_inputs);
+	secret = kmalloc(secretlen, GFP_KERNEL);
+	if (!secret) {
 		ret = -ENOMEM;
-		goto error3;
+		goto out2;
 	}
-
-	/* allocate space for DH shared secret and SP800-56A otherinfo */
-	kbuf = kmalloc(kdfcopy ? (resultlen + kdfcopy->otherinfolen) : resultlen,
-		       GFP_KERNEL);
-	if (!kbuf) {
-		ret = -ENOMEM;
-		goto error4;
-	}
-
-	/*
-	 * Concatenate SP800-56A otherinfo past DH shared secret -- the
-	 * input to the KDF is (DH shared secret || otherinfo)
-	 */
-	if (kdfcopy && kdfcopy->otherinfo &&
-	    copy_from_user(kbuf + resultlen, kdfcopy->otherinfo,
-			   kdfcopy->otherinfolen) != 0) {
-		ret = -EFAULT;
-		goto error5;
-	}
-
-	ret = do_dh(result, base, private, prime);
+	ret = crypto_dh_encode_key(secret, secretlen, &dh_inputs);
 	if (ret)
-		goto error5;
+		goto out3;
 
-	ret = mpi_read_buffer(result, kbuf, resultlen, &nbytes, NULL);
-	if (ret != 0)
-		goto error5;
+	tfm = crypto_alloc_kpp("dh", CRYPTO_ALG_TYPE_KPP, 0);
+	if (IS_ERR(tfm)) {
+		ret = PTR_ERR(tfm);
+		goto out3;
+	}
+
+	ret = crypto_kpp_set_secret(tfm, secret, secretlen);
+	if (ret)
+		goto out4;
+
+	outlen = crypto_kpp_maxsize(tfm);
+
+	if (!kdfcopy) {
+		/*
+		 * When not using a KDF, buflen 0 is used to read the
+		 * required buffer length
+		 */
+		if (buflen == 0) {
+			ret = outlen;
+			goto out4;
+		} else if (outlen > buflen) {
+			ret = -EOVERFLOW;
+			goto out4;
+		}
+	}
+
+	outbuf = kzalloc(kdfcopy ? (outlen + kdfcopy->otherinfolen) : outlen,
+			 GFP_KERNEL);
+	if (!outbuf) {
+		ret = -ENOMEM;
+		goto out4;
+	}
+
+	sg_init_one(&outsg, outbuf, outlen);
+
+	req = kpp_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		ret = -ENOMEM;
+		goto out5;
+	}
+
+	kpp_request_set_input(req, NULL, 0);
+	kpp_request_set_output(req, &outsg, outlen);
+	init_completion(&compl.completion);
+	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				 CRYPTO_TFM_REQ_MAY_SLEEP,
+				 dh_crypto_done, &compl);
+
+	/*
+	 * For DH, generate_public_key and generate_shared_secret are
+	 * the same calculation
+	 */
+	ret = crypto_kpp_generate_public_key(req);
+	if (ret == -EINPROGRESS) {
+		wait_for_completion(&compl.completion);
+		ret = compl.err;
+		if (ret)
+			goto out6;
+	}
 
 	if (kdfcopy) {
-		ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, kbuf,
-					    resultlen + kdfcopy->otherinfolen);
-	} else {
-		ret = nbytes;
-		if (copy_to_user(buffer, kbuf, nbytes) != 0)
+		/*
+		 * Concatenate SP800-56A otherinfo past DH shared secret -- the
+		 * input to the KDF is (DH shared secret || otherinfo)
+		 */
+		if (copy_from_user(outbuf + req->dst_len, kdfcopy->otherinfo,
+				   kdfcopy->otherinfolen) != 0) {
 			ret = -EFAULT;
+			goto out6;
+		}
+
+		ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, outbuf,
+					    req->dst_len + kdfcopy->otherinfolen,
+					    outlen - req->dst_len);
+	} else if (copy_to_user(buffer, outbuf, req->dst_len) == 0) {
+		ret = req->dst_len;
+	} else {
+		ret = -EFAULT;
 	}
 
-error5:
-	kzfree(kbuf);
-error4:
-	mpi_free(result);
-error3:
-	mpi_free(private);
-error2:
-	mpi_free(base);
-error1:
-	mpi_free(prime);
-out:
+out6:
+	kpp_request_free(req);
+out5:
+	kzfree(outbuf);
+out4:
+	crypto_free_kpp(tfm);
+out3:
+	kzfree(secret);
+out2:
+	dh_free_data(&dh_inputs);
+out1:
 	kdf_dealloc(sdesc);
 	return ret;
 }
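
For orientation, a compressed sketch of the crypto KPP call sequence that replaces the MPI arithmetic in keys/dh.c above; error handling and the asynchronous completion path are omitted, and params/out/secret are placeholders rather than the patch's variables:

struct dh params = { 0 };	/* .key, .p, .g and their sizes from the key payloads */
struct crypto_kpp *tfm;
struct kpp_request *req;
struct scatterlist sg;
unsigned int secretlen, outlen;
void *secret, *out;

secretlen = crypto_dh_key_len(&params);
secret = kmalloc(secretlen, GFP_KERNEL);
crypto_dh_encode_key(secret, secretlen, &params);	/* pack p, g, key */

tfm = crypto_alloc_kpp("dh", CRYPTO_ALG_TYPE_KPP, 0);
crypto_kpp_set_secret(tfm, secret, secretlen);

outlen = crypto_kpp_maxsize(tfm);
out = kzalloc(outlen, GFP_KERNEL);
sg_init_one(&sg, out, outlen);

req = kpp_request_alloc(tfm, GFP_KERNEL);
kpp_request_set_input(req, NULL, 0);	/* no remote key: compute g^x mod p */
kpp_request_set_output(req, &sg, outlen);
crypto_kpp_generate_public_key(req);	/* result length ends up in req->dst_len */

kpp_request_free(req);
crypto_free_kpp(tfm);
kzfree(secret);
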
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 0010955..bb6324d 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -30,6 +30,7 @@
 #include <linux/scatterlist.h>
 #include <linux/ctype.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
@@ -54,13 +55,7 @@ static int blksize;
 #define MAX_DATA_SIZE 4096
 #define MIN_DATA_SIZE  20
 
-struct sdesc {
-	struct shash_desc shash;
-	char ctx[];
-};
-
-static struct crypto_shash *hashalg;
-static struct crypto_shash *hmacalg;
+static struct crypto_shash *hash_tfm;
 
 enum {
 	Opt_err = -1, Opt_new, Opt_load, Opt_update
@@ -141,23 +136,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc)
  */
 static int valid_master_desc(const char *new_desc, const char *orig_desc)
 {
-	if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) {
-		if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN)
-			goto out;
-		if (orig_desc)
-			if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN))
-				goto out;
-	} else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) {
-		if (strlen(new_desc) == KEY_USER_PREFIX_LEN)
-			goto out;
-		if (orig_desc)
-			if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN))
-				goto out;
-	} else
-		goto out;
+	int prefix_len;
+
+	if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN))
+		prefix_len = KEY_TRUSTED_PREFIX_LEN;
+	else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN))
+		prefix_len = KEY_USER_PREFIX_LEN;
+	else
+		return -EINVAL;
+
+	if (!new_desc[prefix_len])
+		return -EINVAL;
+
+	if (orig_desc && strncmp(new_desc, orig_desc, prefix_len))
+		return -EINVAL;
+
 	return 0;
-out:
-	return -EINVAL;
 }
 
 /*
@@ -321,53 +315,38 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k
 	return ukey;
 }
 
-static struct sdesc *alloc_sdesc(struct crypto_shash *alg)
+static int calc_hash(struct crypto_shash *tfm, u8 *digest,
+		     const u8 *buf, unsigned int buflen)
 {
-	struct sdesc *sdesc;
-	int size;
+	SHASH_DESC_ON_STACK(desc, tfm);
+	int err;
 
-	size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
-	sdesc = kmalloc(size, GFP_KERNEL);
-	if (!sdesc)
-		return ERR_PTR(-ENOMEM);
-	sdesc->shash.tfm = alg;
-	sdesc->shash.flags = 0x0;
-	return sdesc;
+	desc->tfm = tfm;
+	desc->flags = 0;
+
+	err = crypto_shash_digest(desc, buf, buflen, digest);
+	shash_desc_zero(desc);
+	return err;
 }
 
 static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen,
 		     const u8 *buf, unsigned int buflen)
 {
-	struct sdesc *sdesc;
-	int ret;
+	struct crypto_shash *tfm;
+	int err;
 
-	sdesc = alloc_sdesc(hmacalg);
-	if (IS_ERR(sdesc)) {
-		pr_info("encrypted_key: can't alloc %s\n", hmac_alg);
-		return PTR_ERR(sdesc);
+	tfm = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		pr_err("encrypted_key: can't alloc %s transform: %ld\n",
+		       hmac_alg, PTR_ERR(tfm));
+		return PTR_ERR(tfm);
 	}
 
-	ret = crypto_shash_setkey(hmacalg, key, keylen);
-	if (!ret)
-		ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest);
-	kfree(sdesc);
-	return ret;
-}
-
-static int calc_hash(u8 *digest, const u8 *buf, unsigned int buflen)
-{
-	struct sdesc *sdesc;
-	int ret;
-
-	sdesc = alloc_sdesc(hashalg);
-	if (IS_ERR(sdesc)) {
-		pr_info("encrypted_key: can't alloc %s\n", hash_alg);
-		return PTR_ERR(sdesc);
-	}
-
-	ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest);
-	kfree(sdesc);
-	return ret;
+	err = crypto_shash_setkey(tfm, key, keylen);
+	if (!err)
+		err = calc_hash(tfm, digest, buf, buflen);
+	crypto_free_shash(tfm);
+	return err;
 }
 
 enum derived_key_type { ENC_KEY, AUTH_KEY };
@@ -385,10 +364,9 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
 		derived_buf_len = HASH_SIZE;
 
 	derived_buf = kzalloc(derived_buf_len, GFP_KERNEL);
-	if (!derived_buf) {
-		pr_err("encrypted_key: out of memory\n");
+	if (!derived_buf)
 		return -ENOMEM;
-	}
+
 	if (key_type)
 		strcpy(derived_buf, "AUTH_KEY");
 	else
@@ -396,8 +374,8 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
 
 	memcpy(derived_buf + strlen(derived_buf) + 1, master_key,
 	       master_keylen);
-	ret = calc_hash(derived_key, derived_buf, derived_buf_len);
-	kfree(derived_buf);
+	ret = calc_hash(hash_tfm, derived_key, derived_buf, derived_buf_len);
+	kzfree(derived_buf);
 	return ret;
 }
 
@@ -480,12 +458,9 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
 	u8 iv[AES_BLOCK_SIZE];
-	unsigned int padlen;
-	char pad[16];
 	int ret;
 
 	encrypted_datalen = roundup(epayload->decrypted_datalen, blksize);
-	padlen = encrypted_datalen - epayload->decrypted_datalen;
 
 	req = init_skcipher_req(derived_key, derived_keylen);
 	ret = PTR_ERR(req);
@@ -493,11 +468,10 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_decrypted_data(epayload);
 
-	memset(pad, 0, sizeof pad);
 	sg_init_table(sg_in, 2);
 	sg_set_buf(&sg_in[0], epayload->decrypted_data,
 		   epayload->decrypted_datalen);
-	sg_set_buf(&sg_in[1], pad, padlen);
+	sg_set_page(&sg_in[1], ZERO_PAGE(0), AES_BLOCK_SIZE, 0);
 
 	sg_init_table(sg_out, 1);
 	sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen);
@@ -533,6 +507,7 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload,
 	if (!ret)
 		dump_hmac(NULL, digest, HASH_SIZE);
 out:
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -561,8 +536,8 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
 	ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len);
 	if (ret < 0)
 		goto out;
-	ret = memcmp(digest, epayload->format + epayload->datablob_len,
-		     sizeof digest);
+	ret = crypto_memneq(digest, epayload->format + epayload->datablob_len,
+			    sizeof(digest));
 	if (ret) {
 		ret = -EINVAL;
 		dump_hmac("datablob",
@@ -571,6 +546,7 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
 		dump_hmac("calc", digest, HASH_SIZE);
 	}
 out:
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -584,9 +560,14 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
 	u8 iv[AES_BLOCK_SIZE];
-	char pad[16];
+	u8 *pad;
 	int ret;
 
+	/* Throwaway buffer to hold the unused zero padding at the end */
+	pad = kmalloc(AES_BLOCK_SIZE, GFP_KERNEL);
+	if (!pad)
+		return -ENOMEM;
+
 	encrypted_datalen = roundup(epayload->decrypted_datalen, blksize);
 	req = init_skcipher_req(derived_key, derived_keylen);
 	ret = PTR_ERR(req);
@@ -594,13 +575,12 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_encrypted_data(epayload, encrypted_datalen);
 
-	memset(pad, 0, sizeof pad);
 	sg_init_table(sg_in, 1);
 	sg_init_table(sg_out, 2);
 	sg_set_buf(sg_in, epayload->encrypted_data, encrypted_datalen);
 	sg_set_buf(&sg_out[0], epayload->decrypted_data,
 		   epayload->decrypted_datalen);
-	sg_set_buf(&sg_out[1], pad, sizeof pad);
+	sg_set_buf(&sg_out[1], pad, AES_BLOCK_SIZE);
 
 	memcpy(iv, epayload->iv, sizeof(iv));
 	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
@@ -612,6 +592,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload,
 		goto out;
 	dump_decrypted_data(epayload);
 out:
+	kfree(pad);
 	return ret;
 }
 
@@ -722,6 +703,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
 out:
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
@@ -828,13 +810,13 @@ static int encrypted_instantiate(struct key *key,
 	ret = encrypted_init(epayload, key->description, format, master_desc,
 			     decrypted_datalen, hex_encoded_iv);
 	if (ret < 0) {
-		kfree(epayload);
+		kzfree(epayload);
 		goto out;
 	}
 
 	rcu_assign_keypointer(key, epayload);
 out:
-	kfree(datablob);
+	kzfree(datablob);
 	return ret;
 }
 
@@ -843,8 +825,7 @@ static void encrypted_rcu_free(struct rcu_head *rcu)
 	struct encrypted_key_payload *epayload;
 
 	epayload = container_of(rcu, struct encrypted_key_payload, rcu);
-	memset(epayload->decrypted_data, 0, epayload->decrypted_datalen);
-	kfree(epayload);
+	kzfree(epayload);
 }
 
 /*
@@ -902,7 +883,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 	rcu_assign_keypointer(key, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
 out:
-	kfree(buf);
+	kzfree(buf);
 	return ret;
 }
 
@@ -960,33 +941,26 @@ static long encrypted_read(const struct key *key, char __user *buffer,
 
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 
 	if (copy_to_user(buffer, ascii_buf, asciiblob_len) != 0)
 		ret = -EFAULT;
-	kfree(ascii_buf);
+	kzfree(ascii_buf);
 
 	return asciiblob_len;
 out:
 	up_read(&mkey->sem);
 	key_put(mkey);
+	memzero_explicit(derived_key, sizeof(derived_key));
 	return ret;
 }
 
 /*
- * encrypted_destroy - before freeing the key, clear the decrypted data
- *
- * Before freeing the key, clear the memory containing the decrypted
- * key data.
+ * encrypted_destroy - clear and free the key's payload
  */
 static void encrypted_destroy(struct key *key)
 {
-	struct encrypted_key_payload *epayload = key->payload.data[0];
-
-	if (!epayload)
-		return;
-
-	memzero_explicit(epayload->decrypted_data, epayload->decrypted_datalen);
-	kfree(key->payload.data[0]);
+	kzfree(key->payload.data[0]);
 }
 
 struct key_type key_type_encrypted = {
@@ -999,47 +973,17 @@ struct key_type key_type_encrypted = {
 };
 EXPORT_SYMBOL_GPL(key_type_encrypted);
 
-static void encrypted_shash_release(void)
-{
-	if (hashalg)
-		crypto_free_shash(hashalg);
-	if (hmacalg)
-		crypto_free_shash(hmacalg);
-}
-
-static int __init encrypted_shash_alloc(void)
-{
-	int ret;
-
-	hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hmacalg)) {
-		pr_info("encrypted_key: could not allocate crypto %s\n",
-			hmac_alg);
-		return PTR_ERR(hmacalg);
-	}
-
-	hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hashalg)) {
-		pr_info("encrypted_key: could not allocate crypto %s\n",
-			hash_alg);
-		ret = PTR_ERR(hashalg);
-		goto hashalg_fail;
-	}
-
-	return 0;
-
-hashalg_fail:
-	crypto_free_shash(hmacalg);
-	return ret;
-}
-
 static int __init init_encrypted(void)
 {
 	int ret;
 
-	ret = encrypted_shash_alloc();
-	if (ret < 0)
-		return ret;
+	hash_tfm = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hash_tfm)) {
+		pr_err("encrypted_key: can't allocate %s transform: %ld\n",
+		       hash_alg, PTR_ERR(hash_tfm));
+		return PTR_ERR(hash_tfm);
+	}
+
 	ret = aes_get_sizes();
 	if (ret < 0)
 		goto out;
@@ -1048,14 +992,14 @@ static int __init init_encrypted(void)
 		goto out;
 	return 0;
 out:
-	encrypted_shash_release();
+	crypto_free_shash(hash_tfm);
 	return ret;
 
 }
 
 static void __exit cleanup_encrypted(void)
 {
-	encrypted_shash_release();
+	crypto_free_shash(hash_tfm);
 	unregister_key_type(&key_type_encrypted);
 }
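
The calc_hash()/calc_hmac() rework above is the standard one-shot shash pattern with an on-stack descriptor. A self-contained sketch assuming an "hmac(sha256)" transform (the algorithm name and function name are arbitrary here, not the module's):

static int hmac_once(const u8 *key, unsigned int keylen,
		     const u8 *buf, unsigned int buflen, u8 *digest)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("hmac(sha256)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, keylen);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		/* digest must hold crypto_shash_digestsize(tfm) bytes */
		err = crypto_shash_digest(desc, buf, buflen, digest);
		shash_desc_zero(desc);
	}
	crypto_free_shash(tfm);
	return err;
}
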
 
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 595becc..87cb260 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -158,9 +158,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
 
 		kfree(key->description);
 
-#ifdef KEY_DEBUGGING
-		key->magic = KEY_DEBUG_MAGIC_X;
-#endif
+		memzero_explicit(key, sizeof(*key));
 		kmem_cache_free(key_jar, key);
 	}
 }
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c0f8682..91bc621 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -13,6 +13,7 @@
 #define _INTERNAL_H
 
 #include <linux/sched.h>
+#include <linux/wait_bit.h>
 #include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/task_work.h>
diff --git a/security/keys/key.c b/security/keys/key.c
index 455c04d..83da68d 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -660,14 +660,11 @@ struct key *key_lookup(key_serial_t id)
 	goto error;
 
 found:
-	/* pretend it doesn't exist if it is awaiting deletion */
-	if (refcount_read(&key->usage) == 0)
-		goto not_found;
-
-	/* this races with key_put(), but that doesn't matter since key_put()
-	 * doesn't actually change the key
+	/* A key is allowed to be looked up only if someone still owns a
+	 * reference to it - otherwise it's awaiting the gc.
 	 */
-	__key_get(key);
+	if (!refcount_inc_not_zero(&key->usage))
+		goto not_found;
 
 error:
 	spin_unlock(&key_serial_lock);
@@ -966,12 +963,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 	/* the key must be writable */
 	ret = key_permission(key_ref, KEY_NEED_WRITE);
 	if (ret < 0)
-		goto error;
+		return ret;
 
 	/* attempt to update it if supported */
-	ret = -EOPNOTSUPP;
 	if (!key->type->update)
-		goto error;
+		return -EOPNOTSUPP;
 
 	memset(&prep, 0, sizeof(prep));
 	prep.data = payload;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 447a7d5..ab0b337 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -99,7 +99,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 	/* pull the payload in if one was supplied */
 	payload = NULL;
 
-	if (_payload) {
+	if (plen) {
 		ret = -ENOMEM;
 		payload = kvmalloc(plen, GFP_KERNEL);
 		if (!payload)
@@ -132,7 +132,10 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 
 	key_ref_put(keyring_ref);
  error3:
-	kvfree(payload);
+	if (payload) {
+		memzero_explicit(payload, plen);
+		kvfree(payload);
+	}
  error2:
 	kfree(description);
  error:
@@ -324,7 +327,7 @@ long keyctl_update_key(key_serial_t id,
 
 	/* pull the payload in if one was supplied */
 	payload = NULL;
-	if (_payload) {
+	if (plen) {
 		ret = -ENOMEM;
 		payload = kmalloc(plen, GFP_KERNEL);
 		if (!payload)
@@ -347,7 +350,7 @@ long keyctl_update_key(key_serial_t id,
 
 	key_ref_put(key_ref);
 error2:
-	kfree(payload);
+	kzfree(payload);
 error:
 	return ret;
 }
@@ -1093,7 +1096,10 @@ long keyctl_instantiate_key_common(key_serial_t id,
 		keyctl_change_reqkey_auth(NULL);
 
 error2:
-	kvfree(payload);
+	if (payload) {
+		memzero_explicit(payload, plen);
+		kvfree(payload);
+	}
 error:
 	return ret;
 }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 4d1678e4..de81793 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -706,7 +706,7 @@ static bool search_nested_keyrings(struct key *keyring,
 	 * Non-keyrings avoid the leftmost branch of the root entirely (root
 	 * slots 1-15).
 	 */
-	ptr = ACCESS_ONCE(keyring->keys.root);
+	ptr = READ_ONCE(keyring->keys.root);
 	if (!ptr)
 		goto not_this_keyring;
 
@@ -720,7 +720,7 @@ static bool search_nested_keyrings(struct key *keyring,
 		if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
 			goto not_this_keyring;
 
-		ptr = ACCESS_ONCE(shortcut->next_node);
+		ptr = READ_ONCE(shortcut->next_node);
 		node = assoc_array_ptr_to_node(ptr);
 		goto begin_node;
 	}
@@ -740,7 +740,7 @@ static bool search_nested_keyrings(struct key *keyring,
 	if (assoc_array_ptr_is_shortcut(ptr)) {
 		shortcut = assoc_array_ptr_to_shortcut(ptr);
 		smp_read_barrier_depends();
-		ptr = ACCESS_ONCE(shortcut->next_node);
+		ptr = READ_ONCE(shortcut->next_node);
 		BUG_ON(!assoc_array_ptr_is_node(ptr));
 	}
 	node = assoc_array_ptr_to_node(ptr);
@@ -752,7 +752,7 @@ static bool search_nested_keyrings(struct key *keyring,
 ascend_to_node:
 	/* Go through the slots in a node */
 	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
-		ptr = ACCESS_ONCE(node->slots[slot]);
+		ptr = READ_ONCE(node->slots[slot]);
 
 		if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
 			goto descend_to_node;
@@ -790,13 +790,13 @@ static bool search_nested_keyrings(struct key *keyring,
 	/* We've dealt with all the slots in the current node, so now we need
 	 * to ascend to the parent and continue processing there.
 	 */
-	ptr = ACCESS_ONCE(node->back_pointer);
+	ptr = READ_ONCE(node->back_pointer);
 	slot = node->parent_slot;
 
 	if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
 		shortcut = assoc_array_ptr_to_shortcut(ptr);
 		smp_read_barrier_depends();
-		ptr = ACCESS_ONCE(shortcut->back_pointer);
+		ptr = READ_ONCE(shortcut->back_pointer);
 		slot = shortcut->parent_slot;
 	}
 	if (!ptr)
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 2217dfec..86bced9 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -809,15 +809,14 @@ long join_session_keyring(const char *name)
 		ret = PTR_ERR(keyring);
 		goto error2;
 	} else if (keyring == new->session_keyring) {
-		key_put(keyring);
 		ret = 0;
-		goto error2;
+		goto error3;
 	}
 
 	/* we've got a keyring - now to install it */
 	ret = install_session_keyring_to_cred(new, keyring);
 	if (ret < 0)
-		goto error2;
+		goto error3;
 
 	commit_creds(new);
 	mutex_unlock(&key_session_mutex);
@@ -827,6 +826,8 @@ long join_session_keyring(const char *name)
 okay:
 	return ret;
 
+error3:
+	key_put(keyring);
 error2:
 	mutex_unlock(&key_session_mutex);
 error:
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 2ae31c5..435e86e 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -70,7 +70,7 @@ static int TSS_sha1(const unsigned char *data, unsigned int datalen,
 	}
 
 	ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest);
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -114,7 +114,7 @@ static int TSS_rawhmac(unsigned char *digest, const unsigned char *key,
 	if (!ret)
 		ret = crypto_shash_final(&sdesc->shash, digest);
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -165,7 +165,7 @@ static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 				  paramdigest, TPM_NONCE_SIZE, h1,
 				  TPM_NONCE_SIZE, h2, 1, &c, 0, 0);
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -246,7 +246,7 @@ static int TSS_checkhmac1(unsigned char *buffer,
 	if (memcmp(testhmac, authdata, SHA1_DIGEST_SIZE))
 		ret = -EINVAL;
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -347,7 +347,7 @@ static int TSS_checkhmac2(unsigned char *buffer,
 	if (memcmp(testhmac2, authdata2, SHA1_DIGEST_SIZE))
 		ret = -EINVAL;
 out:
-	kfree(sdesc);
+	kzfree(sdesc);
 	return ret;
 }
 
@@ -564,7 +564,7 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
 		*bloblen = storedsize;
 	}
 out:
-	kfree(td);
+	kzfree(td);
 	return ret;
 }
 
@@ -678,7 +678,7 @@ static int key_seal(struct trusted_key_payload *p,
 	if (ret < 0)
 		pr_info("trusted_key: srkseal failed (%d)\n", ret);
 
-	kfree(tb);
+	kzfree(tb);
 	return ret;
 }
 
@@ -703,7 +703,7 @@ static int key_unseal(struct trusted_key_payload *p,
 		/* pull migratable flag out of sealed key */
 		p->migratable = p->key[--p->key_len];
 
-	kfree(tb);
+	kzfree(tb);
 	return ret;
 }
 
@@ -1037,12 +1037,12 @@ static int trusted_instantiate(struct key *key,
 	if (!ret && options->pcrlock)
 		ret = pcrlock(options->pcrlock);
 out:
-	kfree(datablob);
-	kfree(options);
+	kzfree(datablob);
+	kzfree(options);
 	if (!ret)
 		rcu_assign_keypointer(key, payload);
 	else
-		kfree(payload);
+		kzfree(payload);
 	return ret;
 }
 
@@ -1051,8 +1051,7 @@ static void trusted_rcu_free(struct rcu_head *rcu)
 	struct trusted_key_payload *p;
 
 	p = container_of(rcu, struct trusted_key_payload, rcu);
-	memset(p->key, 0, p->key_len);
-	kfree(p);
+	kzfree(p);
 }
 
 /*
@@ -1094,13 +1093,13 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 	ret = datablob_parse(datablob, new_p, new_o);
 	if (ret != Opt_update) {
 		ret = -EINVAL;
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 
 	if (!new_o->keyhandle) {
 		ret = -EINVAL;
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 
@@ -1114,22 +1113,22 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 	ret = key_seal(new_p, new_o);
 	if (ret < 0) {
 		pr_info("trusted_key: key_seal failed (%d)\n", ret);
-		kfree(new_p);
+		kzfree(new_p);
 		goto out;
 	}
 	if (new_o->pcrlock) {
 		ret = pcrlock(new_o->pcrlock);
 		if (ret < 0) {
 			pr_info("trusted_key: pcrlock failed (%d)\n", ret);
-			kfree(new_p);
+			kzfree(new_p);
 			goto out;
 		}
 	}
 	rcu_assign_keypointer(key, new_p);
 	call_rcu(&p->rcu, trusted_rcu_free);
 out:
-	kfree(datablob);
-	kfree(new_o);
+	kzfree(datablob);
+	kzfree(new_o);
 	return ret;
 }
 
@@ -1158,24 +1157,19 @@ static long trusted_read(const struct key *key, char __user *buffer,
 	for (i = 0; i < p->blob_len; i++)
 		bufp = hex_byte_pack(bufp, p->blob[i]);
 	if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) {
-		kfree(ascii_buf);
+		kzfree(ascii_buf);
 		return -EFAULT;
 	}
-	kfree(ascii_buf);
+	kzfree(ascii_buf);
 	return 2 * p->blob_len;
 }
 
 /*
- * trusted_destroy - before freeing the key, clear the decrypted data
+ * trusted_destroy - clear and free the key's payload
  */
 static void trusted_destroy(struct key *key)
 {
-	struct trusted_key_payload *p = key->payload.data[0];
-
-	if (!p)
-		return;
-	memset(p->key, 0, p->key_len);
-	kfree(key->payload.data[0]);
+	kzfree(key->payload.data[0]);
 }
 
 struct key_type key_type_trusted = {
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 2660513..3d8c68e 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -86,10 +86,18 @@ EXPORT_SYMBOL_GPL(user_preparse);
  */
 void user_free_preparse(struct key_preparsed_payload *prep)
 {
-	kfree(prep->payload.data[0]);
+	kzfree(prep->payload.data[0]);
 }
 EXPORT_SYMBOL_GPL(user_free_preparse);
 
+static void user_free_payload_rcu(struct rcu_head *head)
+{
+	struct user_key_payload *payload;
+
+	payload = container_of(head, struct user_key_payload, rcu);
+	kzfree(payload);
+}
+
 /*
  * update a user defined key
  * - the key's semaphore is write-locked
@@ -112,7 +120,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep)
 	prep->payload.data[0] = NULL;
 
 	if (zap)
-		kfree_rcu(zap, rcu);
+		call_rcu(&zap->rcu, user_free_payload_rcu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(user_update);
@@ -130,7 +138,7 @@ void user_revoke(struct key *key)
 
 	if (upayload) {
 		rcu_assign_keypointer(key, NULL);
-		kfree_rcu(upayload, rcu);
+		call_rcu(&upayload->rcu, user_free_payload_rcu);
 	}
 }
 
@@ -143,7 +151,7 @@ void user_destroy(struct key *key)
 {
 	struct user_key_payload *upayload = key->payload.data[0];
 
-	kfree(upayload);
+	kzfree(upayload);
 }
 
 EXPORT_SYMBOL_GPL(user_destroy);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e67a526..819fd68 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1106,10 +1106,8 @@ static int selinux_parse_opts_str(char *options,
 
 	opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int),
 				       GFP_KERNEL);
-	if (!opts->mnt_opts_flags) {
-		kfree(opts->mnt_opts);
+	if (!opts->mnt_opts_flags)
 		goto out_err;
-	}
 
 	if (fscontext) {
 		opts->mnt_opts[num_mnt_opts] = fscontext;
@@ -1132,6 +1130,7 @@ static int selinux_parse_opts_str(char *options,
 	return 0;
 
 out_err:
+	security_free_mnt_opts(opts);
 	kfree(context);
 	kfree(defcontext);
 	kfree(fscontext);
diff --git a/sound/core/control.c b/sound/core/control.c
index c109b82..6362da1 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1577,7 +1577,7 @@ static ssize_t snd_ctl_read(struct file *file, char __user *buffer,
 		struct snd_ctl_event ev;
 		struct snd_kctl_event *kev;
 		while (list_empty(&ctl->events)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
 				goto __end_lock;
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index 9602a7e..a73baa1 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -85,7 +85,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file)
 	int major = imajor(inode);
 	struct snd_hwdep *hw;
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (major == snd_major) {
 		hw = snd_lookup_minor_data(iminor(inode),
diff --git a/sound/core/init.c b/sound/core/init.c
index 6bda843..d61d2b3 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(snd_card_file_remove);
  */
 int snd_power_wait(struct snd_card *card, unsigned int power_state)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 
 	/* fastpath */
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 36baf96..cd8b7be 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1554,7 +1554,7 @@ static int snd_pcm_oss_sync1(struct snd_pcm_substream *substream, size_t size)
 	ssize_t result = 0;
 	snd_pcm_state_t state;
 	long res;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	runtime = substream->runtime;
 	init_waitqueue_entry(&wait, current);
@@ -2387,7 +2387,7 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file)
 	struct snd_pcm_oss_file *pcm_oss_file;
 	struct snd_pcm_oss_setup setup[2];
 	int nonblock;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	err = nonseekable_open(inode, file);
 	if (err < 0)
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 5088d4b..8771760 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1904,7 +1904,7 @@ static int wait_for_avail(struct snd_pcm_substream *substream,
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int err = 0;
 	snd_pcm_uframes_t avail = 0;
 	long wait_time, tout;
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
 	struct snd_pcm_substream *substream;
 	const struct snd_pcm_chmap_elem *map;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	substream = snd_pcm_chmap_substream(info, idx);
 	if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 	unsigned int __user *dst;
 	int c, count = 0;
 
-	if (snd_BUG_ON(!info->chmap))
+	if (!info->chmap)
 		return -EINVAL;
 	if (size < 8)
 		return -ENOMEM;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 13dec5e..faa2e2b 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1652,7 +1652,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	struct snd_card *card;
 	struct snd_pcm_runtime *runtime;
 	struct snd_pcm_substream *s;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int result = 0;
 	int nonblock = 0;
 
@@ -2353,7 +2353,7 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file)
 static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pcm == NULL) {
 		err = -ENODEV;
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index ab89033..32588ad 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -368,7 +368,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file)
 	int err;
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_file *rawmidi_file = NULL;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if ((file->f_flags & O_APPEND) && !(file->f_flags & O_NONBLOCK)) 
 		return -EINVAL;		/* invalid combination */
@@ -1002,7 +1002,7 @@ static ssize_t snd_rawmidi_read(struct file *file, char __user *buf, size_t coun
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready(substream)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1306,7 +1306,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	while (count > 0) {
 		spin_lock_irq(&runtime->lock);
 		while (!snd_rawmidi_ready_append(substream, count)) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			if (file->f_flags & O_NONBLOCK) {
 				spin_unlock_irq(&runtime->lock);
 				return result > 0 ? result : -EAGAIN;
@@ -1338,7 +1338,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf,
 	if (file->f_flags & O_DSYNC) {
 		spin_lock_irq(&runtime->lock);
 		while (runtime->avail != runtime->buffer_size) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 			unsigned int last_avail = runtime->avail;
 			init_waitqueue_entry(&wait, current);
 			add_wait_queue(&runtime->sleep, &wait);
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 01c4cfe..a8c2822 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -179,7 +179,7 @@ int snd_seq_fifo_cell_out(struct snd_seq_fifo *f,
 {
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (snd_BUG_ON(!f))
 		return -EINVAL;
diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index d4c61ec..d6e9aac 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -227,7 +227,7 @@ static int snd_seq_cell_alloc(struct snd_seq_pool *pool,
 	struct snd_seq_event_cell *cell;
 	unsigned long flags;
 	int err = -EAGAIN;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	if (pool == NULL)
 		return -EINVAL;
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 2f836ca..884c306 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -1618,6 +1618,7 @@ static int snd_timer_user_tselect(struct file *file,
 	if (err < 0)
 		goto __err;
 
+	tu->qhead = tu->qtail = tu->qused = 0;
 	kfree(tu->queue);
 	tu->queue = NULL;
 	kfree(tu->tqueue);
@@ -1959,10 +1960,11 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 
 	tu = file->private_data;
 	unit = tu->tread ? sizeof(struct snd_timer_tread) : sizeof(struct snd_timer_read);
+	mutex_lock(&tu->ioctl_lock);
 	spin_lock_irq(&tu->qlock);
 	while ((long)count - result >= unit) {
 		while (!tu->qused) {
-			wait_queue_t wait;
+			wait_queue_entry_t wait;
 
 			if ((file->f_flags & O_NONBLOCK) != 0 || result > 0) {
 				err = -EAGAIN;
@@ -1974,7 +1976,9 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 			add_wait_queue(&tu->qchange_sleep, &wait);
 
 			spin_unlock_irq(&tu->qlock);
+			mutex_unlock(&tu->ioctl_lock);
 			schedule();
+			mutex_lock(&tu->ioctl_lock);
 			spin_lock_irq(&tu->qlock);
 
 			remove_wait_queue(&tu->qchange_sleep, &wait);
@@ -1994,7 +1998,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 		tu->qused--;
 		spin_unlock_irq(&tu->qlock);
 
-		mutex_lock(&tu->ioctl_lock);
 		if (tu->tread) {
 			if (copy_to_user(buffer, &tu->tqueue[qhead],
 					 sizeof(struct snd_timer_tread)))
@@ -2004,7 +2007,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 					 sizeof(struct snd_timer_read)))
 				err = -EFAULT;
 		}
-		mutex_unlock(&tu->ioctl_lock);
 
 		spin_lock_irq(&tu->qlock);
 		if (err < 0)
@@ -2014,6 +2016,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
 	}
  _error:
 	spin_unlock_irq(&tu->qlock);
+	mutex_unlock(&tu->ioctl_lock);
 	return result > 0 ? result : err;
 }
 
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 9e6f54f..1e26854 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
 		cycle = increment_cycle_count(cycle, 1);
 		if (s->handle_packet(s, 0, cycle, i) < 0) {
 			s->packet_index = -1;
-			amdtp_stream_pcm_abort(s);
+			if (in_interrupt())
+				amdtp_stream_pcm_abort(s);
+			WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 			return;
 		}
 	}
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
 	/* Queueing error or detecting invalid payload. */
 	if (i < packets) {
 		s->packet_index = -1;
-		amdtp_stream_pcm_abort(s);
+		if (in_interrupt())
+			amdtp_stream_pcm_abort(s);
+		WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 		return;
 	}
 
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h
index 7e88317..ea1a91e 100644
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -135,7 +135,7 @@ struct amdtp_stream {
 	/* For a PCM substream processing. */
 	struct snd_pcm_substream *pcm;
 	struct tasklet_struct period_tasklet;
-	unsigned int pcm_buffer_pointer;
+	snd_pcm_uframes_t pcm_buffer_pointer;
 	unsigned int pcm_period_pointer;
 
 	/* To wait for first packet. */
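
Two things happen in the stream-callback hunks above: on a queueing error outside interrupt context the PCM is no longer aborted directly, and the buffer pointer is parked at SNDRV_PCM_POS_XRUN. That sentinel is defined in terms of snd_pcm_uframes_t ((snd_pcm_uframes_t)-1), which is why the field here widens from unsigned int. A sketch of how a .pointer callback would hand that value back to the PCM core (get_amdtp_stream() is a hypothetical lookup, not the driver's real helper):

	static snd_pcm_uframes_t my_pcm_pointer(struct snd_pcm_substream *subs)
	{
		struct amdtp_stream *s = get_amdtp_stream(subs);	/* hypothetical */

		/* May be SNDRV_PCM_POS_XRUN; the PCM core treats that
		 * return value as an xrun report for the stream. */
		return READ_ONCE(s->pcm_buffer_pointer);
	}
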
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 4dae9ff..0b1e4b3 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -1782,7 +1782,7 @@ wavefront_should_cause_interrupt (snd_wavefront_t *dev,
 				  int val, int port, unsigned long timeout)
 
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 
 	init_waitqueue_entry(&wait, current);
 	spin_lock_irq(&dev->irq_lock);
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index d6fb2d5..60ce1cf 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -295,6 +295,8 @@ struct hda_codec {
 
 #define list_for_each_codec(c, bus) \
 	list_for_each_entry(c, &(bus)->core.codec_list, core.list)
+#define list_for_each_codec_safe(c, n, bus)				\
+	list_for_each_entry_safe(c, n, &(bus)->core.codec_list, core.list)
 
 /* snd_hda_codec_read/write optional flags */
 #define HDA_RW_NO_RESPONSE_FALLBACK	(1 << 0)
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 3715a57..1c60beb 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1337,8 +1337,12 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs);
 /* configure each codec instance */
 int azx_codec_configure(struct azx *chip)
 {
-	struct hda_codec *codec;
-	list_for_each_codec(codec, &chip->bus) {
+	struct hda_codec *codec, *next;
+
+	/* use _safe version here since snd_hda_codec_configure() deregisters
+	 * the device upon error and deletes itself from the bus list.
+	 */
+	list_for_each_codec_safe(codec, next, &chip->bus) {
 		snd_hda_codec_configure(codec);
 	}
 	return 0;
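
The new list_for_each_codec_safe() wrapper exists because the loop body may remove the current codec from the bus list; a plain iterator would then advance through freed memory. A minimal illustration of the _safe idiom, with a hypothetical item type:

	struct item {
		struct list_head list;
		bool broken;
	};

	static void drop_broken(struct list_head *head)
	{
		struct item *it, *next;

		/* 'next' is fetched before the body runs, so deleting
		 * and freeing 'it' is safe. */
		list_for_each_entry_safe(it, next, head, list) {
			if (it->broken) {
				list_del(&it->list);
				kfree(it);
			}
		}
	}
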
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 2842c82..71545b5 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3174,6 +3174,7 @@ static int check_dyn_adc_switch(struct hda_codec *codec)
 						spec->input_paths[i][nums]);
 					spec->input_paths[i][nums] =
 						spec->input_paths[i][n];
+					spec->input_paths[i][n] = 0;
 				}
 			}
 			nums++;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1770f08..01eb1dc 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -370,10 +370,12 @@ enum {
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
 #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
 #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-			IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)	|| \
-			IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+			  IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+			  IS_KBL_H(pci)	|| IS_GLK(pci) || IS_CFL(pci))
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
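
The IS_SKL_PLUS() rewrite above is not only about the new IDs: the old definition left everything after the first group outside the parentheses, so the macro could not be used safely as a single boolean term. A tiny illustration with hypothetical macros:

	#define BAD(x)	(x == 1) || (x == 2)
	#define GOOD(x)	((x == 1) || (x == 2))

	/* !BAD(2)  expands to  !(2 == 1) || (2 == 2)   -> true  (wrong)
	 * !GOOD(2) expands to  !((2 == 1) || (2 == 2)) -> false (correct)
	 */
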
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Kabylake-H */
 	{ PCI_DEVICE(0x8086, 0xa2f0),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+	/* Coffee Lake */
+	{ PCI_DEVICE(0x8086, 0xa348),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 58df440..cbeebc0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2324,10 +2324,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF),
 
 	SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD),
-	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
-	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
+	SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
+	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -2342,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
 	{.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"},
 	{.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"},
+	{.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"},
 	{}
 };
 
@@ -5852,7 +5854,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+	SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+	SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
@@ -5860,13 +5866,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
 	SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
+	SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
-	SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
-	SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
 	SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
@@ -6014,6 +6017,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
 	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
 	{.id = ALC292_FIXUP_TPT460, .name = "tpt460"},
+	{.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
@@ -6465,8 +6469,11 @@ static int patch_alc269(struct hda_codec *codec)
 		break;
 	case 0x10ec0225:
 	case 0x10ec0295:
+		spec->codec_variant = ALC269_TYPE_ALC225;
+		break;
 	case 0x10ec0299:
 		spec->codec_variant = ALC269_TYPE_ALC225;
+		spec->gen.mixer_nid = 0; /* no loopback on ALC299 */
 		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
@@ -7338,6 +7345,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = {
 	{.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"},
 	{.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"},
 	{.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
+	{.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"},
 	{}
 };
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index faa3d38..6cefdf6 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = {
 		      "Dell Inspiron 1501", STAC_9200_DELL_M26),
 	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6,
 		      "unknown Dell", STAC_9200_DELL_M26),
+	SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201,
+		      "Dell Latitude D430", STAC_9200_DELL_M22),
 	/* Panasonic */
 	SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC),
 	/* Gateway machines needs EAPD to be set on resume */
diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c
index dccf3db4..8bf2ce3 100644
--- a/sound/pci/mixart/mixart_core.c
+++ b/sound/pci/mixart/mixart_core.c
@@ -239,7 +239,7 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int
 	struct mixart_msg resp;
 	u32 msg_frame = 0; /* set to 0, so it's no notification to wait for, but the answer */
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	init_waitqueue_entry(&wait, current);
@@ -284,7 +284,7 @@ int snd_mixart_send_msg_wait_notif(struct mixart_mgr *mgr,
 				   struct mixart_msg *request, u32 notif_event)
 {
 	int err;
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	long timeout;
 
 	if (snd_BUG_ON(!notif_event))
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index fe4ba46..1114166 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -781,7 +781,7 @@ static snd_pcm_uframes_t snd_ymfpci_capture_pointer(struct snd_pcm_substream *su
 
 static void snd_ymfpci_irq_wait(struct snd_ymfpci *chip)
 {
-	wait_queue_t wait;
+	wait_queue_entry_t wait;
 	int loops = 4;
 
 	while (loops-- > 0) {
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 7ae46c2..b7ef8c5 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -301,6 +301,14 @@ static int atmel_classd_codec_probe(struct snd_soc_codec *codec)
 	return 0;
 }
 
+static int atmel_classd_codec_resume(struct snd_soc_codec *codec)
+{
+	struct snd_soc_card *card = snd_soc_codec_get_drvdata(codec);
+	struct atmel_classd *dd = snd_soc_card_get_drvdata(card);
+
+	return regcache_sync(dd->regmap);
+}
+
 static struct regmap *atmel_classd_codec_get_remap(struct device *dev)
 {
 	return dev_get_regmap(dev, NULL);
@@ -308,6 +316,7 @@ static struct regmap *atmel_classd_codec_get_remap(struct device *dev)
 
 static struct snd_soc_codec_driver soc_codec_dev_classd = {
 	.probe		= atmel_classd_codec_probe,
+	.resume		= atmel_classd_codec_resume,
 	.get_regmap	= atmel_classd_codec_get_remap,
 	.component_driver = {
 		.controls		= atmel_classd_snd_controls,
diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index 6dd7578..024d83f 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -772,7 +772,7 @@ static int da7213_dai_event(struct snd_soc_dapm_widget *w,
 				++i;
 				msleep(50);
 			}
-		} while ((i < DA7213_SRM_CHECK_RETRIES) & (!srm_lock));
+		} while ((i < DA7213_SRM_CHECK_RETRIES) && (!srm_lock));
 
 		if (!srm_lock)
 			dev_warn(codec->dev, "SRM failed to lock\n");
diff --git a/sound/soc/codecs/rt286.c b/sound/soc/codecs/rt286.c
index 9c365a7..7899a2c 100644
--- a/sound/soc/codecs/rt286.c
+++ b/sound/soc/codecs/rt286.c
@@ -1108,6 +1108,13 @@ static const struct dmi_system_id force_combo_jack_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Kabylake Client platform")
 		}
 	},
+	{
+		.ident = "Thinkpad Helix 2nd",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix 2nd")
+		}
+	},
 
 	{ }
 };
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 2c9deda..bc136d2 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -202,7 +202,7 @@ static int asoc_simple_card_dai_init(struct snd_soc_pcm_runtime *rtd)
 	if (ret < 0)
 		return ret;
 
-	ret = asoc_simple_card_init_mic(rtd->card, &priv->hp_jack, PREFIX);
+	ret = asoc_simple_card_init_mic(rtd->card, &priv->mic_jack, PREFIX);
 	if (ret < 0)
 		return ret;
 
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index e3f0667..e7d766d 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -21,8 +21,9 @@
 #include "skl.h"
 
 /* Unique identification for getting NHLT blobs */
-static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45,
-				0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53};
+static guid_t osc_guid =
+	GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+		  0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
 
 struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 {
@@ -37,7 +38,7 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 		return NULL;
 	}
 
-	obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
+	obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
 	if (obj && obj->type == ACPI_TYPE_BUFFER) {
 		nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
 		nhlt_table = (struct nhlt_acpi_table *)
diff --git a/sound/soc/intel/skylake/skl-sst-ipc.c b/sound/soc/intel/skylake/skl-sst-ipc.c
index 58c5250..498b153 100644
--- a/sound/soc/intel/skylake/skl-sst-ipc.c
+++ b/sound/soc/intel/skylake/skl-sst-ipc.c
@@ -413,8 +413,11 @@ static void skl_ipc_process_reply(struct sst_generic_ipc *ipc,
 	u32 reply = header.primary & IPC_GLB_REPLY_STATUS_MASK;
 	u64 *ipc_header = (u64 *)(&header);
 	struct skl_sst *skl = container_of(ipc, struct skl_sst, ipc);
+	unsigned long flags;
 
+	spin_lock_irqsave(&ipc->dsp->spinlock, flags);
 	msg = skl_ipc_reply_get_msg(ipc, *ipc_header);
+	spin_unlock_irqrestore(&ipc->dsp->spinlock, flags);
 	if (msg == NULL) {
 		dev_dbg(ipc->dev, "ipc: rx list is empty\n");
 		return;
@@ -456,8 +459,10 @@ static void skl_ipc_process_reply(struct sst_generic_ipc *ipc,
 		}
 	}
 
+	spin_lock_irqsave(&ipc->dsp->spinlock, flags);
 	list_del(&msg->list);
 	sst_ipc_tx_msg_reply_complete(ipc, msg);
+	spin_unlock_irqrestore(&ipc->dsp->spinlock, flags);
 }
 
 irqreturn_t skl_dsp_irq_thread_handler(int irq, void *context)
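
The two spin_lock_irqsave() additions above enforce one rule: the pending-reply message list is also manipulated from the tx/interrupt paths, so any lookup or unlink done during reply processing has to happen under the DSP spinlock. The generic shape of that pattern (hypothetical 'pending' list and 'struct msg'):

	unsigned long flags;
	struct msg *m;

	spin_lock_irqsave(&lock, flags);
	m = list_first_entry_or_null(&pending, struct msg, list);
	if (m)
		list_del(&m->list);
	spin_unlock_irqrestore(&lock, flags);
	/* 'm' can now be processed without the lock held */
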
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 3a99712..64a0f8e 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2502,7 +2502,7 @@ static int skl_tplg_get_manifest_tkn(struct device *dev,
 
 			if (ret < 0)
 				return ret;
-			tkn_count += ret;
+			tkn_count = ret;
 
 			tuple_size += tkn_count *
 				sizeof(struct snd_soc_tplg_vendor_string_elem);
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 6df3b31..4c9b578 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -410,7 +410,7 @@ static int skl_free(struct hdac_ext_bus *ebus)
 	struct skl *skl  = ebus_to_skl(ebus);
 	struct hdac_bus *bus = ebus_to_hbus(ebus);
 
-	skl->init_failed = 1; /* to be sure */
+	skl->init_done = 0; /* to be sure */
 
 	snd_hdac_ext_stop_streams(ebus);
 
@@ -428,8 +428,10 @@ static int skl_free(struct hdac_ext_bus *ebus)
 
 	snd_hdac_ext_bus_exit(ebus);
 
+	cancel_work_sync(&skl->probe_work);
 	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
 		snd_hdac_i915_exit(&ebus->bus);
+
 	return 0;
 }
 
@@ -566,6 +568,84 @@ static const struct hdac_bus_ops bus_core_ops = {
 	.get_response = snd_hdac_bus_get_response,
 };
 
+static int skl_i915_init(struct hdac_bus *bus)
+{
+	int err;
+
+	/*
+	 * The HDMI codec is in the GPU, so we need to ensure that it is
+	 * powered up and ready for probe.
+	 */
+	err = snd_hdac_i915_init(bus);
+	if (err < 0)
+		return err;
+
+	err = snd_hdac_display_power(bus, true);
+	if (err < 0)
+		dev_err(bus->dev, "Cannot turn on display power on i915\n");
+
+	return err;
+}
+
+static void skl_probe_work(struct work_struct *work)
+{
+	struct skl *skl = container_of(work, struct skl, probe_work);
+	struct hdac_ext_bus *ebus = &skl->ebus;
+	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_ext_link *hlink = NULL;
+	int err;
+
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
+		err = skl_i915_init(bus);
+		if (err < 0)
+			return;
+	}
+
+	err = skl_init_chip(bus, true);
+	if (err < 0) {
+		dev_err(bus->dev, "Init chip failed with err: %d\n", err);
+		goto out_err;
+	}
+
+	/* codec detection */
+	if (!bus->codec_mask)
+		dev_info(bus->dev, "no hda codecs found!\n");
+
+	/* create codec instances */
+	err = skl_codec_create(ebus);
+	if (err < 0)
+		goto out_err;
+
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
+		err = snd_hdac_display_power(bus, false);
+		if (err < 0) {
+			dev_err(bus->dev, "Cannot turn off display power on i915\n");
+			return;
+		}
+	}
+
+	/* register platform dai and controls */
+	err = skl_platform_register(bus->dev);
+	if (err < 0)
+		return;
+	/*
+	 * we are done probing so decrement link counts
+	 */
+	list_for_each_entry(hlink, &ebus->hlink_list, list)
+		snd_hdac_ext_bus_link_put(ebus, hlink);
+
+	/* configure PM */
+	pm_runtime_put_noidle(bus->dev);
+	pm_runtime_allow(bus->dev);
+	skl->init_done = 1;
+
+	return;
+
+out_err:
+	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
+		err = snd_hdac_display_power(bus, false);
+}
+
 /*
  * constructor
  */
@@ -593,6 +673,7 @@ static int skl_create(struct pci_dev *pci,
 	snd_hdac_ext_bus_init(ebus, &pci->dev, &bus_core_ops, io_ops);
 	ebus->bus.use_posbuf = 1;
 	skl->pci = pci;
+	INIT_WORK(&skl->probe_work, skl_probe_work);
 
 	ebus->bus.bdl_pos_adj = 0;
 
@@ -601,27 +682,6 @@ static int skl_create(struct pci_dev *pci,
 	return 0;
 }
 
-static int skl_i915_init(struct hdac_bus *bus)
-{
-	int err;
-
-	/*
-	 * The HDMI codec is in GPU so we need to ensure that it is powered
-	 * up and ready for probe
-	 */
-	err = snd_hdac_i915_init(bus);
-	if (err < 0)
-		return err;
-
-	err = snd_hdac_display_power(bus, true);
-	if (err < 0) {
-		dev_err(bus->dev, "Cannot turn on display power on i915\n");
-		return err;
-	}
-
-	return err;
-}
-
 static int skl_first_init(struct hdac_ext_bus *ebus)
 {
 	struct skl *skl = ebus_to_skl(ebus);
@@ -684,20 +744,7 @@ static int skl_first_init(struct hdac_ext_bus *ebus)
 	/* initialize chip */
 	skl_init_pci(skl);
 
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
-		err = skl_i915_init(bus);
-		if (err < 0)
-			return err;
-	}
-
-	skl_init_chip(bus, true);
-
-	/* codec detection */
-	if (!bus->codec_mask) {
-		dev_info(bus->dev, "no hda codecs found!\n");
-	}
-
-	return 0;
+	return skl_init_chip(bus, true);
 }
 
 static int skl_probe(struct pci_dev *pci,
@@ -706,7 +753,6 @@ static int skl_probe(struct pci_dev *pci,
 	struct skl *skl;
 	struct hdac_ext_bus *ebus = NULL;
 	struct hdac_bus *bus = NULL;
-	struct hdac_ext_link *hlink = NULL;
 	int err;
 
 	/* we use ext core ops, so provide NULL for ops here */
@@ -729,7 +775,7 @@ static int skl_probe(struct pci_dev *pci,
 
 	if (skl->nhlt == NULL) {
 		err = -ENODEV;
-		goto out_display_power_off;
+		goto out_free;
 	}
 
 	err = skl_nhlt_create_sysfs(skl);
@@ -760,56 +806,24 @@ static int skl_probe(struct pci_dev *pci,
 	if (bus->mlcap)
 		snd_hdac_ext_bus_get_ml_capabilities(ebus);
 
+	snd_hdac_bus_stop_chip(bus);
+
 	/* create device for soc dmic */
 	err = skl_dmic_device_register(skl);
 	if (err < 0)
 		goto out_dsp_free;
 
-	/* register platform dai and controls */
-	err = skl_platform_register(bus->dev);
-	if (err < 0)
-		goto out_dmic_free;
-
-	/* create codec instances */
-	err = skl_codec_create(ebus);
-	if (err < 0)
-		goto out_unregister;
-
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
-		err = snd_hdac_display_power(bus, false);
-		if (err < 0) {
-			dev_err(bus->dev, "Cannot turn off display power on i915\n");
-			return err;
-		}
-	}
-
-	/*
-	 * we are done probling so decrement link counts
-	 */
-	list_for_each_entry(hlink, &ebus->hlink_list, list)
-		snd_hdac_ext_bus_link_put(ebus, hlink);
-
-	/* configure PM */
-	pm_runtime_put_noidle(bus->dev);
-	pm_runtime_allow(bus->dev);
+	schedule_work(&skl->probe_work);
 
 	return 0;
 
-out_unregister:
-	skl_platform_unregister(bus->dev);
-out_dmic_free:
-	skl_dmic_device_unregister(skl);
 out_dsp_free:
 	skl_free_dsp(skl);
 out_mach_free:
 	skl_machine_device_unregister(skl);
 out_nhlt_free:
 	skl_nhlt_free(skl->nhlt);
-out_display_power_off:
-	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
-		snd_hdac_display_power(bus, false);
 out_free:
-	skl->init_failed = 1;
 	skl_free(ebus);
 
 	return err;
@@ -828,7 +842,7 @@ static void skl_shutdown(struct pci_dev *pci)
 
 	skl = ebus_to_skl(ebus);
 
-	if (skl->init_failed)
+	if (!skl->init_done)
 		return;
 
 	snd_hdac_ext_stop_streams(ebus);
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index a454f60..2a630fc 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -46,7 +46,7 @@ struct skl {
 	struct hdac_ext_bus ebus;
 	struct pci_dev *pci;
 
-	unsigned int init_failed:1; /* delayed init failed */
+	unsigned int init_done:1; /* delayed init status */
 	struct platform_device *dmic_dev;
 	struct platform_device *i2s_dev;
 	struct snd_soc_platform *platform;
@@ -64,6 +64,8 @@ struct skl {
 	const struct firmware *tplg;
 
 	int supend_active;
+
+	struct work_struct probe_work;
 };
 
 #define skl_to_ebus(s)	(&(s)->ebus)
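
The Skylake changes above move codec probing, i915 display-power handling and platform registration out of skl_probe() into a work item, flip the init_failed flag into init_done, and have skl_free() cancel the worker before tearing the bus down. The general shape of that deferral, with hypothetical names (the real driver keeps more state):

	struct mydev {
		struct work_struct probe_work;
		bool init_done;
	};

	static void mydev_probe_work(struct work_struct *work)
	{
		struct mydev *dev = container_of(work, struct mydev, probe_work);

		/* ...slow init that may sleep or wait on other drivers... */
		dev->init_done = true;
	}

	static int mydev_probe(struct mydev *dev)
	{
		INIT_WORK(&dev->probe_work, mydev_probe_work);
		schedule_work(&dev->probe_work);
		return 0;			/* init completes asynchronously */
	}

	static void mydev_remove(struct mydev *dev)
	{
		/* never free state the worker might still touch */
		cancel_work_sync(&dev->probe_work);
	}
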
diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c
index 66203d1..d3b0dc1 100644
--- a/sound/soc/sh/rcar/adg.c
+++ b/sound/soc/sh/rcar/adg.c
@@ -507,7 +507,8 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
 				rbga = rbgx;
 				adg->rbga_rate_for_441khz = rate / div;
 				ckr |= brg_table[i] << 20;
-				if (req_441kHz_rate)
+				if (req_441kHz_rate &&
+				    !(adg_mode_flags(adg) & AUDIO_OUT_48))
 					parent_clk_name = __clk_get_name(clk);
 			}
 		}
@@ -522,7 +523,8 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv,
 				rbgb = rbgx;
 				adg->rbgb_rate_for_48khz = rate / div;
 				ckr |= brg_table[i] << 16;
-				if (req_48kHz_rate)
+				if (req_48kHz_rate &&
+				    (adg_mode_flags(adg) & AUDIO_OUT_48))
 					parent_clk_name = __clk_get_name(clk);
 			}
 		}
diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c
index 7d92a24..d879c01 100644
--- a/sound/soc/sh/rcar/cmd.c
+++ b/sound/soc/sh/rcar/cmd.c
@@ -89,6 +89,7 @@ static int rsnd_cmd_init(struct rsnd_mod *mod,
 	dev_dbg(dev, "ctu/mix path = 0x%08x", data);
 
 	rsnd_mod_write(mod, CMD_ROUTE_SLCT, data);
+	rsnd_mod_write(mod, CMD_BUSIF_MODE, rsnd_get_busif_shift(io, mod) | 1);
 	rsnd_mod_write(mod, CMD_BUSIF_DALIGN, rsnd_get_dalign(mod, io));
 
 	rsnd_adg_set_cmd_timsel_gen2(mod, io);
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 1744015..8c1f4e2 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -343,6 +343,57 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io)
 		return 0x76543210;
 }
 
+u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod)
+{
+	enum rsnd_mod_type playback_mods[] = {
+		RSND_MOD_SRC,
+		RSND_MOD_CMD,
+		RSND_MOD_SSIU,
+	};
+	enum rsnd_mod_type capture_mods[] = {
+		RSND_MOD_CMD,
+		RSND_MOD_SRC,
+		RSND_MOD_SSIU,
+	};
+	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
+	struct rsnd_mod *tmod = NULL;
+	enum rsnd_mod_type *mods =
+		rsnd_io_is_play(io) ?
+		playback_mods : capture_mods;
+	int i;
+
+	/*
+	 * This is needed for 24-bit data, which has to be shifted by 8 bits:
+	 *
+	 * Linux places 24-bit data as 0x00******,
+	 * the HW expects it as       0x******00.
+	 *
+	 */
+	switch (runtime->sample_bits) {
+	case 16:
+		return 0;
+	case 32:
+		break;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(playback_mods); i++) {
+		tmod = rsnd_io_to_mod(io, mods[i]);
+		if (tmod)
+			break;
+	}
+
+	if (tmod != mod)
+		return 0;
+
+	if (rsnd_io_is_play(io))
+		return  (0 << 20) | /* shift to Left */
+			(8 << 16);  /* 8bit */
+	else
+		return  (1 << 20) | /* shift to Right */
+			(8 << 16);  /* 8bit */
+}
+
 /*
  *	rsnd_dai functions
  */
diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c
index 63b6d3c..4b09807 100644
--- a/sound/soc/sh/rcar/gen.c
+++ b/sound/soc/sh/rcar/gen.c
@@ -236,6 +236,7 @@ static int rsnd_gen2_probe(struct rsnd_priv *priv)
 		RSND_GEN_M_REG(SRC_ROUTE_MODE0,	0xc,	0x20),
 		RSND_GEN_M_REG(SRC_CTRL,	0x10,	0x20),
 		RSND_GEN_M_REG(SRC_INT_ENABLE0,	0x18,	0x20),
+		RSND_GEN_M_REG(CMD_BUSIF_MODE,	0x184,	0x20),
 		RSND_GEN_M_REG(CMD_BUSIF_DALIGN,0x188,	0x20),
 		RSND_GEN_M_REG(CMD_ROUTE_SLCT,	0x18c,	0x20),
 		RSND_GEN_M_REG(CMD_CTRL,	0x190,	0x20),
diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h
index dbf4163..323af41 100644
--- a/sound/soc/sh/rcar/rsnd.h
+++ b/sound/soc/sh/rcar/rsnd.h
@@ -73,6 +73,7 @@ enum rsnd_reg {
 	RSND_REG_SCU_SYS_INT_EN0,
 	RSND_REG_SCU_SYS_INT_EN1,
 	RSND_REG_CMD_CTRL,
+	RSND_REG_CMD_BUSIF_MODE,
 	RSND_REG_CMD_BUSIF_DALIGN,
 	RSND_REG_CMD_ROUTE_SLCT,
 	RSND_REG_CMDOUT_TIMSEL,
@@ -204,6 +205,7 @@ void rsnd_bset(struct rsnd_priv *priv, struct rsnd_mod *mod, enum rsnd_reg reg,
 		    u32 mask, u32 data);
 u32 rsnd_get_adinr_bit(struct rsnd_mod *mod, struct rsnd_dai_stream *io);
 u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io);
+u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod);
 
 /*
  *	R-Car DMA
diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
index 20b5b2e..76a477a 100644
--- a/sound/soc/sh/rcar/src.c
+++ b/sound/soc/sh/rcar/src.c
@@ -190,11 +190,13 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 	struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
 	struct device *dev = rsnd_priv_to_dev(priv);
 	struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
+	int is_play = rsnd_io_is_play(io);
 	int use_src = 0;
 	u32 fin, fout;
 	u32 ifscr, fsrate, adinr;
 	u32 cr, route;
 	u32 bsdsr, bsisr;
+	u32 i_busif, o_busif, tmp;
 	uint ratio;
 
 	if (!runtime)
@@ -270,6 +272,11 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 		break;
 	}
 
+	/* BUSIF_MODE */
+	tmp = rsnd_get_busif_shift(io, mod);
+	i_busif = ( is_play ? tmp : 0) | 1;
+	o_busif = (!is_play ? tmp : 0) | 1;
+
 	rsnd_mod_write(mod, SRC_ROUTE_MODE0, route);
 
 	rsnd_mod_write(mod, SRC_SRCIR, 1);	/* initialize */
@@ -281,8 +288,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
 	rsnd_mod_write(mod, SRC_BSISR, bsisr);
 	rsnd_mod_write(mod, SRC_SRCIR, 0);	/* cancel initialize */
 
-	rsnd_mod_write(mod, SRC_I_BUSIF_MODE, 1);
-	rsnd_mod_write(mod, SRC_O_BUSIF_MODE, 1);
+	rsnd_mod_write(mod, SRC_I_BUSIF_MODE, i_busif);
+	rsnd_mod_write(mod, SRC_O_BUSIF_MODE, o_busif);
+
 	rsnd_mod_write(mod, SRC_BUSIF_DALIGN, rsnd_get_dalign(mod, io));
 
 	rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout);
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index 135c566..91e5c07 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -302,7 +302,7 @@ static void rsnd_ssi_config_init(struct rsnd_mod *mod,
 	 * always use 32bit system word.
 	 * see also rsnd_ssi_master_clk_enable()
 	 */
-	cr_own = FORCE | SWL_32 | PDTA;
+	cr_own = FORCE | SWL_32;
 
 	if (rdai->bit_clk_inv)
 		cr_own |= SCKP;
@@ -550,6 +550,13 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 		u32 *buf = (u32 *)(runtime->dma_area +
 				   rsnd_dai_pointer_offset(io, 0));
+		int shift = 0;
+
+		switch (runtime->sample_bits) {
+		case 32:
+			shift = 8;
+			break;
+		}
 
 		/*
 		 * 8/16/32 data can be assesse to TDR/RDR register
@@ -557,9 +564,9 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod,
 		 * see rsnd_ssi_init()
 		 */
 		if (rsnd_io_is_play(io))
-			rsnd_mod_write(mod, SSITDR, *buf);
+			rsnd_mod_write(mod, SSITDR, (*buf) << shift);
 		else
-			*buf = rsnd_mod_read(mod, SSIRDR);
+			*buf = (rsnd_mod_read(mod, SSIRDR) >> shift);
 
 		elapsed = rsnd_dai_pointer_update(io, sizeof(*buf));
 	}
@@ -709,6 +716,11 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod,
 			       struct rsnd_priv *priv)
 {
 	struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+	struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io);
+
+	/* Do nothing for SSI parent mod */
+	if (ssi_parent_mod == mod)
+		return 0;
 
 	/* PIO will request IRQ again */
 	free_irq(ssi->irq, mod);
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 14fafda..512d238 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -144,7 +144,8 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 			       (rsnd_io_is_play(io) ?
 				rsnd_runtime_channel_after_ctu(io) :
 				rsnd_runtime_channel_original(io)));
-		rsnd_mod_write(mod, SSI_BUSIF_MODE,  1);
+		rsnd_mod_write(mod, SSI_BUSIF_MODE,
+			       rsnd_get_busif_shift(io, mod) | 1);
 		rsnd_mod_write(mod, SSI_BUSIF_DALIGN,
 			       rsnd_get_dalign(mod, io));
 	}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index aae099c..754e3ef 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2286,6 +2286,9 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
 	list_for_each_entry(rtd, &card->rtd_list, list)
 		flush_delayed_work(&rtd->delayed_work);
 
+	/* free the ALSA card first; this syncs with pending operations */
+	snd_card_free(card->snd_card);
+
 	/* remove and free each DAI */
 	soc_remove_dai_links(card);
 	soc_remove_pcm_runtimes(card);
@@ -2300,9 +2303,7 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
 	if (card->remove)
 		card->remove(card);
 
-	snd_card_free(card->snd_card);
 	return 0;
-
 }
 
 /* removes a socdev */
diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c
index dc48eed..26ed23b 100644
--- a/sound/usb/mixer_us16x08.c
+++ b/sound/usb/mixer_us16x08.c
@@ -698,16 +698,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
 	struct snd_usb_audio *chip = elem->head.mixer->chip;
 	struct snd_us16x08_meter_store *store = elem->private_data;
 	u8 meter_urb[64];
-	char tmp[sizeof(mix_init_msg2)] = {0};
 
 	switch (kcontrol->private_value) {
-	case 0:
-		snd_us16x08_send_urb(chip, (char *)mix_init_msg1,
-				     sizeof(mix_init_msg1));
+	case 0: {
+		char tmp[sizeof(mix_init_msg1)];
+
+		memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1));
+		snd_us16x08_send_urb(chip, tmp, 4);
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
 		kcontrol->private_value++;
 		break;
+	}
 	case 1:
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
@@ -718,15 +720,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol,
 			sizeof(meter_urb));
 		kcontrol->private_value++;
 		break;
-	case 3:
+	case 3: {
+		char tmp[sizeof(mix_init_msg2)];
+
 		memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2));
 		tmp[2] = snd_get_meter_comp_index(store);
-		snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2));
+		snd_us16x08_send_urb(chip, tmp, 10);
 		snd_us16x08_recv_urb(chip, meter_urb,
 			sizeof(meter_urb));
 		kcontrol->private_value = 0;
 		break;
 	}
+	}
 
 	for (set = 0; set < 6; set++)
 		get_meter_levels_from_urb(set, store, meter_urb);
@@ -1135,7 +1140,7 @@ static const struct snd_us16x08_control_params eq_controls[] = {
 		.control_id = SND_US16X08_ID_EQLOWMIDWIDTH,
 		.type = USB_MIXER_U8,
 		.num_channels = 16,
-		.name = "EQ MidQLow Q",
+		.name = "EQ MidLow Q",
 	},
 	{ /* EQ mid high gain */
 		.kcontrol_new = &snd_us16x08_eq_gain_ctl,
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 01eff6c..d7b0b0a 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	/* Amanero Combo384 USB interface with native DSD support */
 	case USB_ID(0x16d0, 0x071a):
 		if (fp->altsetting == 2) {
-			switch (chip->dev->descriptor.bcdDevice) {
+			switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
 			case 0x199:
 				return SNDRV_PCM_FMTBIT_DSD_U32_LE;
 			case 0x19b:
diff --git a/tools/Makefile b/tools/Makefile
index c8a90d0..221e1ce 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,6 +19,7 @@
 	@echo '  kvm_stat               - top-like utility for displaying kvm statistics'
 	@echo '  leds                   - LEDs  tools'
 	@echo '  lguest                 - a minimal 32-bit x86 hypervisor'
+	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
 	@echo '  net                    - misc networking tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
 	@echo '  selftests              - various kernel selftests'
@@ -89,7 +90,7 @@
 kvm_stat: FORCE
 	$(call descend,kvm/$@)
 
-all: acpi cgroup cpupower gpio hv firewire lguest \
+all: acpi cgroup cpupower gpio hv firewire lguest liblockdep \
 		perf selftests turbostat usb \
 		virtio vm net x86_energy_perf_policy \
 		tmon freefall objtool kvm_stat
@@ -103,6 +104,9 @@
 cgroup_install firewire_install gpio_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install:
 	$(call descend,$(@:_install=),install)
 
+liblockdep_install:
+	$(call descend,lib/lockdep,install)
+
 selftests_install:
 	$(call descend,testing/$(@:_install=),install)
 
@@ -119,7 +123,7 @@
 	$(call descend,kvm/$(@:_install=),install)
 
 install: acpi_install cgroup_install cpupower_install gpio_install \
-		hv_install firewire_install lguest_install \
+		hv_install firewire_install lguest_install liblockdep_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install net_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 6ebd3e6..5e3c673 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -114,6 +116,8 @@ struct kvm_debug_exit_arch {
 };
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {
@@ -192,13 +196,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS	8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT    0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index c286035..70eea2e 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
 
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -143,6 +145,8 @@ struct kvm_debug_exit_arch {
 #define KVM_GUESTDBG_USE_HW		(1 << 17)
 
 struct kvm_sync_regs {
+	/* Used with KVM_CAP_ARM_USER_IRQ */
+	__u64 device_irq_level;
 };
 
 struct kvm_arch_memory_slot {
@@ -212,13 +216,17 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK	0x3ff
 #define VGIC_LEVEL_INFO_LINE_LEVEL	0
 
-#define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
+#define   KVM_DEV_ARM_VGIC_CTRL_INIT		0
+#define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
+#define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
+#define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4b..07fbeb9 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_GUEST_DEBUG
 
+/* Not always available, but if it is, this is the correct offset.  */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
 struct kvm_regs {
 	__u64 pc;
 	__u64 cr;
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 7f4fd65..3dd2a1d 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -26,6 +26,8 @@
 #define KVM_DEV_FLIC_ADAPTER_REGISTER	6
 #define KVM_DEV_FLIC_ADAPTER_MODIFY	7
 #define KVM_DEV_FLIC_CLEAR_IO_IRQ	8
+#define KVM_DEV_FLIC_AISM		9
+#define KVM_DEV_FLIC_AIRQ_INJECT	10
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter {
 	__u8 isc;
 	__u8 maskable;
 	__u8 swap;
-	__u8 pad;
+	__u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+	__u8 isc;
+	__u16 mode;
 };
 
 #define KVM_S390_IO_ADAPTER_MASK 1
@@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine {
 #define KVM_S390_VM_CPU_FEAT_CMMA	10
 #define KVM_S390_VM_CPU_FEAT_PFMFI	11
 #define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+#define KVM_S390_VM_CPU_FEAT_KSS	13
 struct kvm_s390_vm_cpu_feat {
 	__u64 feat[16];
 };
@@ -198,6 +208,10 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_VRS    (1UL << 6)
 #define KVM_SYNC_RICCB  (1UL << 7)
 #define KVM_SYNC_FPRS   (1UL << 8)
+#define KVM_SYNC_GSCB   (1UL << 9)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -218,8 +232,16 @@ struct kvm_sync_regs {
 	};
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
-	__u8 padding[52];	/* riccb needs to be 64byte aligned */
+	__u8 padding1[52];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
+	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
+	union {
+		__u8 sdnx[SDNXL];  /* state description annex */
+		struct {
+			__u64 reserved1[2];
+			__u64 gscb[4];
+		};
+	};
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 0fe0044..2701e5f 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -202,6 +202,8 @@
 #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
+#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
+
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 85599ad..5dff775 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -36,6 +36,12 @@
 # define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
 #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
 
+#ifdef CONFIG_X86_5LEVEL
+# define DISABLE_LA57	0
+#else
+# define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -55,7 +61,7 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fac9a5c..d91ba04 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -53,6 +53,12 @@
 # define NEED_MOVBE	0
 #endif
 
+#ifdef CONFIG_X86_5LEVEL
+# define NEED_LA57	(1<<(X86_FEATURE_LA57 & 31))
+#else
+# define NEED_LA57	0
+#endif
+
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT
 /* Paravirtualized systems may not have PSE or PGE available */
@@ -98,7 +104,7 @@
 #define REQUIRED_MASK13	0
 #define REQUIRED_MASK14	0
 #define REQUIRED_MASK15	0
-#define REQUIRED_MASK16	0
+#define REQUIRED_MASK16	(NEED_LA57)
 #define REQUIRED_MASK17	0
 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 739c0c5..c2824d0 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
 #define DE_VECTOR 0
 #define DB_VECTOR 1
 #define BP_VECTOR 3
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 1445865..690a2dc 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
+#define EXIT_REASON_RDRAND              57
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_VMFUNC              59
+#define EXIT_REASON_ENCLS               60
+#define EXIT_REASON_RDSEED              61
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
@@ -90,6 +94,7 @@
 	{ EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
 	{ EXIT_REASON_CPUID,                 "CPUID" }, \
 	{ EXIT_REASON_HLT,                   "HLT" }, \
+	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVLPG,                "INVLPG" }, \
 	{ EXIT_REASON_RDPMC,                 "RDPMC" }, \
 	{ EXIT_REASON_RDTSC,                 "RDTSC" }, \
@@ -108,6 +113,8 @@
 	{ EXIT_REASON_IO_INSTRUCTION,        "IO_INSTRUCTION" }, \
 	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
 	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
+	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
+	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
 	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
 	{ EXIT_REASON_MONITOR_TRAP_FLAG,     "MONITOR_TRAP_FLAG" }, \
 	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
 	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
 	{ EXIT_REASON_TPR_BELOW_THRESHOLD,   "TPR_BELOW_THRESHOLD" }, \
 	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
-	{ EXIT_REASON_GDTR_IDTR,	     "GDTR_IDTR" }, \
-	{ EXIT_REASON_LDTR_TR,		     "LDTR_TR" }, \
+	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
+	{ EXIT_REASON_GDTR_IDTR,             "GDTR_IDTR" }, \
+	{ EXIT_REASON_LDTR_TR,               "LDTR_TR" }, \
 	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
 	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
 	{ EXIT_REASON_INVEPT,                "INVEPT" }, \
+	{ EXIT_REASON_RDTSCP,                "RDTSCP" }, \
 	{ EXIT_REASON_PREEMPTION_TIMER,      "PREEMPTION_TIMER" }, \
-	{ EXIT_REASON_WBINVD,                "WBINVD" }, \
-	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
-	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
-	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
-	{ EXIT_REASON_MSR_LOAD_FAIL,         "MSR_LOAD_FAIL" }, \
-	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
+	{ EXIT_REASON_WBINVD,                "WBINVD" }, \
+	{ EXIT_REASON_XSETBV,                "XSETBV" }, \
+	{ EXIT_REASON_APIC_WRITE,            "APIC_WRITE" }, \
+	{ EXIT_REASON_RDRAND,                "RDRAND" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_VMFUNC,                "VMFUNC" }, \
+	{ EXIT_REASON_ENCLS,                 "ENCLS" }, \
+	{ EXIT_REASON_RDSEED,                "RDSEED" }, \
+	{ EXIT_REASON_PML_FULL,              "PML_FULL" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
diff --git a/tools/include/asm/sections.h b/tools/include/asm/sections.h
new file mode 100644
index 0000000..a80643d
--- /dev/null
+++ b/tools/include/asm/sections.h
@@ -0,0 +1,4 @@
+#ifndef __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+#define __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+
+#endif /* __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H */
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index 1aecad3..969db19 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -61,4 +61,14 @@ static inline unsigned fls_long(unsigned long l)
 	return fls64(l);
 }
 
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
 #endif
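
A quick sanity check of the new rol32() helper's semantics (standalone userspace sketch; the helper is restated here with plain unsigned int rather than pulling in the tools headers):

	#include <assert.h>

	static inline unsigned int rol32(unsigned int word, unsigned int shift)
	{
		return (word << shift) | (word >> ((-shift) & 31));
	}

	int main(void)
	{
		assert(rol32(0x12345678u, 8) == 0x34567812u);
		assert(rol32(0x80000001u, 1) == 0x00000003u);
		return 0;
	}
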
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 825d44f..bd39b20 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -19,3 +19,13 @@
 
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+
+#define  noinline	__attribute__((noinline))
+
+#define __packed	__attribute__((packed))
+
+#define __noreturn	__attribute__((noreturn))
+
+#define __aligned(x)	__attribute__((aligned(x)))
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+#define __scanf(a, b)	__attribute__((format(scanf, a, b)))
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 23299d7..d7a5604 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -17,6 +17,10 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+#ifndef noinline
+#define noinline
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
@@ -45,6 +49,10 @@
 # define __maybe_unused		__attribute__((unused))
 #endif
 
+#ifndef __used
+# define __used		__attribute__((__unused__))
+#endif
+
 #ifndef __packed
 # define __packed		__attribute__((__packed__))
 #endif
@@ -65,6 +73,14 @@
 # define unlikely(x)		__builtin_expect(!!(x), 0)
 #endif
 
+#ifndef __init
+# define __init
+#endif
+
+#ifndef noinline
+# define noinline
+#endif
+
 #define uninitialized_var(x) x = *(&(x))
 
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/include/linux/debug_locks.h
similarity index 74%
rename from tools/lib/lockdep/uinclude/linux/debug_locks.h
rename to tools/include/linux/debug_locks.h
index f38eb64..61cc7f5 100644
--- a/tools/lib/lockdep/uinclude/linux/debug_locks.h
+++ b/tools/include/linux/debug_locks.h
@@ -3,8 +3,9 @@
 
 #include <stddef.h>
 #include <linux/compiler.h>
+#include <asm/bug.h>
 
-#define DEBUG_LOCKS_WARN_ON(x) (x)
+#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x)
 
 extern bool debug_locks;
 extern bool debug_locks_silent;
diff --git a/tools/include/linux/delay.h b/tools/include/linux/delay.h
new file mode 100644
index 0000000..55aa417
--- /dev/null
+++ b/tools/include/linux/delay.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_DELAY_H
+#define _TOOLS_INCLUDE_LINUX_DELAY_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_DELAY_H */
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index bdc3dd8..abf0478 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -46,4 +46,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
+{
+	return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
 #endif /* _LINUX_ERR_H */
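
IS_ERR_OR_NULL() suits callers that treat a NULL result and an ERR_PTR()-encoded errno the same way. A minimal sketch (lookup_object() is hypothetical):

	struct object *obj = lookup_object(name);	/* may return NULL or ERR_PTR(-errno) */

	if (IS_ERR_OR_NULL(obj))
		return obj ? PTR_ERR(obj) : -ENOENT;
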
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 390d7c9..4ce25d4 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -208,6 +208,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
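
BPF_JMP_A(OFF) emits an unconditional jump over the next OFF instructions. A small sketch of how it might sit in a hand-built program, assuming the companion macros from this header (BPF_MOV64_IMM, BPF_EXIT_INSN) and BPF_REG_0 from the UAPI header are available:

	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),	/* r0 = 1 */
		BPF_JMP_A(1),			/* skip the next instruction */
		BPF_MOV64_IMM(BPF_REG_0, 2),	/* never reached */
		BPF_EXIT_INSN(),		/* return r0 */
	};
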
diff --git a/tools/include/linux/ftrace.h b/tools/include/linux/ftrace.h
new file mode 100644
index 0000000..949f541
--- /dev/null
+++ b/tools/include/linux/ftrace.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_FTRACE_H
+#define _TOOLS_INCLUDE_LINUX_FTRACE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_FTRACE_H */
diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h
new file mode 100644
index 0000000..2203075
--- /dev/null
+++ b/tools/include/linux/gfp.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_GFP_H
+#define _TOOLS_INCLUDE_LINUX_GFP_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_GFP_H */
diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/include/linux/hardirq.h
similarity index 100%
rename from tools/lib/lockdep/uinclude/linux/hardirq.h
rename to tools/include/linux/hardirq.h
diff --git a/tools/include/linux/interrupt.h b/tools/include/linux/interrupt.h
new file mode 100644
index 0000000..6be25bb
--- /dev/null
+++ b/tools/include/linux/interrupt.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+#define _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_INTERRUPT_H */
diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/include/linux/irqflags.h
similarity index 84%
rename from tools/lib/lockdep/uinclude/linux/irqflags.h
rename to tools/include/linux/irqflags.h
index 6cc296f..df77669 100644
--- a/tools/lib/lockdep/uinclude/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -17,19 +17,19 @@
 #define raw_local_irq_disable() do { } while (0)
 #define raw_local_irq_enable() do { } while (0)
 #define raw_local_irq_save(flags) ((flags) = 0)
-#define raw_local_irq_restore(flags) do { } while (0)
+#define raw_local_irq_restore(flags) ((void)(flags))
 #define raw_local_save_flags(flags) ((flags) = 0)
-#define raw_irqs_disabled_flags(flags) do { } while (0)
+#define raw_irqs_disabled_flags(flags) ((void)(flags))
 #define raw_irqs_disabled() 0
 #define raw_safe_halt()
 
 #define local_irq_enable() do { } while (0)
 #define local_irq_disable() do { } while (0)
 #define local_irq_save(flags) ((flags) = 0)
-#define local_irq_restore(flags) do { } while (0)
+#define local_irq_restore(flags) ((void)(flags))
 #define local_save_flags(flags)	((flags) = 0)
 #define irqs_disabled() (1)
-#define irqs_disabled_flags(flags) (0)
+#define irqs_disabled_flags(flags) ((void)(flags), 0)
 #define safe_halt() do { } while (0)
 
 #define trace_lock_release(x, y)
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
new file mode 100644
index 0000000..348c6f4
--- /dev/null
+++ b/tools/include/linux/jhash.h
@@ -0,0 +1,175 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* Best hash sizes are of power of two */
+#define jhash_size(n)   ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n)   (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL		0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const u8 *k = key;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	/* All but the last block: affect some 32 bits of (a,b,c) */
+	while (length > 12) {
+		a += __get_unaligned_cpu32(k);
+		b += __get_unaligned_cpu32(k + 4);
+		c += __get_unaligned_cpu32(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	/* Last block: affect all 32 bits of (c) */
+	/* All the case statements fall through */
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * Returns the hash value of the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+	u32 a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		__jhash_mix(a, b, c);
+		length -= 3;
+		k += 3;
+	}
+
+	/* Handle the last 3 u32's: all the case statements fall through */
+	switch (length) {
+	case 3: c += k[2];
+	case 2: b += k[1];
+	case 1: a += k[0];
+		__jhash_final(a, b, c);
+	case 0:	/* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+
+	__jhash_final(a, b, c);
+
+	return c;
+}
+
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+	return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+	return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
+}
+
+#endif /* _LINUX_JHASH_H */
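
The Jenkins hash is normally combined with jhash_mask() to pick a bucket in a power-of-two table. A minimal sketch (the key layout and table size are illustrative, and the usual u32/u16 typedefs are assumed to be in scope):

	#define TABLE_BITS 10	/* 1024 buckets */

	struct flow_key {
		u32 saddr, daddr;
		u16 sport, dport;
	};

	static u32 flow_hash(const struct flow_key *key, u32 seed)
	{
		/* hash the raw bytes, then mask down to a bucket index */
		return jhash(key, sizeof(*key), seed) & jhash_mask(TABLE_BITS);
	}
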
diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/include/linux/kallsyms.h
similarity index 89%
rename from tools/lib/lockdep/uinclude/linux/kallsyms.h
rename to tools/include/linux/kallsyms.h
index b0f2dbd..582cc1e 100644
--- a/tools/lib/lockdep/uinclude/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <stdio.h>
+#include <unistd.h>
 
 #define KSYM_NAME_LEN 128
 
@@ -24,7 +25,7 @@ static inline void print_ip_sym(unsigned long ip)
 
 	name = backtrace_symbols((void **)&ip, 1);
 
-	printf("%s\n", *name);
+	dprintf(STDOUT_FILENO, "%s\n", *name);
 
 	free(name);
 }
diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/include/linux/kern_levels.h
similarity index 100%
rename from tools/lib/lockdep/uinclude/linux/kern_levels.h
rename to tools/include/linux/kern_levels.h
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 73ccc48..77d2e94 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -5,6 +5,8 @@
 #include <stddef.h>
 #include <assert.h>
 #include <linux/compiler.h>
+#include <endian.h>
+#include <byteswap.h>
 
 #ifndef UINT_MAX
 #define UINT_MAX	(~0U)
@@ -32,6 +34,7 @@
 	(type *)((char *)__mptr - offsetof(type, member)); })
 #endif
 
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 
 #ifndef max
@@ -67,12 +70,33 @@
 #endif
 #endif
 
-/*
- * Both need more care to handle endianness
- * (Don't use bitmap_copy_le() for now)
- */
-#define cpu_to_le64(x)	(x)
-#define cpu_to_le32(x)	(x)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define cpu_to_le16 bswap_16
+#define cpu_to_le32 bswap_32
+#define cpu_to_le64 bswap_64
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define cpu_to_be16
+#define cpu_to_be32
+#define cpu_to_be64
+#define be16_to_cpu
+#define be32_to_cpu
+#define be64_to_cpu
+#else
+#define cpu_to_le16
+#define cpu_to_le32
+#define cpu_to_le64
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define cpu_to_be16 bswap_16
+#define cpu_to_be32 bswap_32
+#define cpu_to_be64 bswap_64
+#define be16_to_cpu bswap_16
+#define be32_to_cpu bswap_32
+#define be64_to_cpu bswap_64
+#endif
 
 int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
 int scnprintf(char * buf, size_t size, const char * fmt, ...);
@@ -89,4 +113,7 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...);
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
+#define current_gfp_context(k) 0
+#define synchronize_sched()
+
 #endif
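
Note that on a matching-endianness host these macros expand to nothing, so cpu_to_le32(x) degrades to plain (x); only on the opposite endianness do they resolve to the bswap_*() calls pulled in via <byteswap.h>. A one-line illustration (u32 from the tools <linux/types.h> is assumed):

	u32 raw = 0x12345678;
	u32 on_disk = cpu_to_le32(raw);	/* identity on little-endian hosts, bswap_32(raw) on big-endian */
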
diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/include/linux/kmemcheck.h
similarity index 100%
rename from tools/lib/lockdep/uinclude/linux/kmemcheck.h
rename to tools/include/linux/kmemcheck.h
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
new file mode 100644
index 0000000..bc763d5
--- /dev/null
+++ b/tools/include/linux/linkage.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H
+#define _TOOLS_INCLUDE_LINUX_LINKAGE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/include/linux/lockdep.h
similarity index 63%
rename from tools/lib/lockdep/uinclude/linux/lockdep.h
rename to tools/include/linux/lockdep.h
index c808c7d..8da3e8e 100644
--- a/tools/lib/lockdep/uinclude/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -7,8 +7,15 @@
 #include <limits.h>
 #include <linux/utsname.h>
 #include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/kern_levels.h>
+#include <linux/err.h>
+#include <linux/rcu.h>
+#include <linux/list.h>
+#include <linux/hardirq.h>
+#include <unistd.h>
 
-#define MAX_LOCK_DEPTH 2000UL
+#define MAX_LOCK_DEPTH 63UL
 
 #define asmlinkage
 #define __visible
@@ -29,31 +36,32 @@ extern struct task_struct *__curr(void);
 
 #define current (__curr())
 
-#define debug_locks_off() 1
+static inline int debug_locks_off(void)
+{
+	return 1;
+}
+
 #define task_pid_nr(tsk) ((tsk)->pid)
 
 #define KSYM_NAME_LEN 128
-#define printk printf
+#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define pr_warn pr_err
 
 #define list_del_rcu list_del
 
 #define atomic_t unsigned long
 #define atomic_inc(x) ((*(x))++)
 
-static struct new_utsname *init_utsname(void)
-{
-	static struct new_utsname n = (struct new_utsname) {
-		.release = "liblockdep",
-		.version = LIBLOCKDEP_VERSION,
-	};
-
-	return &n;
-}
-
 #define print_tainted() ""
 #define static_obj(x) 1
 
 #define debug_show_all_locks()
 extern void debug_check_no_locks_held(void);
 
+static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr)
+{
+	return false;
+}
+
 #endif
diff --git a/tools/include/linux/module.h b/tools/include/linux/module.h
new file mode 100644
index 0000000..07055db
--- /dev/null
+++ b/tools/include/linux/module.h
@@ -0,0 +1,11 @@
+#ifndef _LIBLOCKDEP_LINUX_MODULE_H_
+#define _LIBLOCKDEP_LINUX_MODULE_H_
+
+#define module_param(name, type, perm)
+
+static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
+{
+	return false;
+}
+
+#endif
diff --git a/tools/include/linux/mutex.h b/tools/include/linux/mutex.h
new file mode 100644
index 0000000..a8180d2
--- /dev/null
+++ b/tools/include/linux/mutex.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_MUTEX_H
+#define _TOOLS_INCLUDE_LINUX_MUTEX_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_MUTEX_H */
diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h
new file mode 100644
index 0000000..8b3b03b
--- /dev/null
+++ b/tools/include/linux/proc_fs.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H
+#define _TOOLS_INCLUDE_LINUX_PROC_FS_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */
diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/include/linux/rcu.h
similarity index 76%
rename from tools/lib/lockdep/uinclude/linux/rcu.h
rename to tools/include/linux/rcu.h
index 042ee8e..5080649 100644
--- a/tools/lib/lockdep/uinclude/linux/rcu.h
+++ b/tools/include/linux/rcu.h
@@ -18,4 +18,7 @@ static inline bool rcu_is_watching(void)
 	return false;
 }
 
+#define rcu_assign_pointer(p, v) ((p) = (v))
+#define RCU_INIT_POINTER(p, v) p=(v)
+
 #endif
diff --git a/tools/include/linux/sched/clock.h b/tools/include/linux/sched/clock.h
new file mode 100644
index 0000000..5837d17
--- /dev/null
+++ b/tools/include/linux/sched/clock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+#define _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_CLOCK_H */
diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h
new file mode 100644
index 0000000..c8d9f19
--- /dev/null
+++ b/tools/include/linux/sched/mm.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_MM_H
+#define _TOOLS_PERF_LINUX_SCHED_MM_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_MM_H */
diff --git a/tools/include/linux/sched/task.h b/tools/include/linux/sched/task.h
new file mode 100644
index 0000000..a97890ec
--- /dev/null
+++ b/tools/include/linux/sched/task.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_TASK_H
+#define _TOOLS_PERF_LINUX_SCHED_TASK_H
+
+#endif  /* _TOOLS_PERF_LINUX_SCHED_TASK_H */
diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h
new file mode 100644
index 0000000..102fd92
--- /dev/null
+++ b/tools/include/linux/seq_file.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+#define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 58397dc..417cda4 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -1,5 +1,31 @@
+#ifndef __LINUX_SPINLOCK_H_
+#define __LINUX_SPINLOCK_H_
+
+#include <pthread.h>
+#include <stdbool.h>
+
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
 
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
+
+#define arch_spinlock_t pthread_mutex_t
+#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+static inline void arch_spin_lock(arch_spinlock_t *mutex)
+{
+	pthread_mutex_lock(mutex);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *mutex)
+{
+	pthread_mutex_unlock(mutex);
+}
+
+static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
+{
+	return true;
+}
+
+#endif
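
In the tools build these spinlock and arch_spinlock wrappers are plain pthread mutexes, so kernel code compiled into liblockdep keeps working unchanged. A minimal usage sketch:

	spinlock_t lookup_lock = PTHREAD_MUTEX_INITIALIZER;	/* spinlock_t is pthread_mutex_t here */

	void touch_table(void)
	{
		unsigned long flags = 0;

		spin_lock_irqsave(&lookup_lock, flags);	/* flags is evaluated but otherwise ignored */
		/* ... critical section ... */
		spin_unlock_irqrestore(&lookup_lock, flags);
	}
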
diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/include/linux/stacktrace.h
similarity index 100%
rename from tools/lib/lockdep/uinclude/linux/stacktrace.h
rename to tools/include/linux/stacktrace.h
diff --git a/tools/include/linux/unaligned/packed_struct.h b/tools/include/linux/unaligned/packed_struct.h
new file mode 100644
index 0000000..c0d817d
--- /dev/null
+++ b/tools/include/linux/unaligned/packed_struct.h
@@ -0,0 +1,46 @@
+#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
+#define _LINUX_UNALIGNED_PACKED_STRUCT_H
+
+#include <linux/kernel.h>
+
+struct __una_u16 { u16 x; } __packed;
+struct __una_u32 { u32 x; } __packed;
+struct __una_u64 { u64 x; } __packed;
+
+static inline u16 __get_unaligned_cpu16(const void *p)
+{
+	const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+	return ptr->x;
+}
+
+static inline u32 __get_unaligned_cpu32(const void *p)
+{
+	const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+	return ptr->x;
+}
+
+static inline u64 __get_unaligned_cpu64(const void *p)
+{
+	const struct __una_u64 *ptr = (const struct __una_u64 *)p;
+	return ptr->x;
+}
+
+static inline void __put_unaligned_cpu16(u16 val, void *p)
+{
+	struct __una_u16 *ptr = (struct __una_u16 *)p;
+	ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu32(u32 val, void *p)
+{
+	struct __una_u32 *ptr = (struct __una_u32 *)p;
+	ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu64(u64 val, void *p)
+{
+	struct __una_u64 *ptr = (struct __una_u64 *)p;
+	ptr->x = val;
+}
+
+#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
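
The __una_* structs let the compiler emit whatever unaligned-safe load the host needs. A small sketch of pulling a 32-bit little-endian value out of an arbitrary byte offset, assuming the le32_to_cpu() helper from the tools <linux/kernel.h> above:

	static u32 read_le32_at(const unsigned char *buf, size_t off)
	{
		/* safe even if buf + off is not 4-byte aligned */
		return le32_to_cpu(__get_unaligned_cpu32(buf + off));
	}
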
diff --git a/tools/include/trace/events/lock.h b/tools/include/trace/events/lock.h
new file mode 100644
index 0000000..5b15fd5
--- /dev/null
+++ b/tools/include/trace/events/lock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+#define _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+
+#endif /* _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index d538897..17b1030 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -48,17 +48,13 @@
  * tv_sec holds the number of seconds before (negative) or after (positive)
  * 00:00:00 1st January 1970 UTC.
  *
- * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
- * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
- *
- * Note that if both tv_sec and tv_nsec are non-zero, then the two values must
- * either be both positive or both negative.
+ * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
  *
  * __reserved is held in case we need a yet finer resolution.
  */
 struct statx_timestamp {
 	__s64	tv_sec;
-	__s32	tv_nsec;
+	__u32	tv_nsec;
 	__s32	__reserved;
 };
 
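
With tv_nsec now unsigned, converting a statx timestamp into a struct timespec no longer needs the sign juggling the old comment described. A minimal sketch (stx is assumed to be a struct statx already filled in by the syscall):

	struct timespec mtime = {
		.tv_sec  = stx.stx_mtime.tv_sec,
		.tv_nsec = stx.stx_mtime.tv_nsec,	/* always 0..999,999,999 */
	};
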
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 809c772..a7ecf8f 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 	return err;
 }
 
+int filename__write_int(const char *filename, int value)
+{
+	int fd = open(filename, O_WRONLY), err = -1;
+	char buf[64];
+
+	if (fd < 0)
+		return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, strlen(buf)) == (ssize_t)strlen(buf))
+		err = 0;
+
+	close(fd);
+	return err;
+}
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep)
 {
 	char path[PATH_MAX];
@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
 
 	return filename__read_int(path, value);
 }
+
+int sysfs__write_int(const char *entry, int value)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+		return -1;
+
+	return filename__write_int(path, value);
+}
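
sysfs__write_int() mirrors the existing read helpers: resolve the sysfs mountpoint, build the path, write the decimal value. A usage sketch (the attribute path is illustrative and needs the appropriate privileges):

	/* e.g. take a CPU offline; the path is relative to the sysfs mountpoint */
	if (sysfs__write_int("devices/system/cpu/cpu1/online", 0) < 0)
		fprintf(stderr, "failed to write sysfs attribute\n");
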
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 956c211..4560534 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int filename__write_int(const char *filename, int value);
+
 int procfs__read_str(const char *entry, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
 int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
 #endif /* __API_FS__ */
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 3bc0ef9..ed9ace5 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile
@@ -79,6 +79,7 @@
 # Set compile option CFLAGS if not set elsewhere
 CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
 CFLAGS += -fPIC
+CFLAGS += -Wall
 
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
 
@@ -100,7 +101,7 @@
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
+	$(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
 
 do_build_static_lib =				\
 	($(print_static_lib_build)		\
@@ -118,10 +119,10 @@
 $(LIB_IN): force
 	$(Q)$(MAKE) $(build)=liblockdep
 
-liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
+$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
 	$(Q)$(do_compile_shared_library)
 
-liblockdep.a: $(LIB_IN)
+$(OUTPUT)liblockdep.a: $(LIB_IN)
 	$(Q)$(do_build_static_lib)
 
 tags:	force
@@ -149,7 +150,7 @@
 install: install_lib
 
 clean:
-	$(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d .*.cmd
+	$(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
 	$(RM) tags TAGS
 
 PHONY += force
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
index a0a2e3a..ced6d74 100644
--- a/tools/lib/lockdep/lockdep.c
+++ b/tools/lib/lockdep/lockdep.c
@@ -1,8 +1,27 @@
 #include <linux/lockdep.h>
+#include <stdlib.h>
 
 /* Trivial API wrappers, we don't (yet) have RCU in user-space: */
 #define hlist_for_each_entry_rcu	hlist_for_each_entry
 #define hlist_add_head_rcu		hlist_add_head
 #define hlist_del_rcu			hlist_del
+#define list_for_each_entry_rcu		list_for_each_entry
+#define list_add_tail_rcu		list_add_tail
+
+u32 prandom_u32(void)
+{
+	/* Used only by lock_pin_lock() which is dead code */
+	abort();
+}
+
+static struct new_utsname *init_utsname(void)
+{
+	static struct new_utsname n = (struct new_utsname) {
+		.release = "liblockdep",
+		.version = LIBLOCKDEP_VERSION,
+	};
+
+	return &n;
+}
 
 #include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
index 5284484..6a2d3c5 100644
--- a/tools/lib/lockdep/preload.c
+++ b/tools/lib/lockdep/preload.c
@@ -4,6 +4,7 @@
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <sysexits.h>
+#include <unistd.h>
 #include "include/liblockdep/mutex.h"
 #include "../../include/linux/rbtree.h"
 
@@ -122,8 +123,6 @@ static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
 #define LIBLOCKDEP_STATIC_ENTRIES	1024
 #endif
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
 static int __locks_nr;
 
@@ -149,7 +148,7 @@ static struct lock_lookup *alloc_lock(void)
 
 		int idx = __locks_nr++;
 		if (idx >= ARRAY_SIZE(__locks)) {
-			fprintf(stderr,
+			dprintf(STDERR_FILENO,
 		"LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
 			exit(EX_UNAVAILABLE);
 		}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
index f7f4303..297c304 100644
--- a/tools/lib/lockdep/rbtree.c
+++ b/tools/lib/lockdep/rbtree.c
@@ -1 +1 @@
-#include "../../../lib/rbtree.c"
+#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 1069d96..f9b9409 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -4,9 +4,9 @@
 
 for i in `ls tests/*.c`; do
 	testname=$(basename "$i" .c)
-	gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
+	gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
 	echo -ne "$testname... "
-	if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
+	if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
 		echo "PASSED!"
 	else
 		echo "FAILED!"
@@ -18,9 +18,9 @@
 
 for i in `ls tests/*.c`; do
 	testname=$(basename "$i" .c)
-	gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
+	gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null
 	echo -ne "(PRELOAD) $testname... "
-	if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
+	if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
 		echo "PASSED!"
 	else
 		echo "FAILED!"
diff --git a/tools/lib/lockdep/uinclude/asm/hash.h b/tools/lib/lockdep/uinclude/asm/hash.h
deleted file mode 100644
index d82b170b..0000000
--- a/tools/lib/lockdep/uinclude/asm/hash.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_GENERIC_HASH_H
-#define __ASM_GENERIC_HASH_H
-
-/* Stub */
-
-#endif /* __ASM_GENERIC_HASH_H */
diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/asm/hweight.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/asm/sections.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/bitops.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
deleted file mode 100644
index fd3e56a..0000000
--- a/tools/lib/lockdep/uinclude/linux/compiler.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_
-#define _LIBLOCKDEP_LINUX_COMPILER_H_
-
-#define __used		__attribute__((__unused__))
-#define unlikely
-#define READ_ONCE(x) (x)
-#define WRITE_ONCE(x, val) x=(val)
-#define RCU_INIT_POINTER(p, v) p=(v)
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/delay.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/ftrace.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/gfp.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h
deleted file mode 100644
index 0f84798..0000000
--- a/tools/lib/lockdep/uinclude/linux/hash.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/hash.h"
diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/interrupt.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
deleted file mode 100644
index 276c7a8..0000000
--- a/tools/lib/lockdep/uinclude/linux/kernel.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_
-#define _LIBLOCKDEP_LINUX_KERNEL_H_
-
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/rcu.h>
-#include <linux/hardirq.h>
-#include <linux/kern_levels.h>
-
-#ifndef container_of
-#define container_of(ptr, type, member) ({			\
-	const typeof(((type *)0)->member) * __mptr = (ptr);	\
-	(type *)((char *)__mptr - offsetof(type, member)); })
-#endif
-
-#define max(x, y) ({				\
-	typeof(x) _max1 = (x);			\
-	typeof(y) _max2 = (y);			\
-	(void) (&_max1 == &_max2);		\
-	_max1 > _max2 ? _max1 : _max2; })
-
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define WARN_ON(x) (x)
-#define WARN_ON_ONCE(x) (x)
-#define likely(x) (x)
-#define WARN(x, y...) (x)
-#define uninitialized_var(x) x
-#define __init
-#define noinline
-#define list_add_tail_rcu list_add_tail
-#define list_for_each_entry_rcu list_for_each_entry
-#define barrier() 
-#define synchronize_sched()
-
-#ifndef CALLER_ADDR0
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#endif
-
-#ifndef _RET_IP_
-#define _RET_IP_ CALLER_ADDR0
-#endif
-
-#ifndef _THIS_IP_
-#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/linkage.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h
deleted file mode 100644
index 6e9ef31..0000000
--- a/tools/lib/lockdep/uinclude/linux/list.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/list.h"
diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/lib/lockdep/uinclude/linux/module.h
deleted file mode 100644
index 09c7a7b..0000000
--- a/tools/lib/lockdep/uinclude/linux/module.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_MODULE_H_
-#define _LIBLOCKDEP_LINUX_MODULE_H_
-
-#define module_param(name, type, perm)
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/mutex.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h
deleted file mode 100644
index 0c27bdf..0000000
--- a/tools/lib/lockdep/uinclude/linux/poison.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../include/linux/poison.h"
diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h
deleted file mode 100644
index d73fe6f..0000000
--- a/tools/lib/lockdep/uinclude/linux/prefetch.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_
-#define _LIBLOCKDEP_LINUX_PREFETCH_H
-
-static inline void prefetch(void *a __attribute__((unused))) { }
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/proc_fs.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
deleted file mode 100644
index c375947..0000000
--- a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __always_inline
-#include "../../../include/linux/rbtree_augmented.h"
diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/linux/seq_file.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h
deleted file mode 100644
index 68c1aa2..0000000
--- a/tools/lib/lockdep/uinclude/linux/spinlock.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _LIBLOCKDEP_SPINLOCK_H_
-#define _LIBLOCKDEP_SPINLOCK_H_
-
-#include <pthread.h>
-#include <stdbool.h>
-
-#define arch_spinlock_t pthread_mutex_t
-#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
-
-static inline void arch_spin_lock(arch_spinlock_t *mutex)
-{
-	pthread_mutex_lock(mutex);
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *mutex)
-{
-	pthread_mutex_unlock(mutex);
-}
-
-static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
-{
-	return true;
-}
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h
deleted file mode 100644
index 05dfcd1..0000000
--- a/tools/lib/lockdep/uinclude/linux/stringify.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_
-#define _LIBLOCKDEP_LINUX_STRINGIFY_H_
-
-#define __stringify_1(x...)	#x
-#define __stringify(x...)	__stringify_1(x)
-
-#endif
diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h
deleted file mode 100644
index fab00ff..0000000
--- a/tools/lib/lockdep/uinclude/trace/events/lock.h
+++ /dev/null
@@ -1,3 +0,0 @@
-
-/* empty file */
-
diff --git a/tools/objtool/Build b/tools/objtool/Build
index d6cdece..6f2e198 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,5 +1,6 @@
 objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
+objtool-y += check.o
 objtool-y += elf.o
 objtool-y += special.o
 objtool-y += objtool.o
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 55a60d3..17c1195 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -127,28 +127,13 @@
 
 c) Higher live patching compatibility rate
 
-   (NOTE: This is not yet implemented)
+   Livepatch has an optional "consistency model", which is needed for
+   more complex patches.  In order for the consistency model to work,
+   stack traces need to be reliable (or an unreliable condition needs to
+   be detectable).  Objtool makes that possible.
 
-   Currently with CONFIG_LIVEPATCH there's a basic live patching
-   framework which is safe for roughly 85-90% of "security" fixes.  But
-   patches can't have complex features like function dependency or
-   prototype changes, or data structure changes.
-
-   There's a strong need to support patches which have the more complex
-   features so that the patch compatibility rate for security fixes can
-   eventually approach something resembling 100%.  To achieve that, a
-   "consistency model" is needed, which allows tasks to be safely
-   transitioned from an unpatched state to a patched state.
-
-   One of the key requirements of the currently proposed livepatch
-   consistency model [*] is that it needs to walk the stack of each
-   sleeping task to determine if it can be transitioned to the patched
-   state.  If objtool can ensure that stack traces are reliable, this
-   consistency model can be used and the live patching compatibility
-   rate can be improved significantly.
-
-   [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
-
+   For more details, see the livepatch documentation in the Linux kernel
+   source tree at Documentation/livepatch/livepatch.txt.
 
 Rules
 -----
@@ -201,80 +186,84 @@
    return normally.
 
 
-Errors in .S files
-------------------
+Objtool warnings
+----------------
 
-If you're getting an error in a compiled .S file which you don't
-understand, first make sure that the affected code follows the above
-rules.
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
+
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions.  See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
 
 Here are some examples of common warnings reported by objtool, what
 they mean, and suggestions for how to fix them.
 
 
-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
 
    The func() function made a function call without first saving and/or
-   updating the frame pointer.
+   updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
 
-   If func() is indeed a callable function, add proper frame pointer
-   logic using the FRAME_BEGIN and FRAME_END macros.  Otherwise, remove
-   its ELF function annotation by changing ENDPROC to END.
+   If the error is for an asm file, and func() is indeed a callable
+   function, add proper frame pointer logic using the FRAME_BEGIN and
+   FRAME_END macros.  Otherwise, if it's not a callable function, remove
+   its ELF function annotation by changing ENDPROC to END, and instead
+   use the manual CFI hint macros in asm/undwarf.h.
 
-   If you're getting this error in a .c file, see the "Errors in .c
-   files" section.
+   If it's a GCC-compiled .c file, the error may be because the function
+   uses an inline asm() statement which has a "call" instruction.  An
+   asm() statement with a call instruction must declare the use of the
+   stack pointer in its output operand.  For example, on x86_64:
+
+     register void *__sp asm("rsp");
+     asm volatile("call func" : "+r" (__sp));
+
+   Otherwise the stack frame may not get created before the call.
 
 
-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
 
-   A return instruction was detected, but objtool couldn't find a way
-   for a callable function to reach the instruction.
+   Objtool couldn't find a code path to reach the instruction.
 
-   If the return instruction is inside (or reachable from) a callable
-   function, the function needs to be annotated with the ENTRY/ENDPROC
-   macros.
+   If the error is for an asm file, and the instruction is inside (or
+   reachable from) a callable function, the function should be annotated
+   with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+   Otherwise, the code should probably be annotated with the CFI hint
+   macros in asm/undwarf.h so objtool and the unwinder can know the
+   stack state associated with the code.
 
-   If you _really_ need a return instruction outside of a function, and
-   are 100% sure that it won't affect stack traces, you can tell
-   objtool to ignore it.  See the "Adding exceptions" section below.
-
-
-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
-
-   The instruction lives inside of a callable function, but there's no
-   possible control flow path from the beginning of the function to the
-   instruction.
-
-   If the instruction is actually needed, and it's actually in a
-   callable function, ensure that its function is properly annotated
-   with ENTRY/ENDPROC.
+   If you're 100% sure the code won't affect stack traces, or if you're
+   just a bad person, you can tell objtool to ignore it.  See the
+   "Adding exceptions" section below.
 
    If it's not actually in a callable function (e.g. kernel entry code),
    change ENDPROC to END.
 
 
-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+4. file.o: warning: objtool: func(): can't find starting instruction
    or
-   asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+   file.o: warning: objtool: func()+0x11dd: can't decode instruction
 
-   Did you put data in a text section?  If so, that can confuse
+   Does the file have data in a text section?  If so, that can confuse
    objtool's instruction decoder.  Move the data to a more appropriate
    section like .data or .rodata.
 
 
-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
 
-   This is a kernel entry/exit instruction like sysenter or sysret.
-   Such instructions aren't allowed in a callable function, and are most
-   likely part of the kernel entry code.
-
-   If the instruction isn't actually in a callable function, change
-   ENDPROC to END.
+   This is a kernel entry/exit instruction like sysenter or iret.  Such
+   instructions aren't allowed in a callable function, and are most
+   likely part of the kernel entry code.  They should usually not have
+   the callable function annotation (ENDPROC) and should always be
+   annotated with the CFI hint macros in asm/undwarf.h.
 
 
-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
 
-   This is a dynamic jump or a jump to an undefined symbol.  Stacktool
+   This is a dynamic jump or a jump to an undefined symbol.  Objtool
    assumed it's a sibling call and detected that the frame pointer
    wasn't first restored to its original state.
 
@@ -282,24 +271,28 @@
    destination code to the local file.
 
    If the instruction is not actually in a callable function (e.g.
-   kernel entry code), change ENDPROC to END.
+   kernel entry code), change ENDPROC to END and annotate manually with
+   the CFI hint macros in asm/undwarf.h.
 
 
-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+7. file: warning: objtool: func()+0x5c: stack state mismatch
 
    The instruction's frame pointer state is inconsistent, depending on
    which execution path was taken to reach the instruction.
 
-   Make sure the function pushes and sets up the frame pointer (for
-   x86_64, this means rbp) at the beginning of the function and pops it
-   at the end of the function.  Also make sure that no other code in the
-   function touches the frame pointer.
+   Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+   pushes and sets up the frame pointer (for x86_64, this means rbp) at
+   the beginning of the function and pops it at the end of the function.
+   Also make sure that no other code in the function touches the frame
+   pointer.
+
+   Another possibility is that the code has some asm or inline asm which
+   does some unusual things to the stack or the frame pointer.  In such
+   cases it's probably appropriate to use the CFI hint macros in
+   asm/undwarf.h.
 
 
-Errors in .c files
-------------------
-
-1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+8. file.o: warning: objtool: funcA() falls through to next function funcB()
 
    This means that funcA() doesn't end with a return instruction or an
    unconditional jump, and that objtool has determined that the function
@@ -318,22 +311,6 @@
       might be corrupt due to a gcc bug.  For more details, see:
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
 
-2. If you're getting any other objtool error in a compiled .c file, it
-   may be because the file uses an asm() statement which has a "call"
-   instruction.  An asm() statement with a call instruction must declare
-   the use of the stack pointer in its output operand.  For example, on
-   x86_64:
-
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
-
-   Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
-   -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 27e019c..0e2765e 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -25,7 +25,7 @@
 all: $(OBJTOOL)
 
 INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
 # Allow old libelf to be used:
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index a59e061..21aeca8 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -19,25 +19,63 @@
 #define _ARCH_H
 
 #include <stdbool.h>
+#include <linux/list.h>
 #include "elf.h"
+#include "cfi.h"
 
-#define INSN_FP_SAVE		1
-#define INSN_FP_SETUP		2
-#define INSN_FP_RESTORE		3
-#define INSN_JUMP_CONDITIONAL	4
-#define INSN_JUMP_UNCONDITIONAL	5
-#define INSN_JUMP_DYNAMIC	6
-#define INSN_CALL		7
-#define INSN_CALL_DYNAMIC	8
-#define INSN_RETURN		9
-#define INSN_CONTEXT_SWITCH	10
-#define INSN_NOP		11
-#define INSN_OTHER		12
+#define INSN_JUMP_CONDITIONAL	1
+#define INSN_JUMP_UNCONDITIONAL	2
+#define INSN_JUMP_DYNAMIC	3
+#define INSN_CALL		4
+#define INSN_CALL_DYNAMIC	5
+#define INSN_RETURN		6
+#define INSN_CONTEXT_SWITCH	7
+#define INSN_STACK		8
+#define INSN_NOP		9
+#define INSN_OTHER		10
 #define INSN_LAST		INSN_OTHER
 
+enum op_dest_type {
+	OP_DEST_REG,
+	OP_DEST_REG_INDIRECT,
+	OP_DEST_MEM,
+	OP_DEST_PUSH,
+	OP_DEST_LEAVE,
+};
+
+struct op_dest {
+	enum op_dest_type type;
+	unsigned char reg;
+	int offset;
+};
+
+enum op_src_type {
+	OP_SRC_REG,
+	OP_SRC_REG_INDIRECT,
+	OP_SRC_CONST,
+	OP_SRC_POP,
+	OP_SRC_ADD,
+	OP_SRC_AND,
+};
+
+struct op_src {
+	enum op_src_type type;
+	unsigned char reg;
+	int offset;
+};
+
+struct stack_op {
+	struct op_dest dest;
+	struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *displacement);
+			    unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
 
 #endif /* _ARCH_H */
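
Every decoded instruction that touches the stack is summarized as a struct stack_op, which the checker later replays against its CFI model. As a hedged illustration, a plain "push %rbp" would be described roughly like this:

	struct stack_op op = {
		.src  = { .type = OP_SRC_REG, .reg = CFI_BP },	/* value comes from %rbp */
		.dest = { .type = OP_DEST_PUSH },		/* and is pushed onto the stack */
	};
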
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 6ac99e3..a36c2eb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -27,6 +27,17 @@
 #include "../../arch.h"
 #include "../../warn.h"
 
+static unsigned char op_to_cfi_reg[][2] = {
+	{CFI_AX, CFI_R8},
+	{CFI_CX, CFI_R9},
+	{CFI_DX, CFI_R10},
+	{CFI_BX, CFI_R11},
+	{CFI_SP, CFI_R12},
+	{CFI_BP, CFI_R13},
+	{CFI_SI, CFI_R14},
+	{CFI_DI, CFI_R15},
+};
+
 static int is_x86_64(struct elf *elf)
 {
 	switch (elf->ehdr.e_machine) {
@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf)
 	}
 }
 
+bool arch_callee_saved_reg(unsigned char reg)
+{
+	switch (reg) {
+	case CFI_BP:
+	case CFI_BX:
+	case CFI_R12:
+	case CFI_R13:
+	case CFI_R14:
+	case CFI_R15:
+		return true;
+
+	case CFI_AX:
+	case CFI_CX:
+	case CFI_DX:
+	case CFI_SI:
+	case CFI_DI:
+	case CFI_SP:
+	case CFI_R8:
+	case CFI_R9:
+	case CFI_R10:
+	case CFI_R11:
+	case CFI_RA:
+	default:
+		return false;
+	}
+}
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
 			    unsigned int *len, unsigned char *type,
-			    unsigned long *immediate)
+			    unsigned long *immediate, struct stack_op *op)
 {
 	struct insn insn;
-	int x86_64;
-	unsigned char op1, op2, ext;
+	int x86_64, sign;
+	unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+		      modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+		      sib = 0;
 
 	x86_64 = is_x86_64(elf);
 	if (x86_64 == -1)
 		return -1;
 
-	insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+	insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
 	insn_get_length(&insn);
-	insn_get_opcode(&insn);
-	insn_get_modrm(&insn);
-	insn_get_immediate(&insn);
 
 	if (!insn_complete(&insn)) {
 		WARN_FUNC("can't decode instruction", sec, offset);
@@ -73,67 +110,323 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	op1 = insn.opcode.bytes[0];
 	op2 = insn.opcode.bytes[1];
 
+	if (insn.rex_prefix.nbytes) {
+		rex = insn.rex_prefix.bytes[0];
+		rex_w = X86_REX_W(rex) >> 3;
+		rex_r = X86_REX_R(rex) >> 2;
+		rex_b = X86_REX_B(rex);
+	}
+
+	if (insn.modrm.nbytes) {
+		modrm = insn.modrm.bytes[0];
+		modrm_mod = X86_MODRM_MOD(modrm);
+		modrm_reg = X86_MODRM_REG(modrm);
+		modrm_rm = X86_MODRM_RM(modrm);
+	}
+
+	if (insn.sib.nbytes)
+		sib = insn.sib.bytes[0];
+
 	switch (op1) {
-	case 0x55:
-		if (!insn.rex_prefix.nbytes)
-			/* push rbp */
-			*type = INSN_FP_SAVE;
+
+	case 0x1:
+	case 0x29:
+		if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+			/* add/sub reg, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+		}
 		break;
 
-	case 0x5d:
-		if (!insn.rex_prefix.nbytes)
-			/* pop rbp */
-			*type = INSN_FP_RESTORE;
+	case 0x50 ... 0x57:
+
+		/* push reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_REG;
+		op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+		op->dest.type = OP_DEST_PUSH;
+
+		break;
+
+	case 0x58 ... 0x5f:
+
+		/* pop reg */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
+		break;
+
+	case 0x68:
+	case 0x6a:
+		/* push immediate */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
 		break;
 
 	case 0x70 ... 0x7f:
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
+	case 0x81:
+	case 0x83:
+		if (rex != 0x48)
+			break;
+
+		if (modrm == 0xe4) {
+			/* and imm, %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_AND;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.immediate.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (modrm == 0xc4)
+			sign = 1;
+		else if (modrm == 0xec)
+			sign = -1;
+		else
+			break;
+
+		/* add/sub imm, %rsp */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_ADD;
+		op->src.reg = CFI_SP;
+		op->src.offset = insn.immediate.value * sign;
+		op->dest.type = OP_DEST_REG;
+		op->dest.reg = CFI_SP;
+		break;
+
 	case 0x89:
-		if (insn.rex_prefix.nbytes == 1 &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
-			/* mov rsp, rbp */
-			*type = INSN_FP_SETUP;
+		if (rex == 0x48 && modrm == 0xe5) {
+
+			/* mov %rsp, %rbp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+			break;
+		}
+		/* fallthrough */
+	case 0x88:
+		if (!rex_b &&
+		    (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+			/* mov reg, disp(%rbp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_BP;
+			op->dest.offset = insn.displacement.value;
+
+		} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+			/* mov reg, disp(%rsp) */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+			op->dest.type = OP_DEST_REG_INDIRECT;
+			op->dest.reg = CFI_SP;
+			op->dest.offset = insn.displacement.value;
+		}
+
+		break;
+
+	case 0x8b:
+		if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+			/* mov disp(%rbp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+		} else if (rex_w && !rex_b && sib == 0x24 &&
+			   modrm_mod != 3 && modrm_rm == 4) {
+
+			/* mov disp(%rsp), reg */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG_INDIRECT;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+		}
+
 		break;
 
 	case 0x8d:
-		if (insn.rex_prefix.nbytes &&
-		    insn.rex_prefix.bytes[0] == 0x48 &&
-		    insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
-		    insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
-			/* lea %(rsp), %rbp */
-			*type = INSN_FP_SETUP;
+		if (rex == 0x48 && modrm == 0x65) {
+
+			/* lea -disp(%rbp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_BP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
+		    insn.displacement.value == 8) {
+
+			/*
+			 * lea 0x8(%rsp), %r10
+			 *
+			 * Here r10 is the "drap" pointer, used as a stack
+			 * pointer helper when the stack gets realigned.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_R10;
+			break;
+		}
+
+		if (rex == 0x4c && modrm == 0x6c && sib == 0x24 &&
+		    insn.displacement.value == 16) {
+
+			/*
+			 * lea 0x10(%rsp), %r13
+			 *
+			 * Here r13 is the "drap" pointer, used as a stack
+			 * pointer helper when the stack gets realigned.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = 16;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_R13;
+			break;
+		}
+
+		if (rex == 0x49 && modrm == 0x62 &&
+		    insn.displacement.value == -8) {
+
+			/*
+			 * lea -0x8(%r10), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R10;
+			op->src.offset = -8;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x49 && modrm == 0x65 &&
+		    insn.displacement.value == -16) {
+
+			/*
+			 * lea -0x10(%r13), %rsp
+			 *
+			 * Restoring rsp back to its original value after a
+			 * stack realignment.
+			 */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_R13;
+			op->src.offset = -16;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		break;
+
+	case 0x8f:
+		/* pop to mem */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
 		break;
 
 	case 0x90:
 		*type = INSN_NOP;
 		break;
 
+	case 0x9c:
+		/* pushf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_CONST;
+		op->dest.type = OP_DEST_PUSH;
+		break;
+
+	case 0x9d:
+		/* popf */
+		*type = INSN_STACK;
+		op->src.type = OP_SRC_POP;
+		op->dest.type = OP_DEST_MEM;
+		break;
+
 	case 0x0f:
+
 		if (op2 >= 0x80 && op2 <= 0x8f)
 			*type = INSN_JUMP_CONDITIONAL;
 		else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
 			 op2 == 0x35)
+
 			/* syscall, sysret, sysenter, sysexit */
 			*type = INSN_CONTEXT_SWITCH;
+
 		else if (op2 == 0x0d || op2 == 0x1f)
+
 			/* nopl/nopw */
 			*type = INSN_NOP;
-		else if (op2 == 0x01 && insn.modrm.nbytes &&
-			 (insn.modrm.bytes[0] == 0xc2 ||
-			  insn.modrm.bytes[0] == 0xd8))
-			/* vmlaunch, vmrun */
-			*type = INSN_CONTEXT_SWITCH;
+
+		else if (op2 == 0xa0 || op2 == 0xa8) {
+
+			/* push fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+
+		} else if (op2 == 0xa1 || op2 == 0xa9) {
+
+			/* pop fs/gs */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_POP;
+			op->dest.type = OP_DEST_MEM;
+		}
 
 		break;
 
-	case 0xc9: /* leave */
-		*type = INSN_FP_RESTORE;
+	case 0xc9:
+		/*
+		 * leave
+		 *
+		 * equivalent to:
+		 * mov bp, sp
+		 * pop bp
+		 */
+		*type = INSN_STACK;
+		op->dest.type = OP_DEST_LEAVE;
+
 		break;
 
-	case 0xe3: /* jecxz/jrcxz */
+	case 0xe3:
+		/* jecxz/jrcxz */
 		*type = INSN_JUMP_CONDITIONAL;
 		break;
 
@@ -158,14 +451,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0xff:
-		ext = X86_MODRM_REG(insn.modrm.bytes[0]);
-		if (ext == 2 || ext == 3)
+		if (modrm_reg == 2 || modrm_reg == 3)
+
 			*type = INSN_CALL_DYNAMIC;
-		else if (ext == 4)
+
+		else if (modrm_reg == 4)
+
 			*type = INSN_JUMP_DYNAMIC;
-		else if (ext == 5) /*jmpf */
+
+		else if (modrm_reg == 5)
+
+			/* jmpf */
 			*type = INSN_CONTEXT_SWITCH;
 
+		else if (modrm_reg == 6) {
+
+			/* push from mem */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_CONST;
+			op->dest.type = OP_DEST_PUSH;
+		}
+
 		break;
 
 	default:
@@ -176,3 +482,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 
 	return 0;
 }
+
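+/*
+ * Initial CFI state at function entry: the call instruction has pushed an
+ * 8-byte return address, so the CFA (call frame address) is rsp+8 and the
+ * return address lives at CFA-8 (register slot 16, i.e. CFI_RA).
+ */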
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+	int i;
+
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		state->regs[i].base = CFI_UNDEFINED;
+		state->regs[i].offset = 0;
+	}
+
+	/* initial CFA (call frame address) */
+	state->cfa.base = CFI_SP;
+	state->cfa.offset = 8;
+
+	/* initial RA (return address) */
+	state->regs[16].base = CFI_CFA;
+	state->regs[16].offset = -8;
+}
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 767be7c..12e3771 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 282a603..365c34e 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -25,1286 +25,32 @@
  * For more information, see tools/objtool/Documentation/stack-validation.txt.
  */
 
-#include <string.h>
-#include <stdlib.h>
 #include <subcmd/parse-options.h>
-
 #include "builtin.h"
-#include "elf.h"
-#include "special.h"
-#include "arch.h"
-#include "warn.h"
+#include "check.h"
 
-#include <linux/hashtable.h>
-#include <linux/kernel.h>
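+/* set by the -f/--no-fp option below and passed through to check() */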
+bool nofp;
 
-#define STATE_FP_SAVED		0x1
-#define STATE_FP_SETUP		0x2
-#define STATE_FENTRY		0x4
-
-struct instruction {
-	struct list_head list;
-	struct hlist_node hash;
-	struct section *sec;
-	unsigned long offset;
-	unsigned int len, state;
-	unsigned char type;
-	unsigned long immediate;
-	bool alt_group, visited, dead_end;
-	struct symbol *call_dest;
-	struct instruction *jump_dest;
-	struct list_head alts;
-	struct symbol *func;
-};
-
-struct alternative {
-	struct list_head list;
-	struct instruction *insn;
-};
-
-struct objtool_file {
-	struct elf *elf;
-	struct list_head insn_list;
-	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file;
-};
-
-const char *objname;
-static bool nofp;
-
-static struct instruction *find_insn(struct objtool_file *file,
-				     struct section *sec, unsigned long offset)
-{
-	struct instruction *insn;
-
-	hash_for_each_possible(file->insn_hash, insn, hash, offset)
-		if (insn->sec == sec && insn->offset == offset)
-			return insn;
-
-	return NULL;
-}
-
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-					      struct instruction *insn)
-{
-	struct instruction *next = list_next_entry(insn, list);
-
-	if (&next->list == &file->insn_list || next->sec != insn->sec)
-		return NULL;
-
-	return next;
-}
-
-static bool gcov_enabled(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *sym;
-
-	list_for_each_entry(sec, &file->elf->sections, list)
-		list_for_each_entry(sym, &sec->symbol_list, list)
-			if (!strncmp(sym->name, "__gcov_.", 8))
-				return true;
-
-	return false;
-}
-
-#define for_each_insn(file, insn)					\
-	list_for_each_entry(insn, &file->insn_list, list)
-
-#define func_for_each_insn(file, func, insn)				\
-	for (insn = find_insn(file, func->sec, func->offset);		\
-	     insn && &insn->list != &file->insn_list &&			\
-		insn->sec == func->sec &&				\
-		insn->offset < func->offset + func->len;		\
-	     insn = list_next_entry(insn, list))
-
-#define func_for_each_insn_continue_reverse(file, func, insn)		\
-	for (insn = list_prev_entry(insn, list);			\
-	     &insn->list != &file->insn_list &&				\
-		insn->sec == func->sec && insn->offset >= func->offset;	\
-	     insn = list_prev_entry(insn, list))
-
-#define sec_for_each_insn_from(file, insn)				\
-	for (; insn; insn = next_insn_same_sec(file, insn))
-
-
-/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
-	struct rela *rela;
-	struct instruction *insn;
-
-	/* check for STACK_FRAME_NON_STANDARD */
-	if (file->whitelist && file->whitelist->rela)
-		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
-			if (rela->sym->type == STT_SECTION &&
-			    rela->sym->sec == func->sec &&
-			    rela->addend == func->offset)
-				return true;
-			if (rela->sym->type == STT_FUNC && rela->sym == func)
-				return true;
-		}
-
-	/* check if it has a context switching instruction */
-	func_for_each_insn(file, func, insn)
-		if (insn->type == INSN_CONTEXT_SWITCH)
-			return true;
-
-	return false;
-}
-
-/*
- * This checks to see if the given function is a "noreturn" function.
- *
- * For global functions which are outside the scope of this object file, we
- * have to keep a manual list of them.
- *
- * For local functions, we have to detect them manually by simply looking for
- * the lack of a return instruction.
- *
- * Returns:
- *  -1: error
- *   0: no dead end
- *   1: dead end
- */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
-			       int recursion)
-{
-	int i;
-	struct instruction *insn;
-	bool empty = true;
-
-	/*
-	 * Unfortunately these have to be hard coded because the noreturn
-	 * attribute isn't provided in ELF data.
-	 */
-	static const char * const global_noreturns[] = {
-		"__stack_chk_fail",
-		"panic",
-		"do_exit",
-		"do_task_dead",
-		"__module_put_and_exit",
-		"complete_and_exit",
-		"kvm_spurious_fault",
-		"__reiserfs_panic",
-		"lbug_with_loc"
-	};
-
-	if (func->bind == STB_WEAK)
-		return 0;
-
-	if (func->bind == STB_GLOBAL)
-		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
-			if (!strcmp(func->name, global_noreturns[i]))
-				return 1;
-
-	if (!func->sec)
-		return 0;
-
-	func_for_each_insn(file, func, insn) {
-		empty = false;
-
-		if (insn->type == INSN_RETURN)
-			return 0;
-	}
-
-	if (empty)
-		return 0;
-
-	/*
-	 * A function can have a sibling call instead of a return.  In that
-	 * case, the function's dead-end status depends on whether the target
-	 * of the sibling call returns.
-	 */
-	func_for_each_insn(file, func, insn) {
-		if (insn->sec != func->sec ||
-		    insn->offset >= func->offset + func->len)
-			break;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
-			struct instruction *dest = insn->jump_dest;
-			struct symbol *dest_func;
-
-			if (!dest)
-				/* sibling call to another file */
-				return 0;
-
-			if (dest->sec != func->sec ||
-			    dest->offset < func->offset ||
-			    dest->offset >= func->offset + func->len) {
-				/* local sibling call */
-				dest_func = find_symbol_by_offset(dest->sec,
-								  dest->offset);
-				if (!dest_func)
-					continue;
-
-				if (recursion == 5) {
-					WARN_FUNC("infinite recursion (objtool bug!)",
-						  dest->sec, dest->offset);
-					return -1;
-				}
-
-				return __dead_end_function(file, dest_func,
-							   recursion + 1);
-			}
-		}
-
-		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
-			/* sibling call */
-			return 0;
-	}
-
-	return 1;
-}
-
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
-{
-	return __dead_end_function(file, func, 0);
-}
-
-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	unsigned long offset;
-	struct instruction *insn;
-	int ret;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
-			continue;
-
-		for (offset = 0; offset < sec->len; offset += insn->len) {
-			insn = malloc(sizeof(*insn));
-			memset(insn, 0, sizeof(*insn));
-
-			INIT_LIST_HEAD(&insn->alts);
-			insn->sec = sec;
-			insn->offset = offset;
-
-			ret = arch_decode_instruction(file->elf, sec, offset,
-						      sec->len - offset,
-						      &insn->len, &insn->type,
-						      &insn->immediate);
-			if (ret)
-				return ret;
-
-			if (!insn->type || insn->type > INSN_LAST) {
-				WARN_FUNC("invalid instruction type %d",
-					  insn->sec, insn->offset, insn->type);
-				return -1;
-			}
-
-			hash_add(file->insn_hash, &insn->hash, insn->offset);
-			list_add_tail(&insn->list, &file->insn_list);
-		}
-
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!find_insn(file, sec, func->offset)) {
-				WARN("%s(): can't find starting instruction",
-				     func->name);
-				return -1;
-			}
-
-			func_for_each_insn(file, func, insn)
-				if (!insn->func)
-					insn->func = func;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find all uses of the unreachable() macro, which are code path dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
-	struct section *sec;
-	struct rela *rela;
-	struct instruction *insn;
-	bool found;
-
-	sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
-	if (!sec)
-		return 0;
-
-	list_for_each_entry(rela, &sec->rela_list, list) {
-		if (rela->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", sec->name);
-			return -1;
-		}
-		insn = find_insn(file, rela->sym->sec, rela->addend);
-		if (insn)
-			insn = list_prev_entry(insn, list);
-		else if (rela->addend == rela->sym->sec->len) {
-			found = false;
-			list_for_each_entry_reverse(insn, &file->insn_list, list) {
-				if (insn->sec == rela->sym->sec) {
-					found = true;
-					break;
-				}
-			}
-
-			if (!found) {
-				WARN("can't find unreachable insn at %s+0x%x",
-				     rela->sym->sec->name, rela->addend);
-				return -1;
-			}
-		} else {
-			WARN("can't find unreachable insn at %s+0x%x",
-			     rela->sym->sec->name, rela->addend);
-			return -1;
-		}
-
-		insn->dead_end = true;
-	}
-
-	return 0;
-}
-
-/*
- * Warnings shouldn't be reported for ignored functions.
- */
-static void add_ignores(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			if (!ignore_func(file, func))
-				continue;
-
-			func_for_each_insn(file, func, insn)
-				insn->visited = true;
-		}
-	}
-}
-
-/*
- * Find the destination instructions for all jumps.
- */
-static int add_jump_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct rela *rela;
-	struct section *dest_sec;
-	unsigned long dest_off;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		/* skip ignores */
-		if (insn->visited)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_sec = insn->sec;
-			dest_off = insn->offset + insn->len + insn->immediate;
-		} else if (rela->sym->type == STT_SECTION) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->addend + 4;
-		} else if (rela->sym->sec->idx) {
-			dest_sec = rela->sym->sec;
-			dest_off = rela->sym->sym.st_value + rela->addend + 4;
-		} else {
-			/* sibling call */
-			insn->jump_dest = 0;
-			continue;
-		}
-
-		insn->jump_dest = find_insn(file, dest_sec, dest_off);
-		if (!insn->jump_dest) {
-
-			/*
-			 * This is a special case where an alt instruction
-			 * jumps past the end of the section.  These are
-			 * handled later in handle_group_alt().
-			 */
-			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-				continue;
-
-			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
-				  insn->sec, insn->offset, dest_sec->name,
-				  dest_off);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Find the destination instructions for all calls.
- */
-static int add_call_destinations(struct objtool_file *file)
-{
-	struct instruction *insn;
-	unsigned long dest_off;
-	struct rela *rela;
-
-	for_each_insn(file, insn) {
-		if (insn->type != INSN_CALL)
-			continue;
-
-		rela = find_rela_by_dest_range(insn->sec, insn->offset,
-					       insn->len);
-		if (!rela) {
-			dest_off = insn->offset + insn->len + insn->immediate;
-			insn->call_dest = find_symbol_by_offset(insn->sec,
-								dest_off);
-			if (!insn->call_dest) {
-				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-					  insn->sec, insn->offset, dest_off);
-				return -1;
-			}
-		} else if (rela->sym->type == STT_SECTION) {
-			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
-								rela->addend+4);
-			if (!insn->call_dest ||
-			    insn->call_dest->type != STT_FUNC) {
-				WARN_FUNC("can't find call dest symbol at %s+0x%x",
-					  insn->sec, insn->offset,
-					  rela->sym->sec->name,
-					  rela->addend + 4);
-				return -1;
-			}
-		} else
-			insn->call_dest = rela->sym;
-	}
-
-	return 0;
-}
-
-/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- *    instruction at the end so that validate_branch() skips all the original
- *    replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero.  In
- *    that case the old instructions are replaced with noops.  We simulate that
- *    by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- *    conditionally jumps to the _end_ of the entry.  We have to modify these
- *    jumps' destinations to point back to .text rather than the end of the
- *    entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- *    which is a "very very small percentage of machines".
- */
-static int handle_group_alt(struct objtool_file *file,
-			    struct special_alt *special_alt,
-			    struct instruction *orig_insn,
-			    struct instruction **new_insn)
-{
-	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
-	unsigned long dest_off;
-
-	last_orig_insn = NULL;
-	insn = orig_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-			break;
-
-		if (special_alt->skip_orig)
-			insn->type = INSN_NOP;
-
-		insn->alt_group = true;
-		last_orig_insn = insn;
-	}
-
-	if (!next_insn_same_sec(file, last_orig_insn)) {
-		WARN("%s: don't know how to handle alternatives at end of section",
-		     special_alt->orig_sec->name);
-		return -1;
-	}
-
-	fake_jump = malloc(sizeof(*fake_jump));
-	if (!fake_jump) {
-		WARN("malloc failed");
-		return -1;
-	}
-	memset(fake_jump, 0, sizeof(*fake_jump));
-	INIT_LIST_HEAD(&fake_jump->alts);
-	fake_jump->sec = special_alt->new_sec;
-	fake_jump->offset = -1;
-	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-
-	if (!special_alt->new_len) {
-		*new_insn = fake_jump;
-		return 0;
-	}
-
-	last_new_insn = NULL;
-	insn = *new_insn;
-	sec_for_each_insn_from(file, insn) {
-		if (insn->offset >= special_alt->new_off + special_alt->new_len)
-			break;
-
-		last_new_insn = insn;
-
-		if (insn->type != INSN_JUMP_CONDITIONAL &&
-		    insn->type != INSN_JUMP_UNCONDITIONAL)
-			continue;
-
-		if (!insn->immediate)
-			continue;
-
-		dest_off = insn->offset + insn->len + insn->immediate;
-		if (dest_off == special_alt->new_off + special_alt->new_len)
-			insn->jump_dest = fake_jump;
-
-		if (!insn->jump_dest) {
-			WARN_FUNC("can't find alternative jump destination",
-				  insn->sec, insn->offset);
-			return -1;
-		}
-	}
-
-	if (!last_new_insn) {
-		WARN_FUNC("can't find last new alternative instruction",
-			  special_alt->new_sec, special_alt->new_off);
-		return -1;
-	}
-
-	list_add(&fake_jump->list, &last_new_insn->list);
-
-	return 0;
-}
-
-/*
- * A jump table entry can either convert a nop to a jump or a jump to a nop.
- * If the original instruction is a jump, make the alt entry an effective nop
- * by just skipping the original instruction.
- */
-static int handle_jump_alt(struct objtool_file *file,
-			   struct special_alt *special_alt,
-			   struct instruction *orig_insn,
-			   struct instruction **new_insn)
-{
-	if (orig_insn->type == INSN_NOP)
-		return 0;
-
-	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
-		WARN_FUNC("unsupported instruction at jump label",
-			  orig_insn->sec, orig_insn->offset);
-		return -1;
-	}
-
-	*new_insn = list_next_entry(orig_insn, list);
-	return 0;
-}
-
-/*
- * Read all the special sections which have alternate instructions which can be
- * patched in or redirected to at runtime.  Each instruction having alternate
- * instruction(s) has them added to its insn->alts list, which will be
- * traversed in validate_branch().
- */
-static int add_special_section_alts(struct objtool_file *file)
-{
-	struct list_head special_alts;
-	struct instruction *orig_insn, *new_insn;
-	struct special_alt *special_alt, *tmp;
-	struct alternative *alt;
-	int ret;
-
-	ret = special_get_alts(file->elf, &special_alts);
-	if (ret)
-		return ret;
-
-	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			ret = -1;
-			goto out;
-		}
-
-		orig_insn = find_insn(file, special_alt->orig_sec,
-				      special_alt->orig_off);
-		if (!orig_insn) {
-			WARN_FUNC("special: can't find orig instruction",
-				  special_alt->orig_sec, special_alt->orig_off);
-			ret = -1;
-			goto out;
-		}
-
-		new_insn = NULL;
-		if (!special_alt->group || special_alt->new_len) {
-			new_insn = find_insn(file, special_alt->new_sec,
-					     special_alt->new_off);
-			if (!new_insn) {
-				WARN_FUNC("special: can't find new instruction",
-					  special_alt->new_sec,
-					  special_alt->new_off);
-				ret = -1;
-				goto out;
-			}
-		}
-
-		if (special_alt->group) {
-			ret = handle_group_alt(file, special_alt, orig_insn,
-					       &new_insn);
-			if (ret)
-				goto out;
-		} else if (special_alt->jump_or_nop) {
-			ret = handle_jump_alt(file, special_alt, orig_insn,
-					      &new_insn);
-			if (ret)
-				goto out;
-		}
-
-		alt->insn = new_insn;
-		list_add_tail(&alt->list, &orig_insn->alts);
-
-		list_del(&special_alt->list);
-		free(special_alt);
-	}
-
-out:
-	return ret;
-}
-
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-			    struct instruction *insn, struct rela *table,
-			    struct rela *next_table)
-{
-	struct rela *rela = table;
-	struct instruction *alt_insn;
-	struct alternative *alt;
-
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
-		if (rela == next_table)
-			break;
-
-		if (rela->sym->sec != insn->sec ||
-		    rela->addend <= func->offset ||
-		    rela->addend >= func->offset + func->len)
-			break;
-
-		alt_insn = find_insn(file, insn->sec, rela->addend);
-		if (!alt_insn) {
-			WARN("%s: can't find instruction at %s+0x%x",
-			     file->rodata->rela->name, insn->sec->name,
-			     rela->addend);
-			return -1;
-		}
-
-		alt = malloc(sizeof(*alt));
-		if (!alt) {
-			WARN("malloc failed");
-			return -1;
-		}
-
-		alt->insn = alt_insn;
-		list_add_tail(&alt->list, &insn->alts);
-	}
-
-	return 0;
-}
-
-/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- *    This is the most common case by far.  It jumps to an address in a simple
- *    jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- *    This is caused by a rare GCC quirk, currently only seen in three driver
- *    functions in the kernel, only with certain obscure non-distro configs.
- *
- *    As part of an optimization, GCC makes a copy of an existing switch jump
- *    table, modifies it, and then hard-codes the jump (albeit with an indirect
- *    jump) to use a single entry in the table.  The rest of the jump table and
- *    some of its jump targets remain as dead code.
- *
- *    In such a case we can just crudely ignore all unreachable instruction
- *    warnings for the entire object file.  Ideally we would just ignore them
- *    for the function, but that would require redesigning the code quite a
- *    bit.  And honestly that's just not worth doing: unreachable instruction
- *    warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- *    ... some instructions ...
- *    jmpq *(%reg1,%reg2,8)
- *
- *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
- *    writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- *    ensure the same register is used in the mov and jump instructions.
- */
-static struct rela *find_switch_table(struct objtool_file *file,
-				      struct symbol *func,
-				      struct instruction *insn)
-{
-	struct rela *text_rela, *rodata_rela;
-	struct instruction *orig_insn = insn;
-
-	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-	if (text_rela && text_rela->sym == file->rodata->sym) {
-		/* case 1 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend);
-		if (rodata_rela)
-			return rodata_rela;
-
-		/* case 2 */
-		rodata_rela = find_rela_by_dest(file->rodata,
-						text_rela->addend + 4);
-		if (!rodata_rela)
-			return NULL;
-		file->ignore_unreachables = true;
-		return rodata_rela;
-	}
-
-	/* case 3 */
-	func_for_each_insn_continue_reverse(file, func, insn) {
-		if (insn->type == INSN_JUMP_DYNAMIC)
-			break;
-
-		/* allow small jumps within the range */
-		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
-		    insn->jump_dest &&
-		    (insn->jump_dest->offset <= insn->offset ||
-		     insn->jump_dest->offset > orig_insn->offset))
-		    break;
-
-		/* look for a relocation which references .rodata */
-		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
-						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
-			continue;
-
-		/*
-		 * Make sure the .rodata address isn't associated with a
-		 * symbol.  gcc jump tables are anonymous data.
-		 */
-		if (find_symbol_containing(file->rodata, text_rela->addend))
-			continue;
-
-		return find_rela_by_dest(file->rodata, text_rela->addend);
-	}
-
-	return NULL;
-}
-
-static int add_func_switch_tables(struct objtool_file *file,
-				  struct symbol *func)
-{
-	struct instruction *insn, *prev_jump = NULL;
-	struct rela *rela, *prev_rela = NULL;
-	int ret;
-
-	func_for_each_insn(file, func, insn) {
-		if (insn->type != INSN_JUMP_DYNAMIC)
-			continue;
-
-		rela = find_switch_table(file, func, insn);
-		if (!rela)
-			continue;
-
-		/*
-		 * We found a switch table, but we don't know yet how big it
-		 * is.  Don't add it until we reach the end of the function or
-		 * the beginning of another switch table in the same function.
-		 */
-		if (prev_jump) {
-			ret = add_switch_table(file, func, prev_jump, prev_rela,
-					       rela);
-			if (ret)
-				return ret;
-		}
-
-		prev_jump = insn;
-		prev_rela = rela;
-	}
-
-	if (prev_jump) {
-		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/*
- * For some switch statements, gcc generates a jump table in the .rodata
- * section which contains a list of addresses within the function to jump to.
- * This finds these jump tables and adds them to the insn->alts lists.
- */
-static int add_switch_table_alts(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	int ret;
-
-	if (!file->rodata || !file->rodata->rela)
-		return 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			ret = add_func_switch_tables(file, func);
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int decode_sections(struct objtool_file *file)
-{
-	int ret;
-
-	ret = decode_instructions(file);
-	if (ret)
-		return ret;
-
-	ret = add_dead_ends(file);
-	if (ret)
-		return ret;
-
-	add_ignores(file);
-
-	ret = add_jump_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_call_destinations(file);
-	if (ret)
-		return ret;
-
-	ret = add_special_section_alts(file);
-	if (ret)
-		return ret;
-
-	ret = add_switch_table_alts(file);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static bool is_fentry_call(struct instruction *insn)
-{
-	if (insn->type == INSN_CALL &&
-	    insn->call_dest->type == STT_NOTYPE &&
-	    !strcmp(insn->call_dest->name, "__fentry__"))
-		return true;
-
-	return false;
-}
-
-static bool has_modified_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) ||
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static bool has_valid_stack_frame(struct instruction *insn)
-{
-	return (insn->state & STATE_FP_SAVED) &&
-	       (insn->state & STATE_FP_SETUP);
-}
-
-static unsigned int frame_state(unsigned long state)
-{
-	return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
-}
-
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
- */
-static int validate_branch(struct objtool_file *file,
-			   struct instruction *first, unsigned char first_state)
-{
-	struct alternative *alt;
-	struct instruction *insn;
-	struct section *sec;
-	struct symbol *func = NULL;
-	unsigned char state;
-	int ret;
-
-	insn = first;
-	sec = insn->sec;
-	state = first_state;
-
-	if (insn->alt_group && list_empty(&insn->alts)) {
-		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-			  sec, insn->offset);
-		return 1;
-	}
-
-	while (1) {
-		if (file->c_file && insn->func) {
-			if (func && func != insn->func) {
-				WARN("%s() falls through to next function %s()",
-				     func->name, insn->func->name);
-				return 1;
-			}
-
-			func = insn->func;
-		}
-
-		if (insn->visited) {
-			if (frame_state(insn->state) != frame_state(state)) {
-				WARN_FUNC("frame pointer state mismatch",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-		}
-
-		insn->visited = true;
-		insn->state = state;
-
-		list_for_each_entry(alt, &insn->alts, list) {
-			ret = validate_branch(file, alt->insn, state);
-			if (ret)
-				return 1;
-		}
-
-		switch (insn->type) {
-
-		case INSN_FP_SAVE:
-			if (!nofp) {
-				if (state & STATE_FP_SAVED) {
-					WARN_FUNC("duplicate frame pointer save",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_FP_SETUP:
-			if (!nofp) {
-				if (state & STATE_FP_SETUP) {
-					WARN_FUNC("duplicate frame pointer setup",
-						  sec, insn->offset);
-					return 1;
-				}
-				state |= STATE_FP_SETUP;
-			}
-			break;
-
-		case INSN_FP_RESTORE:
-			if (!nofp) {
-				if (has_valid_stack_frame(insn))
-					state &= ~STATE_FP_SETUP;
-
-				state &= ~STATE_FP_SAVED;
-			}
-			break;
-
-		case INSN_RETURN:
-			if (!nofp && has_modified_stack_frame(insn)) {
-				WARN_FUNC("return without frame pointer restore",
-					  sec, insn->offset);
-				return 1;
-			}
-			return 0;
-
-		case INSN_CALL:
-			if (is_fentry_call(insn)) {
-				state |= STATE_FENTRY;
-				break;
-			}
-
-			ret = dead_end_function(file, insn->call_dest);
-			if (ret == 1)
-				return 0;
-			if (ret == -1)
-				return 1;
-
-			/* fallthrough */
-		case INSN_CALL_DYNAMIC:
-			if (!nofp && !has_valid_stack_frame(insn)) {
-				WARN_FUNC("call without frame pointer save/setup",
-					  sec, insn->offset);
-				return 1;
-			}
-			break;
-
-		case INSN_JUMP_CONDITIONAL:
-		case INSN_JUMP_UNCONDITIONAL:
-			if (insn->jump_dest) {
-				ret = validate_branch(file, insn->jump_dest,
-						      state);
-				if (ret)
-					return 1;
-			} else if (has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			} /* else it's a sibling call */
-
-			if (insn->type == INSN_JUMP_UNCONDITIONAL)
-				return 0;
-
-			break;
-
-		case INSN_JUMP_DYNAMIC:
-			if (list_empty(&insn->alts) &&
-			    has_modified_stack_frame(insn)) {
-				WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-					  sec, insn->offset);
-				return 1;
-			}
-
-			return 0;
-
-		default:
-			break;
-		}
-
-		if (insn->dead_end)
-			return 0;
-
-		insn = next_insn_same_sec(file, insn);
-		if (!insn) {
-			WARN("%s: unexpected end of section", sec->name);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-static bool is_kasan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
-}
-
-static bool is_ubsan_insn(struct instruction *insn)
-{
-	return (insn->type == INSN_CALL &&
-		!strcmp(insn->call_dest->name,
-			"__ubsan_handle_builtin_unreachable"));
-}
-
-static bool ignore_unreachable_insn(struct symbol *func,
-				    struct instruction *insn)
-{
-	int i;
-
-	if (insn->type == INSN_NOP)
-		return true;
-
-	/*
-	 * Check if this (or a subsequent) instruction is related to
-	 * CONFIG_UBSAN or CONFIG_KASAN.
-	 *
-	 * End the search at 5 instructions to avoid going into the weeds.
-	 */
-	for (i = 0; i < 5; i++) {
-
-		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
-			return true;
-
-		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-			insn = insn->jump_dest;
-			continue;
-		}
-
-		if (insn->offset + insn->len >= func->offset + func->len)
-			break;
-		insn = list_next_entry(insn, list);
-	}
-
-	return false;
-}
-
-static int validate_functions(struct objtool_file *file)
-{
-	struct section *sec;
-	struct symbol *func;
-	struct instruction *insn;
-	int ret, warnings = 0;
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			insn = find_insn(file, sec, func->offset);
-			if (!insn)
-				continue;
-
-			ret = validate_branch(file, insn, 0);
-			warnings += ret;
-		}
-	}
-
-	list_for_each_entry(sec, &file->elf->sections, list) {
-		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			func_for_each_insn(file, func, insn) {
-				if (insn->visited)
-					continue;
-
-				insn->visited = true;
-
-				if (file->ignore_unreachables || warnings ||
-				    ignore_unreachable_insn(func, insn))
-					continue;
-
-				/*
-				 * gcov produces a lot of unreachable
-				 * instructions.  If we get an unreachable
-				 * warning and the file has gcov enabled, just
-				 * ignore it, and all other such warnings for
-				 * the file.
-				 */
-				if (!file->ignore_unreachables &&
-				    gcov_enabled(file)) {
-					file->ignore_unreachables = true;
-					continue;
-				}
-
-				WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
-				warnings++;
-			}
-		}
-	}
-
-	return warnings;
-}
-
-static int validate_uncallable_instructions(struct objtool_file *file)
-{
-	struct instruction *insn;
-	int warnings = 0;
-
-	for_each_insn(file, insn) {
-		if (!insn->visited && insn->type == INSN_RETURN) {
-			WARN_FUNC("return instruction outside of a callable function",
-				  insn->sec, insn->offset);
-			warnings++;
-		}
-	}
-
-	return warnings;
-}
-
-static void cleanup(struct objtool_file *file)
-{
-	struct instruction *insn, *tmpinsn;
-	struct alternative *alt, *tmpalt;
-
-	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
-		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
-			list_del(&alt->list);
-			free(alt);
-		}
-		list_del(&insn->list);
-		hash_del(&insn->hash);
-		free(insn);
-	}
-	elf_close(file->elf);
-}
-
-const char * const check_usage[] = {
+static const char * const check_usage[] = {
 	"objtool check [<options>] file.o",
 	NULL,
 };
 
+const struct option check_options[] = {
+	OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+	OPT_END(),
+};
+
 int cmd_check(int argc, const char **argv)
 {
-	struct objtool_file file;
-	int ret, warnings = 0;
+	const char *objname;
 
-	const struct option options[] = {
-		OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
-		OPT_END(),
-	};
-
-	argc = parse_options(argc, argv, options, check_usage, 0);
+	argc = parse_options(argc, argv, check_options, check_usage, 0);
 
 	if (argc != 1)
-		usage_with_options(check_usage, options);
+		usage_with_options(check_usage, check_options);
 
 	objname = argv[0];
 
-	file.elf = elf_open(objname);
-	if (!file.elf) {
-		fprintf(stderr, "error reading elf file %s\n", objname);
-		return 1;
-	}
-
-	INIT_LIST_HEAD(&file.insn_list);
-	hash_init(file.insn_hash);
-	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
-	file.ignore_unreachables = false;
-	file.c_file = find_section_by_name(file.elf, ".comment");
-
-	ret = decode_sections(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_functions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-	ret = validate_uncallable_instructions(&file);
-	if (ret < 0)
-		goto out;
-	warnings += ret;
-
-out:
-	cleanup(&file);
-
-	/* ignore warnings for now until we get all the code cleaned up */
-	if (ret || warnings)
-		return 0;
-	return 0;
+	return check(objname, nofp);
 }
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
new file mode 100644
index 0000000..443ab2c
--- /dev/null
+++ b/tools/objtool/cfi.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED		-1
+#define CFI_CFA			-2
+#define CFI_SP_INDIRECT		-3
+#define CFI_BP_INDIRECT		-4
+
+#define CFI_AX			0
+#define CFI_DX			1
+#define CFI_CX			2
+#define CFI_BX			3
+#define CFI_SI			4
+#define CFI_DI			5
+#define CFI_BP			6
+#define CFI_SP			7
+#define CFI_R8			8
+#define CFI_R9			9
+#define CFI_R10			10
+#define CFI_R11			11
+#define CFI_R12			12
+#define CFI_R13			13
+#define CFI_R14			14
+#define CFI_R15			15
+#define CFI_RA			16
+#define CFI_NUM_REGS		17
+
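+/*
+ * A location is a base register plus an offset; for example, rbp saved on
+ * the stack at CFA-16 is described as { .base = CFI_CFA, .offset = -16 }.
+ */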
+struct cfi_reg {
+	int base;
+	int offset;
+};
+
+struct cfi_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+};
+
+#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
new file mode 100644
index 0000000..fea2221
--- /dev/null
+++ b/tools/objtool/check.c
@@ -0,0 +1,1655 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+
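+/* an alternate instruction path, also traversed by validate_branch() */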
+struct alternative {
+	struct list_head list;
+	struct instruction *insn;
+};
+
+const char *objname;
+static bool nofp;
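+/* CFI state at function entry (see arch_initial_func_cfi_state()) */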
+struct cfi_state initial_func_cfi;
+
+static struct instruction *find_insn(struct objtool_file *file,
+				     struct section *sec, unsigned long offset)
+{
+	struct instruction *insn;
+
+	hash_for_each_possible(file->insn_hash, insn, hash, offset)
+		if (insn->sec == sec && insn->offset == offset)
+			return insn;
+
+	return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+					      struct instruction *insn)
+{
+	struct instruction *next = list_next_entry(insn, list);
+
+	if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+		return NULL;
+
+	return next;
+}
+
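+/* gcov instrumentation shows up as "__gcov_."-prefixed symbols */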
+static bool gcov_enabled(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *sym;
+
+	for_each_sec(file, sec)
+		list_for_each_entry(sym, &sec->symbol_list, list)
+			if (!strncmp(sym->name, "__gcov_.", 8))
+				return true;
+
+	return false;
+}
+
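+/*
+ * Iteration helpers: walk a function's (or section's) instructions in
+ * order or in reverse, stopping at function/section boundaries.
+ */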
+#define func_for_each_insn(file, func, insn)				\
+	for (insn = find_insn(file, func->sec, func->offset);		\
+	     insn && &insn->list != &file->insn_list &&			\
+		insn->sec == func->sec &&				\
+		insn->offset < func->offset + func->len;		\
+	     insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn)		\
+	for (insn = list_prev_entry(insn, list);			\
+	     &insn->list != &file->insn_list &&				\
+		insn->sec == func->sec && insn->offset >= func->offset;	\
+	     insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn)				\
+	for (; insn; insn = next_insn_same_sec(file, insn))
+
+#define sec_for_each_insn_continue(file, insn)				\
+	for (insn = next_insn_same_sec(file, insn); insn;		\
+	     insn = next_insn_same_sec(file, insn))
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+	struct rela *rela;
+	struct instruction *insn;
+
+	/* check for STACK_FRAME_NON_STANDARD */
+	if (file->whitelist && file->whitelist->rela)
+		list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+			if (rela->sym->type == STT_SECTION &&
+			    rela->sym->sec == func->sec &&
+			    rela->addend == func->offset)
+				return true;
+			if (rela->sym->type == STT_FUNC && rela->sym == func)
+				return true;
+		}
+
+	/* check if it has a context switching instruction */
+	func_for_each_insn(file, func, insn)
+		if (insn->type == INSN_CONTEXT_SWITCH)
+			return true;
+
+	return false;
+}
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ *  -1: error
+ *   0: no dead end
+ *   1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+			       int recursion)
+{
+	int i;
+	struct instruction *insn;
+	bool empty = true;
+
+	/*
+	 * Unfortunately these have to be hard coded because the noreturn
+	 * attribute isn't provided in ELF data.
+	 */
+	static const char * const global_noreturns[] = {
+		"__stack_chk_fail",
+		"panic",
+		"do_exit",
+		"do_task_dead",
+		"__module_put_and_exit",
+		"complete_and_exit",
+		"kvm_spurious_fault",
+		"__reiserfs_panic",
+		"lbug_with_loc",
+		"fortify_panic",
+	};
+
+	if (func->bind == STB_WEAK)
+		return 0;
+
+	if (func->bind == STB_GLOBAL)
+		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+			if (!strcmp(func->name, global_noreturns[i]))
+				return 1;
+
+	if (!func->sec)
+		return 0;
+
+	func_for_each_insn(file, func, insn) {
+		empty = false;
+
+		if (insn->type == INSN_RETURN)
+			return 0;
+	}
+
+	if (empty)
+		return 0;
+
+	/*
+	 * A function can have a sibling call instead of a return.  In that
+	 * case, the function's dead-end status depends on whether the target
+	 * of the sibling call returns.
+	 */
+	func_for_each_insn(file, func, insn) {
+		if (insn->sec != func->sec ||
+		    insn->offset >= func->offset + func->len)
+			break;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+			struct instruction *dest = insn->jump_dest;
+			struct symbol *dest_func;
+
+			if (!dest)
+				/* sibling call to another file */
+				return 0;
+
+			if (dest->sec != func->sec ||
+			    dest->offset < func->offset ||
+			    dest->offset >= func->offset + func->len) {
+				/* local sibling call */
+				dest_func = find_symbol_by_offset(dest->sec,
+								  dest->offset);
+				if (!dest_func)
+					continue;
+
+				if (recursion == 5) {
+					WARN_FUNC("infinite recursion (objtool bug!)",
+						  dest->sec, dest->offset);
+					return -1;
+				}
+
+				return __dead_end_function(file, dest_func,
+							   recursion + 1);
+			}
+		}
+
+		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+			/* sibling call */
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+	return __dead_end_function(file, func, 0);
+}
+
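+/* reset an insn_state: CFA, all registers and the DRAP register unknown */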
+static void clear_insn_state(struct insn_state *state)
+{
+	int i;
+
+	memset(state, 0, sizeof(*state));
+	state->cfa.base = CFI_UNDEFINED;
+	for (i = 0; i < CFI_NUM_REGS; i++)
+		state->regs[i].base = CFI_UNDEFINED;
+	state->drap_reg = CFI_UNDEFINED;
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	unsigned long offset;
+	struct instruction *insn;
+	int ret;
+
+	for_each_sec(file, sec) {
+
+		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+			continue;
+
+		for (offset = 0; offset < sec->len; offset += insn->len) {
+			insn = malloc(sizeof(*insn));
+			if (!insn) {
+				WARN("malloc failed");
+				return -1;
+			}
+			memset(insn, 0, sizeof(*insn));
+			INIT_LIST_HEAD(&insn->alts);
+			clear_insn_state(&insn->state);
+
+			insn->sec = sec;
+			insn->offset = offset;
+
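+			/*
+			 * Besides length, type and immediate, the decoder now
+			 * also reports the instruction's effect on the stack
+			 * in insn->stack_op, which the CFI tracking consumes.
+			 */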
+			ret = arch_decode_instruction(file->elf, sec, offset,
+						      sec->len - offset,
+						      &insn->len, &insn->type,
+						      &insn->immediate,
+						      &insn->stack_op);
+			if (ret)
+				return ret;
+
+			if (!insn->type || insn->type > INSN_LAST) {
+				WARN_FUNC("invalid instruction type %d",
+					  insn->sec, insn->offset, insn->type);
+				return -1;
+			}
+
+			hash_add(file->insn_hash, &insn->hash, insn->offset);
+			list_add_tail(&insn->list, &file->insn_list);
+		}
+
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!find_insn(file, sec, func->offset)) {
+				WARN("%s(): can't find starting instruction",
+				     func->name);
+				return -1;
+			}
+
+			func_for_each_insn(file, func, insn)
+				if (!insn->func)
+					insn->func = func;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+	struct section *sec;
+	struct rela *rela;
+	struct instruction *insn;
+	bool found;
+
+	sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+	if (!sec)
+		return 0;
+
+	list_for_each_entry(rela, &sec->rela_list, list) {
+		if (rela->sym->type != STT_SECTION) {
+			WARN("unexpected relocation symbol type in %s", sec->name);
+			return -1;
+		}
+		insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (insn)
+			insn = list_prev_entry(insn, list);
+		else if (rela->addend == rela->sym->sec->len) {
+			found = false;
+			list_for_each_entry_reverse(insn, &file->insn_list, list) {
+				if (insn->sec == rela->sym->sec) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				WARN("can't find unreachable insn at %s+0x%x",
+				     rela->sym->sec->name, rela->addend);
+				return -1;
+			}
+		} else {
+			WARN("can't find unreachable insn at %s+0x%x",
+			     rela->sym->sec->name, rela->addend);
+			return -1;
+		}
+
+		insn->dead_end = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!ignore_func(file, func))
+				continue;
+
+			func_for_each_insn(file, func, insn)
+				insn->ignore = true;
+		}
+	}
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct rela *rela;
+	struct section *dest_sec;
+	unsigned long dest_off;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		if (insn->ignore)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_sec = insn->sec;
+			dest_off = insn->offset + insn->len + insn->immediate;
+		} else if (rela->sym->type == STT_SECTION) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->addend + 4;
+		} else if (rela->sym->sec->idx) {
+			dest_sec = rela->sym->sec;
+			dest_off = rela->sym->sym.st_value + rela->addend + 4;
+		} else {
+			/* sibling call */
+			insn->jump_dest = 0;
+			continue;
+		}
+
+		insn->jump_dest = find_insn(file, dest_sec, dest_off);
+		if (!insn->jump_dest) {
+
+			/*
+			 * This is a special case where an alt instruction
+			 * jumps past the end of the section.  These are
+			 * handled later in handle_group_alt().
+			 */
+			if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+				continue;
+
+			WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+				  insn->sec, insn->offset, dest_sec->name,
+				  dest_off);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+	struct instruction *insn;
+	unsigned long dest_off;
+	struct rela *rela;
+
+	for_each_insn(file, insn) {
+		if (insn->type != INSN_CALL)
+			continue;
+
+		rela = find_rela_by_dest_range(insn->sec, insn->offset,
+					       insn->len);
+		if (!rela) {
+			dest_off = insn->offset + insn->len + insn->immediate;
+			insn->call_dest = find_symbol_by_offset(insn->sec,
+								dest_off);
+			if (!insn->call_dest) {
+				WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+					  insn->sec, insn->offset, dest_off);
+				return -1;
+			}
+		} else if (rela->sym->type == STT_SECTION) {
+			insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+								rela->addend+4);
+			if (!insn->call_dest ||
+			    insn->call_dest->type != STT_FUNC) {
+				WARN_FUNC("can't find call dest symbol at %s+0x%x",
+					  insn->sec, insn->offset,
+					  rela->sym->sec->name,
+					  rela->addend + 4);
+				return -1;
+			}
+		} else
+			insn->call_dest = rela->sym;
+	}
+
+	return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ *    instruction at the end so that validate_branch() skips all the original
+ *    replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero.  In
+ *    that case the old instructions are replaced with noops.  We simulate that
+ *    by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ *    conditionally jumps to the _end_ of the entry.  We have to modify these
+ *    jumps' destinations to point back to .text rather than the end of the
+ *    entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ *    which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+			    struct special_alt *special_alt,
+			    struct instruction *orig_insn,
+			    struct instruction **new_insn)
+{
+	struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+	unsigned long dest_off;
+
+	last_orig_insn = NULL;
+	insn = orig_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+			break;
+
+		if (special_alt->skip_orig)
+			insn->type = INSN_NOP;
+
+		insn->alt_group = true;
+		last_orig_insn = insn;
+	}
+
+	if (!next_insn_same_sec(file, last_orig_insn)) {
+		WARN("%s: don't know how to handle alternatives at end of section",
+		     special_alt->orig_sec->name);
+		return -1;
+	}
+
+	fake_jump = malloc(sizeof(*fake_jump));
+	if (!fake_jump) {
+		WARN("malloc failed");
+		return -1;
+	}
+	memset(fake_jump, 0, sizeof(*fake_jump));
+	INIT_LIST_HEAD(&fake_jump->alts);
+	clear_insn_state(&fake_jump->state);
+
+	fake_jump->sec = special_alt->new_sec;
+	fake_jump->offset = -1;
+	fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+	fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+	fake_jump->ignore = true;
+
+	if (!special_alt->new_len) {
+		*new_insn = fake_jump;
+		return 0;
+	}
+
+	last_new_insn = NULL;
+	insn = *new_insn;
+	sec_for_each_insn_from(file, insn) {
+		if (insn->offset >= special_alt->new_off + special_alt->new_len)
+			break;
+
+		last_new_insn = insn;
+
+		if (insn->type != INSN_JUMP_CONDITIONAL &&
+		    insn->type != INSN_JUMP_UNCONDITIONAL)
+			continue;
+
+		if (!insn->immediate)
+			continue;
+
+		dest_off = insn->offset + insn->len + insn->immediate;
+		if (dest_off == special_alt->new_off + special_alt->new_len)
+			insn->jump_dest = fake_jump;
+
+		if (!insn->jump_dest) {
+			WARN_FUNC("can't find alternative jump destination",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+	}
+
+	if (!last_new_insn) {
+		WARN_FUNC("can't find last new alternative instruction",
+			  special_alt->new_sec, special_alt->new_off);
+		return -1;
+	}
+
+	list_add(&fake_jump->list, &last_new_insn->list);
+
+	return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+			   struct special_alt *special_alt,
+			   struct instruction *orig_insn,
+			   struct instruction **new_insn)
+{
+	if (orig_insn->type == INSN_NOP)
+		return 0;
+
+	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+		WARN_FUNC("unsupported instruction at jump label",
+			  orig_insn->sec, orig_insn->offset);
+		return -1;
+	}
+
+	*new_insn = list_next_entry(orig_insn, list);
+	return 0;
+}
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime.  Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+	struct list_head special_alts;
+	struct instruction *orig_insn, *new_insn;
+	struct special_alt *special_alt, *tmp;
+	struct alternative *alt;
+	int ret;
+
+	ret = special_get_alts(file->elf, &special_alts);
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			ret = -1;
+			goto out;
+		}
+
+		orig_insn = find_insn(file, special_alt->orig_sec,
+				      special_alt->orig_off);
+		if (!orig_insn) {
+			WARN_FUNC("special: can't find orig instruction",
+				  special_alt->orig_sec, special_alt->orig_off);
+			ret = -1;
+			goto out;
+		}
+
+		new_insn = NULL;
+		if (!special_alt->group || special_alt->new_len) {
+			new_insn = find_insn(file, special_alt->new_sec,
+					     special_alt->new_off);
+			if (!new_insn) {
+				WARN_FUNC("special: can't find new instruction",
+					  special_alt->new_sec,
+					  special_alt->new_off);
+				ret = -1;
+				goto out;
+			}
+		}
+
+		if (special_alt->group) {
+			ret = handle_group_alt(file, special_alt, orig_insn,
+					       &new_insn);
+			if (ret)
+				goto out;
+		} else if (special_alt->jump_or_nop) {
+			ret = handle_jump_alt(file, special_alt, orig_insn,
+					      &new_insn);
+			if (ret)
+				goto out;
+		}
+
+		alt->insn = new_insn;
+		list_add_tail(&alt->list, &orig_insn->alts);
+
+		list_del(&special_alt->list);
+		free(special_alt);
+	}
+
+out:
+	return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+			    struct instruction *insn, struct rela *table,
+			    struct rela *next_table)
+{
+	struct rela *rela = table;
+	struct instruction *alt_insn;
+	struct alternative *alt;
+
+	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+		if (rela == next_table)
+			break;
+
+		if (rela->sym->sec != insn->sec ||
+		    rela->addend <= func->offset ||
+		    rela->addend >= func->offset + func->len)
+			break;
+
+		alt_insn = find_insn(file, insn->sec, rela->addend);
+		if (!alt_insn) {
+			WARN("%s: can't find instruction at %s+0x%x",
+			     file->rodata->rela->name, insn->sec->name,
+			     rela->addend);
+			return -1;
+		}
+
+		alt = malloc(sizeof(*alt));
+		if (!alt) {
+			WARN("malloc failed");
+			return -1;
+		}
+
+		alt->insn = alt_insn;
+		list_add_tail(&alt->list, &insn->alts);
+	}
+
+	return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ *    This is the most common case by far.  It jumps to an address in a simple
+ *    jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ *    This is caused by a rare GCC quirk, currently only seen in three driver
+ *    functions in the kernel, only with certain obscure non-distro configs.
+ *
+ *    As part of an optimization, GCC makes a copy of an existing switch jump
+ *    table, modifies it, and then hard-codes the jump (albeit with an indirect
+ *    jump) to use a single entry in the table.  The rest of the jump table and
+ *    some of its jump targets remain as dead code.
+ *
+ *    In such a case we can just crudely ignore all unreachable instruction
+ *    warnings for the entire object file.  Ideally we would just ignore them
+ *    for the function, but that would require redesigning the code quite a
+ *    bit.  And honestly that's just not worth doing: unreachable instruction
+ *    warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ *    ... some instructions ...
+ *    jmpq *(%reg1,%reg2,8)
+ *
+ *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
+ *    writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ *    ensure the same register is used in the mov and jump instructions.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+				      struct symbol *func,
+				      struct instruction *insn)
+{
+	struct rela *text_rela, *rodata_rela;
+	struct instruction *orig_insn = insn;
+
+	text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+	if (text_rela && text_rela->sym == file->rodata->sym) {
+		/* case 1 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend);
+		if (rodata_rela)
+			return rodata_rela;
+
+		/* case 2 */
+		rodata_rela = find_rela_by_dest(file->rodata,
+						text_rela->addend + 4);
+		if (!rodata_rela)
+			return NULL;
+		file->ignore_unreachables = true;
+		return rodata_rela;
+	}
+
+	/* case 3 */
+	func_for_each_insn_continue_reverse(file, func, insn) {
+		if (insn->type == INSN_JUMP_DYNAMIC)
+			break;
+
+		/* allow small jumps within the range */
+		if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+		    insn->jump_dest &&
+		    (insn->jump_dest->offset <= insn->offset ||
+		     insn->jump_dest->offset > orig_insn->offset))
+		    break;
+
+		/* look for a relocation which references .rodata */
+		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+						    insn->len);
+		if (!text_rela || text_rela->sym != file->rodata->sym)
+			continue;
+
+		/*
+		 * Make sure the .rodata address isn't associated with a
+		 * symbol.  gcc jump tables are anonymous data.
+		 */
+		if (find_symbol_containing(file->rodata, text_rela->addend))
+			continue;
+
+		return find_rela_by_dest(file->rodata, text_rela->addend);
+	}
+
+	return NULL;
+}
+
+static int add_func_switch_tables(struct objtool_file *file,
+				  struct symbol *func)
+{
+	struct instruction *insn, *prev_jump = NULL;
+	struct rela *rela, *prev_rela = NULL;
+	int ret;
+
+	func_for_each_insn(file, func, insn) {
+		if (insn->type != INSN_JUMP_DYNAMIC)
+			continue;
+
+		rela = find_switch_table(file, func, insn);
+		if (!rela)
+			continue;
+
+		/*
+		 * We found a switch table, but we don't know yet how big it
+		 * is.  Don't add it until we reach the end of the function or
+		 * the beginning of another switch table in the same function.
+		 */
+		if (prev_jump) {
+			ret = add_switch_table(file, func, prev_jump, prev_rela,
+					       rela);
+			if (ret)
+				return ret;
+		}
+
+		prev_jump = insn;
+		prev_rela = rela;
+	}
+
+	if (prev_jump) {
+		ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	int ret;
+
+	if (!file->rodata || !file->rodata->rela)
+		return 0;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			ret = add_func_switch_tables(file, func);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+	int ret;
+
+	ret = decode_instructions(file);
+	if (ret)
+		return ret;
+
+	ret = add_dead_ends(file);
+	if (ret)
+		return ret;
+
+	add_ignores(file);
+
+	ret = add_jump_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_call_destinations(file);
+	if (ret)
+		return ret;
+
+	ret = add_special_section_alts(file);
+	if (ret)
+		return ret;
+
+	ret = add_switch_table_alts(file);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+	if (insn->type == INSN_CALL &&
+	    insn->call_dest->type == STT_NOTYPE &&
+	    !strcmp(insn->call_dest->name, "__fentry__"))
+		return true;
+
+	return false;
+}
+
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+	int i;
+
+	if (state->cfa.base != initial_func_cfi.cfa.base ||
+	    state->cfa.offset != initial_func_cfi.cfa.offset ||
+	    state->stack_size != initial_func_cfi.cfa.offset ||
+	    state->drap)
+		return true;
+
+	for (i = 0; i < CFI_NUM_REGS; i++)
+		if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+		    state->regs[i].offset != initial_func_cfi.regs[i].offset)
+			return true;
+
+	return false;
+}
+
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+	if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+	    state->regs[CFI_BP].offset == -16)
+		return true;
+
+	if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+		return true;
+
+	return false;
+}
+
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+		     int offset)
+{
+	if ((arch_callee_saved_reg(reg) ||
+	    (state->drap && reg == state->drap_reg)) &&
+	    state->regs[reg].base == CFI_UNDEFINED) {
+		state->regs[reg].base = base;
+		state->regs[reg].offset = offset;
+	}
+}
+
+static void restore_reg(struct insn_state *state, unsigned char reg)
+{
+	state->regs[reg].base = CFI_UNDEFINED;
+	state->regs[reg].offset = 0;
+}
+
+/*
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack.  r10 or r13 is used as the DRAP
+ * register.  The typical DRAP pattern is:
+ *
+ *   4c 8d 54 24 08		lea    0x8(%rsp),%r10
+ *   48 83 e4 c0		and    $0xffffffffffffffc0,%rsp
+ *   41 ff 72 f8		pushq  -0x8(%r10)
+ *   55				push   %rbp
+ *   48 89 e5			mov    %rsp,%rbp
+ *				(more pushes)
+ *   41 52			push   %r10
+ *				...
+ *   41 5a			pop    %r10
+ *				(more pops)
+ *   5d				pop    %rbp
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ *   5b				pop    %rbx
+ *   41 5a			pop    %r10
+ *   41 5c			pop    %r12
+ *   41 5d			pop    %r13
+ *   41 5e			pop    %r14
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * and:
+ *
+ *   4c 8b 55 e8		mov    -0x18(%rbp),%r10
+ *   48 8b 5d e0		mov    -0x20(%rbp),%rbx
+ *   4c 8b 65 f0		mov    -0x10(%rbp),%r12
+ *   4c 8b 6d f8		mov    -0x8(%rbp),%r13
+ *   c9				leaveq
+ *   49 8d 62 f8		lea    -0x8(%r10),%rsp
+ *   c3				retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ *   41 55			push   %r13
+ *   4c 8d 6c 24 10		lea    0x10(%rsp),%r13
+ *   48 83 e4 f0		and    $0xfffffffffffffff0,%rsp
+ *				...
+ *   49 8d 65 f0		lea    -0x10(%r13),%rsp
+ *   41 5d			pop    %r13
+ *   c3				retq
+ */
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
+{
+	struct stack_op *op = &insn->stack_op;
+	struct cfi_reg *cfa = &state->cfa;
+	struct cfi_reg *regs = state->regs;
+
+	/* stack operations don't make sense with an undefined CFA */
+	if (cfa->base == CFI_UNDEFINED) {
+		if (insn->func) {
+			WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+			return -1;
+		}
+		return 0;
+	}
+
+	switch (op->dest.type) {
+
+	case OP_DEST_REG:
+		switch (op->src.type) {
+
+		case OP_SRC_REG:
+			if (cfa->base == op->src.reg && cfa->base == CFI_SP &&
+			    op->dest.reg == CFI_BP && regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			} else if (state->drap) {
+
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			} else if (!nofp) {
+
+				WARN_FUNC("unknown stack-related register move",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			break;
+
+		case OP_SRC_ADD:
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+				/* add imm, %rsp */
+				state->stack_size -= op->src.offset;
+				if (cfa->base == CFI_SP)
+					cfa->offset -= op->src.offset;
+				break;
+			}
+
+			if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+				/* lea disp(%rbp), %rsp */
+				state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+				break;
+			}
+
+			if (op->dest.reg != CFI_BP && op->src.reg == CFI_SP &&
+			    cfa->base == CFI_SP) {
+
+				/* drap: lea disp(%rsp), %drap */
+				state->drap_reg = op->dest.reg;
+				break;
+			}
+
+			if (state->drap && op->dest.reg == CFI_SP &&
+			    op->src.reg == state->drap_reg) {
+
+				 /* drap: lea disp(%drap), %rsp */
+				cfa->base = CFI_SP;
+				cfa->offset = state->stack_size = -op->src.offset;
+				state->drap_reg = CFI_UNDEFINED;
+				state->drap = false;
+				break;
+			}
+
+			if (op->dest.reg == state->cfa.base) {
+				WARN_FUNC("unsupported stack register modification",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			break;
+
+		case OP_SRC_AND:
+			if (op->dest.reg != CFI_SP ||
+			    (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+			    (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+				WARN_FUNC("unsupported stack pointer realignment",
+					  insn->sec, insn->offset);
+				return -1;
+			}
+
+			if (state->drap_reg != CFI_UNDEFINED) {
+				/* drap: and imm, %rsp */
+				cfa->base = state->drap_reg;
+				cfa->offset = state->stack_size = 0;
+				state->drap = true;
+
+			}
+
+			/*
+			 * Older versions of GCC (4.8ish) realign the stack
+			 * without DRAP, with a frame pointer.
+			 */
+
+			break;
+
+		case OP_SRC_POP:
+			if (!state->drap && op->dest.type == OP_DEST_REG &&
+			    op->dest.reg == cfa->base) {
+
+				/* pop %rbp */
+				cfa->base = CFI_SP;
+			}
+
+			if (regs[op->dest.reg].offset == -state->stack_size) {
+
+				if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+				    op->dest.type == OP_DEST_REG &&
+				    op->dest.reg == state->drap_reg) {
+
+					/* drap: pop %drap */
+					cfa->base = state->drap_reg;
+					cfa->offset = 0;
+				}
+
+				restore_reg(state, op->dest.reg);
+			}
+
+			state->stack_size -= 8;
+			if (cfa->base == CFI_SP)
+				cfa->offset -= 8;
+
+			break;
+
+		case OP_SRC_REG_INDIRECT:
+			if (state->drap && op->src.reg == CFI_BP &&
+			    op->src.offset == regs[op->dest.reg].offset) {
+
+				/* drap: mov disp(%rbp), %reg */
+				if (op->dest.reg == state->drap_reg) {
+					cfa->base = state->drap_reg;
+					cfa->offset = 0;
+				}
+
+				restore_reg(state, op->dest.reg);
+
+			} else if (op->src.reg == cfa->base &&
+			    op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+				/* mov disp(%rbp), %reg */
+				/* mov disp(%rsp), %reg */
+				restore_reg(state, op->dest.reg);
+			}
+
+			break;
+
+		default:
+			WARN_FUNC("unknown stack-related instruction",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		break;
+
+	case OP_DEST_PUSH:
+		state->stack_size += 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset += 8;
+
+		if (op->src.type != OP_SRC_REG)
+			break;
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: push %drap */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = -state->stack_size;
+
+				/* save drap so we know when to undefine it */
+				save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+
+			} else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+				/* drap: push %rbp */
+				state->stack_size = 0;
+
+			} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: push %reg */
+				save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+			}
+
+		} else {
+
+			/* push %reg */
+			save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+		}
+
+		/* detect when asm code uses rbp as a scratch register */
+		if (!nofp && insn->func && op->src.reg == CFI_BP &&
+		    cfa->base != CFI_BP)
+			state->bp_scratch = true;
+		break;
+
+	case OP_DEST_REG_INDIRECT:
+
+		if (state->drap) {
+			if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+				/* drap: mov %drap, disp(%rbp) */
+				cfa->base = CFI_BP_INDIRECT;
+				cfa->offset = op->dest.offset;
+
+				/* save drap so we know when to undefine it */
+				save_reg(state, op->src.reg, CFI_CFA, op->dest.offset);
+			}
+
+			else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+				/* drap: mov reg, disp(%rbp) */
+				save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+			}
+
+		} else if (op->dest.reg == cfa->base) {
+
+			/* mov reg, disp(%rbp) */
+			/* mov reg, disp(%rsp) */
+			save_reg(state, op->src.reg, CFI_CFA,
+				 op->dest.offset - state->cfa.offset);
+		}
+
+		break;
+
+	case OP_DEST_LEAVE:
+		if ((!state->drap && cfa->base != CFI_BP) ||
+		    (state->drap && cfa->base != state->drap_reg)) {
+			WARN_FUNC("leave instruction with modified stack frame",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* leave (mov %rbp, %rsp; pop %rbp) */
+
+		state->stack_size = -state->regs[CFI_BP].offset - 8;
+		restore_reg(state, CFI_BP);
+
+		if (!state->drap) {
+			cfa->base = CFI_SP;
+			cfa->offset -= 8;
+		}
+
+		break;
+
+	case OP_DEST_MEM:
+		if (op->src.type != OP_SRC_POP) {
+			WARN_FUNC("unknown stack-related memory operation",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
+		/* pop mem */
+		state->stack_size -= 8;
+		if (cfa->base == CFI_SP)
+			cfa->offset -= 8;
+
+		break;
+
+	default:
+		WARN_FUNC("unknown stack-related instruction",
+			  insn->sec, insn->offset);
+		return -1;
+	}
+
+	return 0;
+}
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+	struct insn_state *state1 = &insn->state, *state2 = state;
+	int i;
+
+	if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+		WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+			  insn->sec, insn->offset,
+			  state1->cfa.base, state1->cfa.offset,
+			  state2->cfa.base, state2->cfa.offset);
+
+	} else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+		for (i = 0; i < CFI_NUM_REGS; i++) {
+			if (!memcmp(&state1->regs[i], &state2->regs[i],
+				    sizeof(struct cfi_reg)))
+				continue;
+
+			WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+				  insn->sec, insn->offset,
+				  i, state1->regs[i].base, state1->regs[i].offset,
+				  i, state2->regs[i].base, state2->regs[i].offset);
+			break;
+		}
+
+	} else if (state1->drap != state2->drap ||
+		 (state1->drap && state1->drap_reg != state2->drap_reg)) {
+		WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
+			  insn->sec, insn->offset,
+			  state1->drap, state1->drap_reg,
+			  state2->drap, state2->drap_reg);
+
+	} else
+		return true;
+
+	return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+			   struct insn_state state)
+{
+	struct alternative *alt;
+	struct instruction *insn;
+	struct section *sec;
+	struct symbol *func = NULL;
+	int ret;
+
+	insn = first;
+	sec = insn->sec;
+
+	if (insn->alt_group && list_empty(&insn->alts)) {
+		WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+			  sec, insn->offset);
+		return -1;
+	}
+
+	while (1) {
+		if (file->c_file && insn->func) {
+			if (func && func != insn->func) {
+				WARN("%s() falls through to next function %s()",
+				     func->name, insn->func->name);
+				return 1;
+			}
+		}
+
+		func = insn->func;
+
+		if (insn->visited) {
+			if (!insn_state_match(insn, &state))
+				return 1;
+
+			return 0;
+		}
+
+		insn->state = state;
+
+		insn->visited = true;
+
+		list_for_each_entry(alt, &insn->alts, list) {
+			ret = validate_branch(file, alt->insn, state);
+			if (ret)
+				return 1;
+		}
+
+		switch (insn->type) {
+
+		case INSN_RETURN:
+			if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("return with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			if (state.bp_scratch) {
+				WARN("%s uses BP as a scratch register",
+				     insn->func->name);
+				return 1;
+			}
+
+			return 0;
+
+		case INSN_CALL:
+			if (is_fentry_call(insn))
+				break;
+
+			ret = dead_end_function(file, insn->call_dest);
+			if (ret == 1)
+				return 0;
+			if (ret == -1)
+				return 1;
+
+			/* fallthrough */
+		case INSN_CALL_DYNAMIC:
+			if (!nofp && func && !has_valid_stack_frame(&state)) {
+				WARN_FUNC("call without frame pointer save/setup",
+					  sec, insn->offset);
+				return 1;
+			}
+			break;
+
+		case INSN_JUMP_CONDITIONAL:
+		case INSN_JUMP_UNCONDITIONAL:
+			if (insn->jump_dest) {
+				ret = validate_branch(file, insn->jump_dest,
+						      state);
+				if (ret)
+					return 1;
+			} else if (func && has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			} /* else it's a sibling call */
+
+			if (insn->type == INSN_JUMP_UNCONDITIONAL)
+				return 0;
+
+			break;
+
+		case INSN_JUMP_DYNAMIC:
+			if (func && list_empty(&insn->alts) &&
+			    has_modified_stack_frame(&state)) {
+				WARN_FUNC("sibling call from callable instruction with modified stack frame",
+					  sec, insn->offset);
+				return 1;
+			}
+
+			return 0;
+
+		case INSN_STACK:
+			if (update_insn_state(insn, &state))
+				return -1;
+
+			break;
+
+		default:
+			break;
+		}
+
+		if (insn->dead_end)
+			return 0;
+
+		insn = next_insn_same_sec(file, insn);
+		if (!insn) {
+			WARN("%s: unexpected end of section", sec->name);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+	return (insn->type == INSN_CALL &&
+		!strcmp(insn->call_dest->name,
+			"__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct instruction *insn)
+{
+	int i;
+
+	if (insn->ignore || insn->type == INSN_NOP)
+		return true;
+
+	/*
+	 * Ignore any unused exceptions.  This can happen when a whitelisted
+	 * function has an exception table entry.
+	 */
+	if (!strcmp(insn->sec->name, ".fixup"))
+		return true;
+
+	/*
+	 * Check if this (or a subsequent) instruction is related to
+	 * CONFIG_UBSAN or CONFIG_KASAN.
+	 *
+	 * End the search at 5 instructions to avoid going into the weeds.
+	 */
+	if (!insn->func)
+		return false;
+	for (i = 0; i < 5; i++) {
+
+		if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+			return true;
+
+		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+			insn = insn->jump_dest;
+			continue;
+		}
+
+		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+			break;
+		insn = list_next_entry(insn, list);
+	}
+
+	return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *func;
+	struct instruction *insn;
+	struct insn_state state;
+	int ret, warnings = 0;
+
+	clear_insn_state(&state);
+
+	state.cfa = initial_func_cfi.cfa;
+	memcpy(&state.regs, &initial_func_cfi.regs,
+	       CFI_NUM_REGS * sizeof(struct cfi_reg));
+	state.stack_size = initial_func_cfi.cfa.offset;
+
+	for_each_sec(file, sec) {
+		list_for_each_entry(func, &sec->symbol_list, list) {
+			if (func->type != STT_FUNC)
+				continue;
+
+			insn = find_insn(file, sec, func->offset);
+			if (!insn || insn->ignore)
+				continue;
+
+			ret = validate_branch(file, insn, state);
+			warnings += ret;
+		}
+	}
+
+	return warnings;
+}
+
+static int validate_reachable_instructions(struct objtool_file *file)
+{
+	struct instruction *insn;
+
+	if (file->ignore_unreachables)
+		return 0;
+
+	for_each_insn(file, insn) {
+		if (insn->visited || ignore_unreachable_insn(insn))
+			continue;
+
+		/*
+		 * gcov produces a lot of unreachable instructions.  If we get
+		 * an unreachable warning and the file has gcov enabled, just
+		 * ignore it, and all other such warnings for the file.  Do
+		 * this here because this is an expensive function.
+		 */
+		if (gcov_enabled(file))
+			return 0;
+
+		WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+	struct instruction *insn, *tmpinsn;
+	struct alternative *alt, *tmpalt;
+
+	list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+		list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+			list_del(&alt->list);
+			free(alt);
+		}
+		list_del(&insn->list);
+		hash_del(&insn->hash);
+		free(insn);
+	}
+	elf_close(file->elf);
+}
+
+int check(const char *_objname, bool _nofp)
+{
+	struct objtool_file file;
+	int ret, warnings = 0;
+
+	objname = _objname;
+	nofp = _nofp;
+
+	file.elf = elf_open(objname);
+	if (!file.elf)
+		return 1;
+
+	INIT_LIST_HEAD(&file.insn_list);
+	hash_init(file.insn_hash);
+	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+	file.rodata = find_section_by_name(file.elf, ".rodata");
+	file.ignore_unreachables = false;
+	file.c_file = find_section_by_name(file.elf, ".comment");
+
+	arch_initial_func_cfi_state(&initial_func_cfi);
+
+	ret = decode_sections(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	if (list_empty(&file.insn_list))
+		goto out;
+
+	ret = validate_functions(&file);
+	if (ret < 0)
+		goto out;
+	warnings += ret;
+
+	if (!warnings) {
+		ret = validate_reachable_instructions(&file);
+		if (ret < 0)
+			goto out;
+		warnings += ret;
+	}
+
+out:
+	cleanup(&file);
+
+	/* ignore warnings for now until we get all the code cleaned up */
+	if (ret || warnings)
+		return 0;
+	return 0;
+}
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
new file mode 100644
index 0000000..da85f5b
--- /dev/null
+++ b/tools/objtool/check.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "cfi.h"
+#include "arch.h"
+#include <linux/hashtable.h>
+
+struct insn_state {
+	struct cfi_reg cfa;
+	struct cfi_reg regs[CFI_NUM_REGS];
+	int stack_size;
+	bool bp_scratch;
+	bool drap;
+	int drap_reg;
+};
+
+struct instruction {
+	struct list_head list;
+	struct hlist_node hash;
+	struct section *sec;
+	unsigned long offset;
+	unsigned int len;
+	unsigned char type;
+	unsigned long immediate;
+	bool alt_group, visited, dead_end, ignore;
+	struct symbol *call_dest;
+	struct instruction *jump_dest;
+	struct list_head alts;
+	struct symbol *func;
+	struct stack_op stack_op;
+	struct insn_state state;
+};
+
+struct objtool_file {
+	struct elf *elf;
+	struct list_head insn_list;
+	DECLARE_HASHTABLE(insn_hash, 16);
+	struct section *rodata, *whitelist;
+	bool ignore_unreachables, c_file;
+};
+
+int check(const char *objname, bool nofp);
+
+#define for_each_insn(file, insn)					\
+	list_for_each_entry(insn, &file->insn_list, list)
+
+#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d897702..1a7e8aa 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -37,6 +37,9 @@
 #define ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+#define WARN_ELF(format, ...)					\
+	WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
@@ -139,12 +142,12 @@ static int read_sections(struct elf *elf)
 	int i;
 
 	if (elf_getshdrnum(elf->elf, &sections_nr)) {
-		perror("elf_getshdrnum");
+		WARN_ELF("elf_getshdrnum");
 		return -1;
 	}
 
 	if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
-		perror("elf_getshdrstrndx");
+		WARN_ELF("elf_getshdrstrndx");
 		return -1;
 	}
 
@@ -165,37 +168,36 @@ static int read_sections(struct elf *elf)
 
 		s = elf_getscn(elf->elf, i);
 		if (!s) {
-			perror("elf_getscn");
+			WARN_ELF("elf_getscn");
 			return -1;
 		}
 
 		sec->idx = elf_ndxscn(s);
 
 		if (!gelf_getshdr(s, &sec->sh)) {
-			perror("gelf_getshdr");
+			WARN_ELF("gelf_getshdr");
 			return -1;
 		}
 
 		sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
 		if (!sec->name) {
-			perror("elf_strptr");
+			WARN_ELF("elf_strptr");
 			return -1;
 		}
 
-		sec->elf_data = elf_getdata(s, NULL);
-		if (!sec->elf_data) {
-			perror("elf_getdata");
+		sec->data = elf_getdata(s, NULL);
+		if (!sec->data) {
+			WARN_ELF("elf_getdata");
 			return -1;
 		}
 
-		if (sec->elf_data->d_off != 0 ||
-		    sec->elf_data->d_size != sec->sh.sh_size) {
+		if (sec->data->d_off != 0 ||
+		    sec->data->d_size != sec->sh.sh_size) {
 			WARN("unexpected data attributes for %s", sec->name);
 			return -1;
 		}
 
-		sec->data = (unsigned long)sec->elf_data->d_buf;
-		sec->len = sec->elf_data->d_size;
+		sec->len = sec->data->d_size;
 	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
@@ -232,15 +234,15 @@ static int read_symbols(struct elf *elf)
 
 		sym->idx = i;
 
-		if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
-			perror("gelf_getsym");
+		if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+			WARN_ELF("gelf_getsym");
 			goto err;
 		}
 
 		sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
 				       sym->sym.st_name);
 		if (!sym->name) {
-			perror("elf_strptr");
+			WARN_ELF("elf_strptr");
 			goto err;
 		}
 
@@ -322,8 +324,8 @@ static int read_relas(struct elf *elf)
 			}
 			memset(rela, 0, sizeof(*rela));
 
-			if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
-				perror("gelf_getrela");
+			if (!gelf_getrela(sec->data, i, &rela->rela)) {
+				WARN_ELF("gelf_getrela");
 				return -1;
 			}
 
@@ -362,12 +364,6 @@ struct elf *elf_open(const char *name)
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf->name = strdup(name);
-	if (!elf->name) {
-		perror("strdup");
-		goto err;
-	}
-
 	elf->fd = open(name, O_RDONLY);
 	if (elf->fd == -1) {
 		perror("open");
@@ -376,12 +372,12 @@ struct elf *elf_open(const char *name)
 
 	elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
 	if (!elf->elf) {
-		perror("elf_begin");
+		WARN_ELF("elf_begin");
 		goto err;
 	}
 
 	if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
-		perror("gelf_getehdr");
+		WARN_ELF("gelf_getehdr");
 		goto err;
 	}
 
@@ -407,6 +403,12 @@ void elf_close(struct elf *elf)
 	struct symbol *sym, *tmpsym;
 	struct rela *rela, *tmprela;
 
+	if (elf->elf)
+		elf_end(elf->elf);
+
+	if (elf->fd > 0)
+		close(elf->fd);
+
 	list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
 		list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
 			list_del(&sym->list);
@@ -421,11 +423,6 @@ void elf_close(struct elf *elf)
 		list_del(&sec->list);
 		free(sec);
 	}
-	if (elf->name)
-		free(elf->name);
-	if (elf->fd > 0)
-		close(elf->fd);
-	if (elf->elf)
-		elf_end(elf->elf);
+
 	free(elf);
 }
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 731973e..343968b 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -37,10 +37,9 @@ struct section {
 	DECLARE_HASHTABLE(rela_hash, 16);
 	struct section *base, *rela;
 	struct symbol *sym;
-	Elf_Data *elf_data;
+	Elf_Data *data;
 	char *name;
 	int idx;
-	unsigned long data;
 	unsigned int len;
 };
 
@@ -86,6 +85,7 @@ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 struct symbol *find_containing_func(struct section *sec, unsigned long offset);
 void elf_close(struct elf *elf);
 
-
+#define for_each_sec(file, sec)						\
+	list_for_each_entry(sec, &file->elf->sections, list)
 
 #endif /* _OBJTOOL_ELF_H */
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index bff8abb..84f001d 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 	alt->jump_or_nop = entry->jump_or_nop;
 
 	if (alt->group) {
-		alt->orig_len = *(unsigned char *)(sec->data + offset +
+		alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
 						   entry->orig_len);
-		alt->new_len = *(unsigned char *)(sec->data + offset +
+		alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
 						  entry->new_len);
 	}
 
 	if (entry->feature) {
 		unsigned short feature;
 
-		feature = *(unsigned short *)(sec->data + offset +
+		feature = *(unsigned short *)(sec->data->d_buf + offset +
 					      entry->feature);
 
 		/*
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index ac7e075..afd9f7a 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -18,6 +18,13 @@
 #ifndef _WARN_H
 #define _WARN_H
 
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
 extern const char *objname;
 
 static inline char *offstr(struct section *sec, unsigned long offset)
@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	free(_str);					\
 })
 
+#define WARN_ELF(format, ...)				\
+	WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 #endif /* _WARN_H */
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index b0b3007..4b6cdbf 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -108,6 +108,9 @@
 script export-to-postgresql.py for more details, and to script
 call-graph-from-postgresql.py for an example of using the database.
 
+There is also a script, intel-pt-events.py, which provides an example of how
+to unpack the raw data for power events and PTWRITE.
+
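A typical way to try that script against an existing perf.data file is to pass
it to perf script with the -s option; the itrace string and the script location
below are only an illustration and should be adjusted to the local setup:

	perf script --itrace=pwe -s <perf-source>/tools/perf/scripts/python/intel-pt-events.py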
 As mentioned above, it is easy to capture too much data.  One way to limit the
 data captured is to use 'snapshot' mode which is explained further below.
 Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
@@ -364,6 +367,42 @@
 
 		CYC packets are not requested by default.
 
+pt		Specifies pass-through which enables the 'branch' config term.
+
+		The default config selects 'pt' if it is available, so a user will
+		never need to specify this term.
+
+branch		Enable branch tracing.  Branch tracing is enabled by default, so to
+		disable branch tracing use 'branch=0'.
+
+		The default config selects 'branch' if it is available.
+
+ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
+		is executed.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
+fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
+		provides the address of the ptwrite instruction.  In the absence of
+		fup_on_ptw, the decoder will use the address of the previous branch
+		if branch tracing is enabled, otherwise the address will be zero.
+		Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt		Enable power events.  The power events provide information about
+		changes to the CPU C-state.
+
+		Support for this feature is indicated by:
+
+			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+		which contains "1" if the feature is supported and
+		"0" otherwise.
+
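These terms are used with the usual intel_pt// event syntax.  For illustration
only (the workload and the particular combination of terms are arbitrary), a
recording that enables PTWRITE and power event packets could look like:

	perf record -e intel_pt/ptw,fup_on_ptw,pwr_evt/u -- <workload>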
 
 new snapshot option
 -------------------
@@ -674,13 +713,15 @@
 
 which, in turn, is the same as
 
-	--itrace=ibxe
+	--itrace=ibxwpe
 
 The letters are:
 
 	i	synthesize "instructions" events
 	b	synthesize "branches" events
 	x	synthesize "transactions" events
+	w	synthesize "ptwrite" events
+	p	synthesize "power" events
 	c	synthesize branches events (calls only)
 	r	synthesize branches events (returns only)
 	e	synthesize tracing error events
@@ -699,7 +740,40 @@
 'flags' field can be used in perf script to determine whether the event is a
 tranasaction start, commit or abort.
 
-Error events are new.  They show where the decoder lost the trace.  Error events
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0".  Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used.  Refer to the config terms
+section above.  perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets.  While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used.  Refer to
+the config terms section above.  The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency.  perf script
+"event,synth" fields display information like this:
+	cbr:  cbr: 22 freq: 2189 MHz (200%)
+	mwait:  hints: 0x60 extensions: 0x1
+	pwre:  hw: 0 cstate: 2 sub-cstate: 0
+	exstop:  ip: 1
+	pwrx:  deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+	"cbr" includes the frequency and the percentage of maximum non-turbo
+	"mwait" shows mwait hints and extensions
+	"pwre" shows C-state transitions (to a C-state deeper than C0) and
+	whether initiated by hardware
+	"exstop" indicates execution stopped and whether the IP was recorded
+	exactly,
+	"pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace.  Error events
 are quite important.  Users must know if what they are seeing is a complete
 picture or not.
 
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e2a4c5e..a3abe04 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -3,13 +3,15 @@
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
 		x	synthesize transactions events
+		w	synthesize ptwrite events
+		p	synthesize power events
 		e	synthesize error events
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
 		l	synthesize last branch entries (use with i or x)
 		s       skip initial number of events
 
-	The default is all events i.e. the same as --itrace=ibxe
+	The default is all events i.e. the same as --itrace=ibxwpe
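	The letters can be combined to select a subset of events; for example
	(illustrative only), to synthesize just the ptwrite, power and error
	events:

	--itrace=wpe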
 
 	In addition, the period (default 100000) for instructions events
 	can be specified in units of:
@@ -26,8 +28,8 @@
 	Also the number of last branch entries (default 64, max. 1024) for
 	instructions or transactions events can be specified.
 
-	It is also possible to skip events generated (instructions, branches, transactions)
-	at the beginning. This is useful to ignore initialization code.
+	It is also possible to skip events generated (instructions, branches, transactions,
+	ptwrite, power) at the beginning. This is useful to ignore initialization code.
 
 	--itrace=i0nss1000000
 
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 6e6a8b2..721a447 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -48,6 +48,39 @@
 	Ranges of CPUs are specified with -: 0-2.
 	Default is to trace on all online CPUs.
 
+-T::
+--trace-funcs=::
+	Only trace functions given by the argument.  Multiple functions
+	can be given by using this option more than once.  The function
+	argument also can be a glob pattern.  It will be passed to
+	'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+	Do not trace functions given by the argument.  Like the -T option,
+	this can be used more than once to specify multiple functions
+	(or glob patterns).  It will be passed to 'set_ftrace_notrace'
+	in tracefs.
+
+-G::
+--graph-funcs=::
+	Set graph filter on the given function (or a glob pattern).
+	This is useful for the function_graph tracer only and enables
+	tracing for functions executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+	Set graph notrace filter on the given function (or a glob pattern).
+	Like the -G option, this is useful for the function_graph tracer only
+	and disables tracing for functions executed from the given function.
+	This can be used more than once to specify multiple functions.
+	It will be passed to 'set_graph_notrace' in tracefs.
+
+-D::
+--graph-depth=::
+	Set the max depth for the function graph tracer to follow.
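	Putting these options together, a graph trace limited to a function
	glob and depth might be requested as below; the glob, depth and
	workload are only an illustration:

	perf ftrace -G 'do_sys_open*' -D 3 -- sleep 1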
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index e6c9902..165c2b1 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -240,9 +240,13 @@
  or
  ./perf probe --add='schedule:12 cpu'
 
- this will add one or more probes which has the name start with "schedule".
+Add one or more probes whose names start with "schedule".
 
- Add probes on lines in schedule() function which calls update_rq_clock().
+ ./perf probe schedule*
+ or
+ ./perf probe --add='schedule*'
+
+Add probes on lines in schedule() function which calls update_rq_clock().
 
  ./perf probe 'schedule;update_rq_clock*'
  or
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index dfbb506..142606c 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -39,7 +39,7 @@
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 54acba2..51ec2d2 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -149,10 +149,8 @@
 		print "id=%d, args=%s\n" % \
 		(id, args),
 
-def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm):
-		print_header(event_name, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm)
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s' % (k, str(v)) for k, v in sorted(event_fields_dict.items())])
 
 def print_header(event_name, cpu, secs, nsecs, pid, comm):
 	print "%-20s %5u %05u.%09u %8u %-20s " % \
@@ -321,7 +319,7 @@
 process can be generalized to any tracepoint or set of tracepoints
 you're interested in - basically find the tracepoint(s) you're
 interested in by looking at the list of available events shown by
-'perf list' and/or look in /sys/kernel/debug/tracing events for
+'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
 generate a skeleton script using 'perf script -g python' and modify the
@@ -334,7 +332,7 @@
 scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -383,8 +381,6 @@
 
 ----
 # ls -al kernel-source/tools/perf/scripts/python
-
-root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
 total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
@@ -399,7 +395,7 @@
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -437,7 +433,7 @@
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
@@ -532,7 +528,7 @@
 gives scripts a chance to do setup tasks:
 
 ----
-def trace_begin:
+def trace_begin():
     pass
 ----
 
@@ -541,7 +537,7 @@
  as display results:
 
 ----
-def trace_end:
+def trace_end():
     pass
 ----
 
@@ -550,8 +546,7 @@
  of common arguments are passed into it:
 
 ----
-def trace_unhandled(event_name, context, common_cpu, common_secs,
-        common_nsecs, common_pid, common_comm):
+def trace_unhandled(event_name, context, event_fields_dict):
     pass
 ----
 
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index cb0eda3..5ee8796 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,9 @@
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-        callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
+        callindent, insn, insnlen, synth.
+        Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
@@ -130,6 +131,14 @@
 	i.e., the specified fields apply to all event types if the type string
 	is not given.
 
+	In addition to overriding fields, it is also possible to add or remove
+	fields from the defaults. For example
+
+		-F -cpu,+insn
+
+	removes the cpu field and adds the insn field. Adding/removing fields
+	cannot be mixed with normal overriding.
+
 	The arguments are processed in the order received. A later usage can
 	reset a prior request. e.g.:
 
@@ -185,6 +194,9 @@
 	instruction bytes and the instruction length of the current
 	instruction.
 
+	The synth field is used by synthesized events which may be created
+	during Instruction Trace decoding.
+
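	For example (illustrative only), synthesized power events from an
	Instruction Trace could be inspected with:

		perf script --itrace=p -F event,synth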
 	Finally, a user may not set fields to none for all event types.
 	i.e., -F "" is not allowed.
 
@@ -203,6 +215,8 @@
 	is printed. This is the full execution path leading to the sample. This is only supported when the
 	sample was recorded with perf record -b or -j any.
 
+	The brstackoff field will print an offset into a specific dso/binary.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
@@ -311,6 +325,10 @@
 	Set the maximum number of program blocks to print with brstackasm for
 	each sample.
 
+--inline::
+	If a callgraph address belongs to an inlined function, the inline stack
+	will be printed. Each entry has function name and file/line.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e441..6980763 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured as (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for
+performance-oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.
+
+Users who want to get the actual value can apply --no-metric-only.
+
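For illustration only (the interval and workload are arbitrary), a system-wide
measurement could be taken with:

	perf stat -a --smi-cost -- sleep 5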
 EXAMPLES
 --------
 
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 8354d04..bdf0e87 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -19,18 +19,18 @@
 
 include $(srctree)/tools/scripts/Makefile.arch
 
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
 
 # Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@
   NO_PERF_REGS := 0
 endif
 
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -61,7 +61,7 @@
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
@@ -115,9 +115,9 @@
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
 
 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -228,12 +228,12 @@
 endif
 
 INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/uapi
 INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
 
 # $(obj-perf)      for generated common-cmds.h
 # $(obj-perf)/util for generated bison/flex headers
@@ -355,7 +355,7 @@
 
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-      msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+      msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
       NO_DWARF := 1
     else
       CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -380,7 +380,7 @@
         CFLAGS += -DHAVE_BPF_PROLOGUE
         $(call detected,CONFIG_BPF_PROLOGUE)
       else
-        msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+        msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
       endif
     else
       msg := $(warning DWARF support is off, BPF prologue is disabled);
@@ -406,7 +406,7 @@
   endif
 endif
 
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
   endif
@@ -487,7 +487,7 @@
 endif
 
 ifndef NO_LOCAL_LIBUNWIND
-  ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+  ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
     $(call feature_check,libunwind-debug-frame)
     ifneq ($(feature-libunwind-debug-frame), 1)
       msg := $(warning No debug_frame support found in libunwind);
@@ -740,7 +740,7 @@
       NO_PERF_READ_VDSO32 := 1
     endif
   endif
-  ifneq ($(ARCH), x86)
+  ifneq ($(SRCARCH), x86)
     NO_PERF_READ_VDSOX32 := 1
   endif
   ifndef NO_PERF_READ_VDSOX32
@@ -769,7 +769,7 @@
 endif
 
 ifndef NO_AUXTRACE
-  ifeq ($(ARCH),x86)
+  ifeq ($(SRCARCH),x86)
     ifeq ($(feature-get_cpuid), 0)
       msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
       NO_AUXTRACE := 1
@@ -872,7 +872,7 @@
 ETC_PERFCONFIG = etc/perfconfig
 endif
 ifndef lib
-ifeq ($(ARCH)$(IS_64_BIT), x861)
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
 lib = lib64
 else
 lib = lib
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 79fe31f..5008f51 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -226,7 +226,7 @@
 
 ifeq ($(config),0)
 include $(srctree)/tools/scripts/Makefile.arch
--include arch/$(ARCH)/Makefile
+-include arch/$(SRCARCH)/Makefile
 endif
 
 # The FEATURE_DUMP_EXPORT holds location of the actual
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
index 109eb75..d9b6af83 100644
--- a/tools/perf/arch/Build
+++ b/tools/perf/arch/Build
@@ -1,2 +1,2 @@
 libperf-y += common.o
-libperf-y += $(ARCH)/
+libperf-y += $(SRCARCH)/
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 29361d9..7ce3d1a 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -17,6 +17,7 @@
 
 #include <api/fs/fs.h>
 #include <linux/bitops.h>
+#include <linux/compiler.h>
 #include <linux/coresight-pmu.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -202,19 +203,18 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 		pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
 			  opts->auxtrace_snapshot_size);
 
-	if (cs_etm_evsel) {
-		/*
-		 * To obtain the auxtrace buffer file descriptor, the auxtrace
-		 * event must come first.
-		 */
-		perf_evlist__to_front(evlist, cs_etm_evsel);
-		/*
-		 * In the case of per-cpu mmaps, we need the CPU on the
-		 * AUX event.
-		 */
-		if (!cpu_map__empty(cpus))
-			perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
-	}
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace
+	 * event must come first.
+	 */
+	perf_evlist__to_front(evlist, cs_etm_evsel);
+
+	/*
+	 * In the case of per-cpu mmaps, we need the CPU on the
+	 * AUX event.
+	 */
+	if (!cpu_map__empty(cpus))
+		perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
 
 	/* Add dummy event to keep tracking */
 	if (opts->full_auxtrace) {
@@ -583,8 +583,7 @@ static FILE *cs_device__open_file(const char *name)
 
 }
 
-static __attribute__((format(printf, 2, 3)))
-int cs_device__print_file(const char *name, const char *fmt, ...)
+static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
 {
 	va_list args;
 	FILE *file;
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 837067f..6b40e9f 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -26,6 +26,7 @@ const char *const arm64_triplets[] = {
 
 const char *const powerpc_triplets[] = {
 	"powerpc-unknown-linux-gnu-",
+	"powerpc-linux-gnu-",
 	"powerpc64-unknown-linux-gnu-",
 	"powerpc64-linux-gnu-",
 	"powerpc64le-linux-gnu-",
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 90ad64b..2e659531 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -5,4 +5,6 @@
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
 libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 0000000..3a24b3c4
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,73 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
+static const int special_regs[3][2] = {
+	{ 65, PERF_REG_POWERPC_LINK },
+	{ 101, PERF_REG_POWERPC_XER },
+	{ 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct regs_dump *user_regs = &ui->sample->user_regs;
+	Dwarf_Word dwarf_regs[32], dwarf_nip;
+	size_t i;
+
+#define REG(r) ({						\
+	Dwarf_Word val = 0;					\
+	perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r);	\
+	val;							\
+})
+
+	dwarf_regs[0]  = REG(R0);
+	dwarf_regs[1]  = REG(R1);
+	dwarf_regs[2]  = REG(R2);
+	dwarf_regs[3]  = REG(R3);
+	dwarf_regs[4]  = REG(R4);
+	dwarf_regs[5]  = REG(R5);
+	dwarf_regs[6]  = REG(R6);
+	dwarf_regs[7]  = REG(R7);
+	dwarf_regs[8]  = REG(R8);
+	dwarf_regs[9]  = REG(R9);
+	dwarf_regs[10] = REG(R10);
+	dwarf_regs[11] = REG(R11);
+	dwarf_regs[12] = REG(R12);
+	dwarf_regs[13] = REG(R13);
+	dwarf_regs[14] = REG(R14);
+	dwarf_regs[15] = REG(R15);
+	dwarf_regs[16] = REG(R16);
+	dwarf_regs[17] = REG(R17);
+	dwarf_regs[18] = REG(R18);
+	dwarf_regs[19] = REG(R19);
+	dwarf_regs[20] = REG(R20);
+	dwarf_regs[21] = REG(R21);
+	dwarf_regs[22] = REG(R22);
+	dwarf_regs[23] = REG(R23);
+	dwarf_regs[24] = REG(R24);
+	dwarf_regs[25] = REG(R25);
+	dwarf_regs[26] = REG(R26);
+	dwarf_regs[27] = REG(R27);
+	dwarf_regs[28] = REG(R28);
+	dwarf_regs[29] = REG(R29);
+	dwarf_regs[30] = REG(R30);
+	dwarf_regs[31] = REG(R31);
+	if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+		return false;
+
+	dwarf_nip = REG(NIP);
+	dwfl_thread_state_register_pc(thread, dwarf_nip);
+	for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+		Dwarf_Word val = 0;
+		perf_reg_value(&val, user_regs, special_regs[i][1]);
+		if (!dwfl_thread_state_registers(thread,
+						 special_regs[i][0], 1,
+						 &val))
+			return false;
+	}
+
+	return true;
+}
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 0f196ee..3cbf6fa 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -1664,3 +1664,15 @@
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index af25bc8..aa512fa 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -1696,3 +1696,33 @@
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20          \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20       \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 48 0f ae 20       \tptwriteq (%rax)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 49 0f ae 20       \tptwriteq (%r8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae 24 25 78 56 34 12 \tptwriteq 0x12345678",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 979487d..6cdb65d 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1343,6 +1343,26 @@ int main(void)
 	asm volatile("xrstors 0x12345678(%rax,%rcx,8)");
 	asm volatile("xrstors 0x12345678(%r8,%rcx,8)");
 
+	/* ptwrite */
+
+	asm volatile("ptwrite (%rax)");
+	asm volatile("ptwrite (%r8)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwrite 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwritel (%rax)");
+	asm volatile("ptwritel (%r8)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwritel 0x12345678(%r8,%rcx,8)");
+
+	asm volatile("ptwriteq (%rax)");
+	asm volatile("ptwriteq (%r8)");
+	asm volatile("ptwriteq (0x12345678)");
+	asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
+	asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
+
 #else  /* #ifdef __x86_64__ */
 
 	/* bound r32, mem (same op code as EVEX prefix) */
@@ -2653,6 +2673,16 @@ int main(void)
 	asm volatile("xrstors (0x12345678)");
 	asm volatile("xrstors 0x12345678(%eax,%ecx,8)");
 
+	/* ptwrite */
+
+	asm volatile("ptwrite (%eax)");
+	asm volatile("ptwrite (0x12345678)");
+	asm volatile("ptwrite 0x12345678(%eax,%ecx,8)");
+
+	asm volatile("ptwritel (%eax)");
+	asm volatile("ptwritel (0x12345678)");
+	asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
+
 #endif /* #ifndef __x86_64__ */
 
 	/* Following line is a marker for the awk script - do not change */
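
For reference, the new vectors exercise PTWRITE's single encoding, F3 0F AE /4: the /4 opcode extension sits in the reg field of the ModRM byte, and the 64-bit form (ptwriteq) differs only by a REX.W prefix. A small stand-alone check against the byte patterns listed in the generated tables above (illustrative only):

#include <stdio.h>

int main(void)
{
	/* "f3 0f ae 20"    — ptwritel (%eax)/(%rax) from the tables above */
	unsigned char ptwritel[] = { 0xf3, 0x0f, 0xae, 0x20 };
	/* "f3 48 0f ae 20" — ptwriteq (%rax); 0x48 is REX.W */
	unsigned char ptwriteq[] = { 0xf3, 0x48, 0x0f, 0xae, 0x20 };

	printf("ModRM reg (opcode extension): %d\n", (ptwritel[3] >> 3) & 7); /* 4 */
	printf("REX.W set for ptwriteq: %d\n", (ptwriteq[1] >> 3) & 1);       /* 1 */
	return 0;
}
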
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af2bce7..781df40 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -35,10 +35,6 @@
 #define KiB_MASK(x) (KiB(x) - 1)
 #define MiB_MASK(x) (MiB(x) - 1)
 
-#define INTEL_BTS_DFLT_SAMPLE_SIZE	KiB(4)
-
-#define INTEL_BTS_MAX_SAMPLE_SIZE	KiB(60)
-
 struct intel_bts_snapshot_ref {
 	void	*ref_buf;
 	size_t	ref_offset;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index f630de0..9535be5 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -40,10 +40,6 @@
 #define KiB_MASK(x) (KiB(x) - 1)
 #define MiB_MASK(x) (MiB(x) - 1)
 
-#define INTEL_PT_DEFAULT_SAMPLE_SIZE	KiB(4)
-
-#define INTEL_PT_MAX_SAMPLE_SIZE	KiB(60)
-
 #define INTEL_PT_PSB_PERIOD_NEAR	256
 
 struct intel_pt_snapshot_ref {
@@ -196,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 	int psb_cyc, psb_periods, psb_period;
 	int pos = 0;
 	u64 config;
+	char c;
 
 	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
 
@@ -229,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 		}
 	}
 
+	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
+		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
+
 	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
 
 	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
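
The two perf_pmu__scan_file() calls above only append ",pt,branch" to the default config when the kernel advertises both format terms for the intel_pt PMU. A rough equivalent of that probe, assuming the usual sysfs layout (illustrative, not this patch's code path):

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

/* Check whether a PMU exposes a given format term under sysfs. */
static int pmu_has_format(const char *pmu, const char *term)
{
	char path[PATH_MAX];

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/%s/format/%s", pmu, term);
	return access(path, R_OK) == 0;
}

/* e.g. pmu_has_format("intel_pt", "pt") && pmu_has_format("intel_pt", "branch") */
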
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 27de0c8..469d65b 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -700,7 +700,7 @@ static inline uint32_t lfsr_32(uint32_t lfsr)
  * kernel (KSM, zero page, etc.) cannot optimize away RAM
  * accesses:
  */
-static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
+static inline u64 access_data(u64 *data, u64 val)
 {
 	if (g->p.data_reads)
 		val += *data;
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 620a467..475999e 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1725,10 +1725,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
 				tok; tok = strtok_r(NULL, ", ", &tmp)) {	\
 			ret = _fn(hpp_list, tok);				\
 			if (ret == -EINVAL) {					\
-				error("Invalid --fields key: `%s'", tok);	\
+				pr_err("Invalid --fields key: `%s'", tok);	\
 				break;						\
 			} else if (ret == -ESRCH) {				\
-				error("Unknown --fields key: `%s'", tok);	\
+				pr_err("Unknown --fields key: `%s'", tok);	\
 				break;						\
 			}							\
 		}								\
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 80668fa..ece4558 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -156,7 +156,7 @@ static int parse_config_arg(char *arg, char **var, char **value)
 
 int cmd_config(int argc, const char **argv)
 {
-	int i, ret = 0;
+	int i, ret = -1;
 	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 	const char *config_filename;
@@ -186,10 +186,8 @@ int cmd_config(int argc, const char **argv)
 	 * because of reinitializing with options config file location.
 	 */
 	set = perf_config_set__new();
-	if (!set) {
-		ret = -1;
+	if (!set)
 		goto out_err;
-	}
 
 	switch (actions) {
 	case ACTION_LIST:
@@ -197,41 +195,54 @@ int cmd_config(int argc, const char **argv)
 			pr_err("Error: takes no arguments\n");
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
-			ret = show_config(set);
-			if (ret < 0)
+			if (show_config(set) < 0) {
 				pr_err("Nothing configured, "
 				       "please check your %s \n", config_filename);
+				goto out_err;
+			}
 		}
 		break;
 	default:
-		if (argc) {
-			for (i = 0; argv[i]; i++) {
-				char *var, *value;
-				char *arg = strdup(argv[i]);
-
-				if (!arg) {
-					pr_err("%s: strdup failed\n", __func__);
-					ret = -1;
-					break;
-				}
-
-				if (parse_config_arg(arg, &var, &value) < 0) {
-					free(arg);
-					ret = -1;
-					break;
-				}
-
-				if (value == NULL)
-					ret = show_spec_config(set, var);
-				else
-					ret = set_config(set, config_filename, var, value);
-				free(arg);
-			}
-		} else
+		if (!argc) {
 			usage_with_options(config_usage, config_options);
+			break;
+		}
+
+		for (i = 0; argv[i]; i++) {
+			char *var, *value;
+			char *arg = strdup(argv[i]);
+
+			if (!arg) {
+				pr_err("%s: strdup failed\n", __func__);
+				goto out_err;
+			}
+
+			if (parse_config_arg(arg, &var, &value) < 0) {
+				free(arg);
+				goto out_err;
+			}
+
+			if (value == NULL) {
+				if (show_spec_config(set, var) < 0) {
+					pr_err("%s is not configured: %s\n",
+					       var, config_filename);
+					free(arg);
+					goto out_err;
+				}
+			} else {
+				if (set_config(set, config_filename, var, value) < 0) {
+					pr_err("Failed to set '%s=%s' on %s\n",
+					       var, value, config_filename);
+					free(arg);
+					goto out_err;
+				}
+			}
+			free(arg);
+		}
 	}
 
-	perf_config_set__delete(set);
+	ret = 0;
 out_err:
+	perf_config_set__delete(set);
 	return ret;
 }
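
The cmd_config() rework above follows the usual single-exit cleanup idiom: ret defaults to failure, every error path jumps to out_err, and ret is only cleared once all steps have succeeded, so perf_config_set__delete() runs exactly once on every path. A generic sketch of the shape (illustrative only):

#include <stdlib.h>

static int do_work(void)
{
	int ret = -1;			/* assume failure */
	char *buf = malloc(64);

	if (!buf)
		goto out;

	/* ... each intermediate step does "goto out" on error ... */

	ret = 0;			/* everything succeeded */
out:
	free(buf);			/* free(NULL) is a no-op, single cleanup point */
	return ret;
}
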
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index eec5df8..0cd4cf6 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1302,7 +1302,10 @@ static int diff__config(const char *var, const char *value,
 			void *cb __maybe_unused)
 {
 	if (!strcmp(var, "diff.order")) {
-		sort_compute = perf_config_int(var, value);
+		int ret;
+		if (perf_config_int(&ret, var, value) < 0)
+			return -1;
+		sort_compute = ret;
 		return 0;
 	}
 	if (!strcmp(var, "diff.compute")) {
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 9e0b35c..dd26c62 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -28,9 +28,19 @@
 #define DEFAULT_TRACER  "function_graph"
 
 struct perf_ftrace {
-	struct perf_evlist *evlist;
-	struct target target;
-	const char *tracer;
+	struct perf_evlist	*evlist;
+	struct target		target;
+	const char		*tracer;
+	struct list_head	filters;
+	struct list_head	notrace;
+	struct list_head	graph_funcs;
+	struct list_head	nograph_funcs;
+	int			graph_depth;
+};
+
+struct filter_entry {
+	struct list_head	list;
+	char			name[];
 };
 
 static bool done;
@@ -61,6 +71,7 @@ static int __write_tracing_file(const char *name, const char *val, bool append)
 	int fd, ret = -1;
 	ssize_t size = strlen(val);
 	int flags = O_WRONLY;
+	char errbuf[512];
 
 	file = get_tracing_file(name);
 	if (!file) {
@@ -75,14 +86,16 @@ static int __write_tracing_file(const char *name, const char *val, bool append)
 
 	fd = open(file, flags);
 	if (fd < 0) {
-		pr_debug("cannot open tracing file: %s\n", name);
+		pr_debug("cannot open tracing file: %s: %s\n",
+			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
 		goto out;
 	}
 
 	if (write(fd, val, size) == size)
 		ret = 0;
 	else
-		pr_debug("write '%s' to tracing/%s failed\n", val, name);
+		pr_debug("write '%s' to tracing/%s failed: %s\n",
+			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
 
 	close(fd);
 out:
@@ -101,6 +114,7 @@ static int append_tracing_file(const char *name, const char *val)
 }
 
 static int reset_tracing_cpu(void);
+static void reset_tracing_filters(void);
 
 static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 {
@@ -116,6 +130,10 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
 	if (reset_tracing_cpu() < 0)
 		return -1;
 
+	if (write_tracing_file("max_graph_depth", "0") < 0)
+		return -1;
+
+	reset_tracing_filters();
 	return 0;
 }
 
@@ -181,6 +199,68 @@ static int reset_tracing_cpu(void)
 	return ret;
 }
 
+static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
+{
+	struct filter_entry *pos;
+
+	list_for_each_entry(pos, funcs, list) {
+		if (append_tracing_file(filter_file, pos->name) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int set_tracing_filters(struct perf_ftrace *ftrace)
+{
+	int ret;
+
+	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
+	if (ret < 0)
+		return ret;
+
+	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
+	if (ret < 0)
+		return ret;
+
+	/* old kernels do not have this filter */
+	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
+
+	return ret;
+}
+
+static void reset_tracing_filters(void)
+{
+	write_tracing_file("set_ftrace_filter", " ");
+	write_tracing_file("set_ftrace_notrace", " ");
+	write_tracing_file("set_graph_function", " ");
+	write_tracing_file("set_graph_notrace", " ");
+}
+
+static int set_tracing_depth(struct perf_ftrace *ftrace)
+{
+	char buf[16];
+
+	if (ftrace->graph_depth == 0)
+		return 0;
+
+	if (ftrace->graph_depth < 0) {
+		pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
+		return -1;
+	}
+
+	snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+
+	if (write_tracing_file("max_graph_depth", buf) < 0)
+		return -1;
+
+	return 0;
+}
+
 static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 {
 	char *trace_file;
@@ -223,11 +303,23 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		goto out_reset;
 	}
 
+	if (set_tracing_filters(ftrace) < 0) {
+		pr_err("failed to set tracing filters\n");
+		goto out_reset;
+	}
+
+	if (set_tracing_depth(ftrace) < 0) {
+		pr_err("failed to set graph depth\n");
+		goto out_reset;
+	}
+
 	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
 		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
 		goto out_reset;
 	}
 
+	setup_pager();
+
 	trace_file = get_tracing_file("trace_pipe");
 	if (!trace_file) {
 		pr_err("failed to open trace_pipe\n");
@@ -251,8 +343,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		goto out_close_fd;
 	}
 
-	setup_pager();
-
 	perf_evlist__start_workload(ftrace->evlist);
 
 	while (!done) {
@@ -307,6 +397,32 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb)
 	return -1;
 }
 
+static int parse_filter_func(const struct option *opt, const char *str,
+			     int unset __maybe_unused)
+{
+	struct list_head *head = opt->value;
+	struct filter_entry *entry;
+
+	entry = malloc(sizeof(*entry) + strlen(str) + 1);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	strcpy(entry->name, str);
+	list_add_tail(&entry->list, head);
+
+	return 0;
+}
+
+static void delete_filter_func(struct list_head *head)
+{
+	struct filter_entry *pos, *tmp;
+
+	list_for_each_entry_safe(pos, tmp, head, list) {
+		list_del(&pos->list);
+		free(pos);
+	}
+}
+
 int cmd_ftrace(int argc, const char **argv)
 {
 	int ret;
@@ -330,9 +446,24 @@ int cmd_ftrace(int argc, const char **argv)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
+	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+		     "trace given functions only", parse_filter_func),
+	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+		     "do not trace given functions", parse_filter_func),
+	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+		     "Set graph filter on given functions", parse_filter_func),
+	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+		     "Set nograph filter on given functions", parse_filter_func),
+	OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
+		    "Max depth for function graph tracer"),
 	OPT_END()
 	};
 
+	INIT_LIST_HEAD(&ftrace.filters);
+	INIT_LIST_HEAD(&ftrace.notrace);
+	INIT_LIST_HEAD(&ftrace.graph_funcs);
+	INIT_LIST_HEAD(&ftrace.nograph_funcs);
+
 	ret = perf_config(perf_ftrace_config, &ftrace);
 	if (ret < 0)
 		return -1;
@@ -348,12 +479,14 @@ int cmd_ftrace(int argc, const char **argv)
 
 		target__strerror(&ftrace.target, ret, errbuf, 512);
 		pr_err("%s\n", errbuf);
-		return -EINVAL;
+		goto out_delete_filters;
 	}
 
 	ftrace.evlist = perf_evlist__new();
-	if (ftrace.evlist == NULL)
-		return -ENOMEM;
+	if (ftrace.evlist == NULL) {
+		ret = -ENOMEM;
+		goto out_delete_filters;
+	}
 
 	ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
 	if (ret < 0)
@@ -364,5 +497,11 @@ int cmd_ftrace(int argc, const char **argv)
 out_delete_evlist:
 	perf_evlist__delete(ftrace.evlist);
 
+out_delete_filters:
+	delete_filter_func(&ftrace.filters);
+	delete_filter_func(&ftrace.notrace);
+	delete_filter_func(&ftrace.graph_funcs);
+	delete_filter_func(&ftrace.nograph_funcs);
+
 	return ret;
 }
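
The new -T/-N/-G/-g options collect function names on per-option lists and append them, one write per entry, to the corresponding ftrace control files (set_ftrace_filter, set_ftrace_notrace, set_graph_function, set_graph_notrace), while -D writes max_graph_depth; reset_tracing_files() clears them all again afterwards. Outside of perf, roughly the same effect can be had by writing those files directly; a minimal sketch, assuming the usual tracefs mount point:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int tracing_append(const char *file, const char *val)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path), "/sys/kernel/debug/tracing/%s", file);
	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val)) == (ssize_t)strlen(val) ? 0 : -1;
	close(fd);
	return ret;
}

/* e.g. tracing_append("set_graph_function", "vfs_read") before tracing */
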
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 492f8e1..530a7f2 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -108,10 +108,14 @@ static int check_emacsclient_version(void)
 	return ret;
 }
 
-static void exec_woman_emacs(const char *path, const char *page)
+static void exec_failed(const char *cmd)
 {
 	char sbuf[STRERR_BUFSIZE];
+	pr_warning("failed to exec '%s': %s", cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+}
 
+static void exec_woman_emacs(const char *path, const char *page)
+{
 	if (!check_emacsclient_version()) {
 		/* This works only with emacsclient version >= 22. */
 		char *man_page;
@@ -122,8 +126,7 @@ static void exec_woman_emacs(const char *path, const char *page)
 			execlp(path, "emacsclient", "-e", man_page, NULL);
 			free(man_page);
 		}
-		warning("failed to exec '%s': %s", path,
-			str_error_r(errno, sbuf, sizeof(sbuf)));
+		exec_failed(path);
 	}
 }
 
@@ -134,7 +137,6 @@ static void exec_man_konqueror(const char *path, const char *page)
 	if (display && *display) {
 		char *man_page;
 		const char *filename = "kfmclient";
-		char sbuf[STRERR_BUFSIZE];
 
 		/* It's simpler to launch konqueror using kfmclient. */
 		if (path) {
@@ -155,33 +157,27 @@ static void exec_man_konqueror(const char *path, const char *page)
 			execlp(path, filename, "newTab", man_page, NULL);
 			free(man_page);
 		}
-		warning("failed to exec '%s': %s", path,
-			str_error_r(errno, sbuf, sizeof(sbuf)));
+		exec_failed(path);
 	}
 }
 
 static void exec_man_man(const char *path, const char *page)
 {
-	char sbuf[STRERR_BUFSIZE];
-
 	if (!path)
 		path = "man";
 	execlp(path, "man", page, NULL);
-	warning("failed to exec '%s': %s", path,
-		str_error_r(errno, sbuf, sizeof(sbuf)));
+	exec_failed(path);
 }
 
 static void exec_man_cmd(const char *cmd, const char *page)
 {
-	char sbuf[STRERR_BUFSIZE];
 	char *shell_cmd;
 
 	if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
 		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
 		free(shell_cmd);
 	}
-	warning("failed to exec '%s': %s", cmd,
-		str_error_r(errno, sbuf, sizeof(sbuf)));
+	exec_failed(cmd);
 }
 
 static void add_man_viewer(const char *name)
@@ -214,6 +210,12 @@ static void do_add_man_viewer_info(const char *name,
 	man_viewer_info_list = new;
 }
 
+static void unsupported_man_viewer(const char *name, const char *var)
+{
+	pr_warning("'%s': path for unsupported man viewer.\n"
+		   "Please consider using 'man.<tool>.%s' instead.", name, var);
+}
+
 static int add_man_viewer_path(const char *name,
 			       size_t len,
 			       const char *value)
@@ -221,9 +223,7 @@ static int add_man_viewer_path(const char *name,
 	if (supported_man_viewer(name, len))
 		do_add_man_viewer_info(name, len, value);
 	else
-		warning("'%s': path for unsupported man viewer.\n"
-			"Please consider using 'man.<tool>.cmd' instead.",
-			name);
+		unsupported_man_viewer(name, "cmd");
 
 	return 0;
 }
@@ -233,9 +233,7 @@ static int add_man_viewer_cmd(const char *name,
 			      const char *value)
 {
 	if (supported_man_viewer(name, len))
-		warning("'%s': cmd for supported man viewer.\n"
-			"Please consider using 'man.<tool>.path' instead.",
-			name);
+		unsupported_man_viewer(name, "path");
 	else
 		do_add_man_viewer_info(name, len, value);
 
@@ -247,8 +245,10 @@ static int add_man_viewer_info(const char *var, const char *value)
 	const char *name = var + 4;
 	const char *subkey = strrchr(name, '.');
 
-	if (!subkey)
-		return error("Config with no key for man viewer: %s", name);
+	if (!subkey) {
+		pr_err("Config with no key for man viewer: %s", name);
+		return -1;
+	}
 
 	if (!strcmp(subkey, ".path")) {
 		if (!value)
@@ -261,7 +261,7 @@ static int add_man_viewer_info(const char *var, const char *value)
 		return add_man_viewer_cmd(name, subkey - name, value);
 	}
 
-	warning("'%s': unsupported man viewer sub key.", subkey);
+	pr_warning("'%s': unsupported man viewer sub key.", subkey);
 	return 0;
 }
 
@@ -332,7 +332,7 @@ static void setup_man_path(void)
 		setenv("MANPATH", new_path, 1);
 		free(new_path);
 	} else {
-		error("Unable to setup man path");
+		pr_err("Unable to setup man path");
 	}
 }
 
@@ -349,7 +349,7 @@ static void exec_viewer(const char *name, const char *page)
 	else if (info)
 		exec_man_cmd(info, page);
 	else
-		warning("'%s': unknown man viewer.", name);
+		pr_warning("'%s': unknown man viewer.", name);
 }
 
 static int show_man_page(const char *perf_cmd)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 9409c94..0a8a1c4 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1715,7 +1715,7 @@ static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
 		if (!tok)
 			break;
 		if (slab_sort_dimension__add(tok, sort_list) < 0) {
-			error("Unknown slab --sort key: '%s'", tok);
+			pr_err("Unknown slab --sort key: '%s'", tok);
 			free(str);
 			return -1;
 		}
@@ -1741,7 +1741,7 @@ static int setup_page_sorting(struct list_head *sort_list, const char *arg)
 		if (!tok)
 			break;
 		if (page_sort_dimension__add(tok, sort_list) < 0) {
-			error("Unknown page --sort key: '%s'", tok);
+			pr_err("Unknown page --sort key: '%s'", tok);
 			free(str);
 			return -1;
 		}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ee7d0a8..17a14bc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -453,7 +453,7 @@ static int record__open(struct record *rec)
 	}
 
 	if (perf_evlist__apply_filters(evlist, &pos)) {
-		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			pos->filter, perf_evsel__name(pos), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		rc = -1;
@@ -461,7 +461,7 @@ static int record__open(struct record *rec)
 	}
 
 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
 		      str_error_r(errno, msg, sizeof(msg)));
 		rc = -1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 22478ff..79a33eb 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -94,10 +94,9 @@ static int report__config(const char *var, const char *value, void *cb)
 		symbol_conf.cumulate_callchain = perf_config_bool(var, value);
 		return 0;
 	}
-	if (!strcmp(var, "report.queue-size")) {
-		rep->queue_size = perf_config_u64(var, value);
-		return 0;
-	}
+	if (!strcmp(var, "report.queue-size"))
+		return perf_config_u64(&rep->queue_size, var, value);
+
 	if (!strcmp(var, "report.sort_order")) {
 		default_sort_order = strdup(value);
 		return 0;
@@ -558,6 +557,7 @@ static int __cmd_report(struct report *rep)
 			ui__error("failed to set cpu bitmap\n");
 			return ret;
 		}
+		session->itrace_synth_opts->cpu_bitmap = rep->cpu_bitmap;
 	}
 
 	if (rep->show_threads) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 39996c5..322b4de 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2066,7 +2066,7 @@ static void save_task_callchain(struct perf_sched *sched,
 	if (thread__resolve_callchain(thread, cursor, evsel, sample,
 				      NULL, NULL, sched->max_stack + 2) != 0) {
 		if (verbose > 0)
-			error("Failed to resolve callchain. Skipping\n");
+			pr_err("Failed to resolve callchain. Skipping\n");
 
 		return;
 	}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d05aec4..83cdc0a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -85,6 +85,8 @@ enum perf_output_field {
 	PERF_OUTPUT_INSN	    = 1U << 21,
 	PERF_OUTPUT_INSNLEN	    = 1U << 22,
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
+	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
+	PERF_OUTPUT_SYNTH           = 1U << 25,
 };
 
 struct output_option {
@@ -115,6 +117,13 @@ struct output_option {
 	{.str = "insn", .field = PERF_OUTPUT_INSN},
 	{.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
+	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+};
+
+enum {
+	OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX,
+	OUTPUT_TYPE_MAX
 };
 
 /* default set to maintain compatibility with current format */
@@ -124,7 +133,7 @@ static struct {
 	unsigned int print_ip_opts;
 	u64 fields;
 	u64 invalid_fields;
-} output[PERF_TYPE_MAX] = {
+} output[OUTPUT_TYPE_MAX] = {
 
 	[PERF_TYPE_HARDWARE] = {
 		.user_set = false,
@@ -182,12 +191,44 @@ static struct {
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
+
+	[OUTPUT_TYPE_SYNTH] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+			      PERF_OUTPUT_SYNTH,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
 };
 
+static inline int output_type(unsigned int type)
+{
+	switch (type) {
+	case PERF_TYPE_SYNTH:
+		return OUTPUT_TYPE_SYNTH;
+	default:
+		return type;
+	}
+}
+
+static inline unsigned int attr_type(unsigned int type)
+{
+	switch (type) {
+	case OUTPUT_TYPE_SYNTH:
+		return PERF_TYPE_SYNTH;
+	default:
+		return type;
+	}
+}
+
 static bool output_set_by_user(void)
 {
 	int j;
-	for (j = 0; j < PERF_TYPE_MAX; ++j) {
+	for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 		if (output[j].user_set)
 			return true;
 	}
@@ -208,7 +249,7 @@ static const char *output_field2str(enum perf_output_field field)
 	return str;
 }
 
-#define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
+#define PRINT_FIELD(x)  (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
 
 static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
 				      u64 sample_type, const char *sample_msg,
@@ -216,7 +257,7 @@ static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
 				      bool allow_user_set)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	int type = attr->type;
+	int type = output_type(attr->type);
 	const char *evname;
 
 	if (attr->sample_type & sample_type)
@@ -298,10 +339,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
-	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
-		pr_err("Display of DSO requested but neither sample IP nor "
-			   "sample address\nis selected. Hence, no addresses to convert "
-		       "to DSO.\n");
+	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+		pr_err("Display of DSO requested but no address to convert.  Select\n"
+		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -346,7 +387,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 
 static void set_print_ip_opts(struct perf_event_attr *attr)
 {
-	unsigned int type = attr->type;
+	unsigned int type = output_type(attr->type);
 
 	output[type].print_ip_opts = 0;
 	if (PRINT_FIELD(IP))
@@ -374,16 +415,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	unsigned int j;
 	struct perf_evsel *evsel;
 
-	for (j = 0; j < PERF_TYPE_MAX; ++j) {
-		evsel = perf_session__find_first_evtype(session, j);
+	for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+		evsel = perf_session__find_first_evtype(session, attr_type(j));
 
 		/*
 		 * even if fields is set to 0 (ie., show nothing) event must
 		 * exist if user explicitly includes it on the command line
 		 */
-		if (!evsel && output[j].user_set && !output[j].wildcard_set) {
+		if (!evsel && output[j].user_set && !output[j].wildcard_set &&
+		    j != OUTPUT_TYPE_SYNTH) {
 			pr_err("%s events do not exist. "
-			       "Remove corresponding -f option to proceed.\n",
+			       "Remove corresponding -F option to proceed.\n",
 			       event_type(j));
 			return -1;
 		}
@@ -514,18 +556,43 @@ mispred_str(struct branch_entry *br)
 	return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(struct perf_sample *sample)
+static void print_sample_brstack(struct perf_sample *sample,
+				 struct thread *thread,
+				 struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
-	u64 i;
+	struct addr_location alf, alt;
+	u64 i, from, to;
 
 	if (!(br && br->nr))
 		return;
 
 	for (i = 0; i < br->nr; i++) {
-		printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
-			br->entries[i].from,
-			br->entries[i].to,
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		if (PRINT_FIELD(DSO)) {
+			memset(&alf, 0, sizeof(alf));
+			memset(&alt, 0, sizeof(alt));
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		}
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+
+		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
 			br->entries[i].flags.abort? 'A' : '-',
@@ -534,7 +601,8 @@ static void print_sample_brstack(struct perf_sample *sample)
 }
 
 static void print_sample_brstacksym(struct perf_sample *sample,
-				    struct thread *thread)
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct addr_location alf, alt;
@@ -559,8 +627,18 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 			alt.sym = map__find_symbol(alt.map, alt.addr);
 
 		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
 		putchar('/');
 		symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
 		printf("/%c/%c/%c/%d ",
 			mispred_str( br->entries + i),
 			br->entries[i].flags.in_tx? 'X' : '-',
@@ -569,6 +647,51 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 	}
 }
 
+static void print_sample_brstackoff(struct perf_sample *sample,
+				    struct thread *thread,
+				    struct perf_event_attr *attr)
+{
+	struct branch_stack *br = sample->branch_stack;
+	struct addr_location alf, alt;
+	u64 i, from, to;
+
+	if (!(br && br->nr))
+		return;
+
+	for (i = 0; i < br->nr; i++) {
+
+		memset(&alf, 0, sizeof(alf));
+		memset(&alt, 0, sizeof(alt));
+		from = br->entries[i].from;
+		to   = br->entries[i].to;
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+		if (alf.map && !alf.map->dso->adjust_symbols)
+			from = map__map_ip(alf.map, from);
+
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+		if (alt.map && !alt.map->dso->adjust_symbols)
+			to = map__map_ip(alt.map, to);
+
+		printf("0x%"PRIx64, from);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alf.map, stdout);
+			printf(")");
+		}
+		printf("/0x%"PRIx64, to);
+		if (PRINT_FIELD(DSO)) {
+			printf("(");
+			map__fprintf_dsoname(alt.map, stdout);
+			printf(")");
+		}
+		printf("/%c/%c/%c/%d ",
+			mispred_str(br->entries + i),
+			br->entries[i].flags.in_tx ? 'X' : '-',
+			br->entries[i].flags.abort ? 'A' : '-',
+			br->entries[i].flags.cycles);
+	}
+}
 #define MAXBB 16384UL
 
 static int grab_bb(u8 *buffer, u64 start, u64 end,
@@ -906,6 +1029,7 @@ static void print_sample_bts(struct perf_sample *sample,
 			     struct machine *machine)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
 	bool print_srcline_last = false;
 
 	if (PRINT_FIELD(CALLINDENT))
@@ -913,7 +1037,7 @@ static void print_sample_bts(struct perf_sample *sample,
 
 	/* print branch_from information */
 	if (PRINT_FIELD(IP)) {
-		unsigned int print_opts = output[attr->type].print_ip_opts;
+		unsigned int print_opts = output[type].print_ip_opts;
 		struct callchain_cursor *cursor = NULL;
 
 		if (symbol_conf.use_callchain && sample->callchain &&
@@ -936,7 +1060,7 @@ static void print_sample_bts(struct perf_sample *sample,
 	/* print branch_to information */
 	if (PRINT_FIELD(ADDR) ||
 	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
-	     !output[attr->type].user_set)) {
+	     !output[type].user_set)) {
 		printf(" => ");
 		print_sample_addr(sample, thread, attr);
 	}
@@ -1079,6 +1203,127 @@ static void print_sample_bpf_output(struct perf_sample *sample)
 		       (char *)(sample->raw_data));
 }
 
+static void print_sample_spacing(int len, int spacing)
+{
+	if (len > 0 && len < spacing)
+		printf("%*s", spacing - len, "");
+}
+
+static void print_sample_pt_spacing(int len)
+{
+	print_sample_spacing(len, 34);
+}
+
+static void print_sample_synth_ptwrite(struct perf_sample *sample)
+{
+	struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u payload: %#" PRIx64 " ",
+		     data->ip, le64_to_cpu(data->payload));
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_mwait(struct perf_sample *sample)
+{
+	struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hints: %#x extensions: %#x ",
+		     data->hints, data->extensions);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwre(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" hw: %u cstate: %u sub-cstate: %u ",
+		     data->hw, data->cstate, data->subcstate);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_exstop(struct perf_sample *sample)
+{
+	struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" IP: %u ", data->ip);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_pwrx(struct perf_sample *sample)
+{
+	struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	len = printf(" deepest cstate: %u last cstate: %u wake reason: %#x ",
+		     data->deepest_cstate, data->last_cstate,
+		     data->wake_reason);
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth_cbr(struct perf_sample *sample)
+{
+	struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
+	unsigned int percent, freq;
+	int len;
+
+	if (perf_sample__bad_synth_size(sample, *data))
+		return;
+
+	freq = (le32_to_cpu(data->freq) + 500) / 1000;
+	len = printf(" cbr: %2u freq: %4u MHz ", data->cbr, freq);
+	if (data->max_nonturbo) {
+		percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
+		len += printf("(%3u%%) ", percent);
+	}
+	print_sample_pt_spacing(len);
+}
+
+static void print_sample_synth(struct perf_sample *sample,
+			       struct perf_evsel *evsel)
+{
+	switch (evsel->attr.config) {
+	case PERF_SYNTH_INTEL_PTWRITE:
+		print_sample_synth_ptwrite(sample);
+		break;
+	case PERF_SYNTH_INTEL_MWAIT:
+		print_sample_synth_mwait(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRE:
+		print_sample_synth_pwre(sample);
+		break;
+	case PERF_SYNTH_INTEL_EXSTOP:
+		print_sample_synth_exstop(sample);
+		break;
+	case PERF_SYNTH_INTEL_PWRX:
+		print_sample_synth_pwrx(sample);
+		break;
+	case PERF_SYNTH_INTEL_CBR:
+		print_sample_synth_cbr(sample);
+		break;
+	default:
+		break;
+	}
+}
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1132,8 +1377,9 @@ static void process_event(struct perf_script *script,
 {
 	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->attr;
+	unsigned int type = output_type(attr->type);
 
-	if (output[attr->type].fields == 0)
+	if (output[type].fields == 0)
 		return;
 
 	print_sample_start(sample, thread, evsel);
@@ -1162,6 +1408,10 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(TRACE))
 		event_format__print(evsel->tp_format, sample->cpu,
 				    sample->raw_data, sample->raw_size);
+
+	if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
+		print_sample_synth(sample, evsel);
+
 	if (PRINT_FIELD(ADDR))
 		print_sample_addr(sample, thread, attr);
 
@@ -1180,16 +1430,18 @@ static void process_event(struct perf_script *script,
 			cursor = &callchain_cursor;
 
 		putchar(cursor ? '\n' : ' ');
-		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
+		sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, stdout);
 	}
 
 	if (PRINT_FIELD(IREGS))
 		print_sample_iregs(sample, attr);
 
 	if (PRINT_FIELD(BRSTACK))
-		print_sample_brstack(sample);
+		print_sample_brstack(sample, thread, attr);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		print_sample_brstacksym(sample, thread);
+		print_sample_brstacksym(sample, thread, attr);
+	else if (PRINT_FIELD(BRSTACKOFF))
+		print_sample_brstackoff(sample, thread, attr);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
@@ -1325,7 +1577,8 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	evlist = *pevlist;
 	evsel = perf_evlist__last(*pevlist);
 
-	if (evsel->attr.type >= PERF_TYPE_MAX)
+	if (evsel->attr.type >= PERF_TYPE_MAX &&
+	    evsel->attr.type != PERF_TYPE_SYNTH)
 		return 0;
 
 	evlist__for_each_entry(evlist, pos) {
@@ -1727,6 +1980,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 	int rc = 0;
 	char *str = strdup(arg);
 	int type = -1;
+	enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT;
 
 	if (!str)
 		return -ENOMEM;
@@ -1749,6 +2003,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			type = PERF_TYPE_RAW;
 		else if (!strcmp(str, "break"))
 			type = PERF_TYPE_BREAKPOINT;
+		else if (!strcmp(str, "synth"))
+			type = OUTPUT_TYPE_SYNTH;
 		else {
 			fprintf(stderr, "Invalid event type in field string.\n");
 			rc = -EINVAL;
@@ -1772,23 +2028,44 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			goto out;
 		}
 
+		/* Don't override defaults for +- */
+		if (strchr(str, '+') || strchr(str, '-'))
+			goto parse;
+
 		if (output_set_by_user())
 			pr_warning("Overriding previous field request for all events.\n");
 
-		for (j = 0; j < PERF_TYPE_MAX; ++j) {
+		for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 			output[j].fields = 0;
 			output[j].user_set = true;
 			output[j].wildcard_set = true;
 		}
 	}
 
+parse:
 	for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) {
+		if (*tok == '+') {
+			if (change == SET)
+				goto out_badmix;
+			change = ADD;
+			tok++;
+		} else if (*tok == '-') {
+			if (change == SET)
+				goto out_badmix;
+			change = REMOVE;
+			tok++;
+		} else {
+			if (change != SET && change != DEFAULT)
+				goto out_badmix;
+			change = SET;
+		}
+
 		for (i = 0; i < imax; ++i) {
 			if (strcmp(tok, all_output_options[i].str) == 0)
 				break;
 		}
 		if (i == imax && strcmp(tok, "flags") == 0) {
-			print_flags = true;
+			print_flags = change == REMOVE ? false : true;
 			continue;
 		}
 		if (i == imax) {
@@ -1801,12 +2078,16 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			/* add user option to all events types for
 			 * which it is valid
 			 */
-			for (j = 0; j < PERF_TYPE_MAX; ++j) {
+			for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
 				if (output[j].invalid_fields & all_output_options[i].field) {
 					pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
 						   all_output_options[i].str, event_type(j));
-				} else
-					output[j].fields |= all_output_options[i].field;
+				} else {
+					if (change == REMOVE)
+						output[j].fields &= ~all_output_options[i].field;
+					else
+						output[j].fields |= all_output_options[i].field;
+				}
 			}
 		} else {
 			if (output[type].invalid_fields & all_output_options[i].field) {
@@ -1826,7 +2107,11 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 				 "Events will not be displayed.\n", event_type(type));
 		}
 	}
+	goto out;
 
+out_badmix:
+	fprintf(stderr, "Cannot mix +-field with overridden fields\n");
+	rc = -EINVAL;
 out:
 	free(str);
 	return rc;
@@ -2444,10 +2729,11 @@ int cmd_script(int argc, const char **argv)
 		     symbol__config_symfs),
 	OPT_CALLBACK('F', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. "
-		     "Valid types: hw,sw,trace,raw. "
+		     "+field to add and -field to remove."
+		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn",
+		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
@@ -2494,6 +2780,8 @@ int cmd_script(int argc, const char **argv)
 			"Enable kernel symbol demangling"),
 	OPT_STRING(0, "time", &script.time_str, "str",
 		   "Time span of interest (start,stop)"),
+	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+		    "Show inline function"),
 	OPT_END()
 	};
 	const char * const script_subcommands[] = { "record", "report", NULL };
@@ -2704,6 +2992,7 @@ int cmd_script(int argc, const char **argv)
 		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
 		if (err < 0)
 			goto out_delete;
+		itrace_synth_opts.cpu_bitmap = cpu_bitmap;
 	}
 
 	if (!no_callchain)
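
One detail worth noting in the new synth printers added above: print_sample_synth_cbr() uses integer round-to-nearest arithmetic when turning the raw core-to-bus ratio data into MHz and a percentage of the max non-turbo ratio. A worked example with made-up values (not from a real trace):

#include <assert.h>

int main(void)
{
	unsigned int freq_raw = 2294999;	/* hypothetical raw (kHz-scaled) value */
	unsigned int cbr = 23, max_nonturbo = 24;

	unsigned int freq = (freq_raw + 500) / 1000;			/* 2295 MHz */
	unsigned int percent = (5 + (1000 * cbr) / max_nonturbo) / 10;	/* 96% */

	assert(freq == 2295 && percent == 96);
	return 0;
}
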
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a935b50..48ac53b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -625,14 +636,14 @@ static int __run_perf_stat(int argc, const char **argv)
 	}
 
 	if (perf_evlist__apply_filters(evsel_list, &counter)) {
-		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			counter->filter, perf_evsel__name(counter), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		return -1;
 	}
 
 	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
 		      str_error_r(errno, msg, sizeof(msg)));
 		return -1;
@@ -1578,6 +1589,7 @@ static void print_header(int argc, const char **argv)
 static void print_footer(void)
 {
 	FILE *output = stat_config.output;
+	int n;
 
 	if (!null_run)
 		fprintf(output, "\n");
@@ -1590,7 +1602,9 @@ static void print_footer(void)
 	}
 	fprintf(output, "\n\n");
 
-	if (print_free_counters_hint)
+	if (print_free_counters_hint &&
+	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+	    n > 0)
 		fprintf(output,
 "Some events weren't counted. Try disabling the NMI watchdog:\n"
 "	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1779,6 +1793,8 @@ static const struct option stat_options[] = {
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2157,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2739,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
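
--smi-cost depends on the core PMU's freeze_on_smi attribute being enabled so SMI time can be separated out of the {msr/aperf/,msr/smi/,cycles} group; the patch flips it on via sysfs__write_int() and restores the original value on exit (smi_reset). A bare-bones sketch of the same toggle, assuming the usual /sys/devices/cpu/freeze_on_smi location:

#include <stdio.h>

#define FREEZE_ON_SMI "/sys/devices/cpu/freeze_on_smi"	/* assumed absolute path */

static int freeze_on_smi_get(int *val)
{
	FILE *f = fopen(FREEZE_ON_SMI, "r");

	if (!f)
		return -1;
	if (fscanf(f, "%d", val) != 1)
		*val = -1;
	fclose(f);
	return *val >= 0 ? 0 : -1;
}

static int freeze_on_smi_set(int val)
{
	FILE *f = fopen(FREEZE_ON_SMI, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f) == 0 ? 0 : -1;
}
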
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 10b6362..6052376 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__disassemble(sym, map, NULL, 0);
+	err = symbol__disassemble(sym, map, NULL, 0, NULL);
 	if (err == 0) {
 out_assign:
 		top->sym_filter_entry = he;
@@ -958,7 +958,7 @@ static int __cmd_top(struct perf_top *top)
 
 	ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
 	if (ret) {
-		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
 			str_error_r(errno, msg, sizeof(msg)));
 		goto out_delete;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d014350..4b2a5d2 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -681,6 +681,10 @@ static struct syscall_fmt {
 	{ .name	    = "mlockall",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+	.alias = "old_mmap",
+#endif
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 			     [2] = SCA_MMAP_PROT, /* prot */
 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index e9651a9..cf36de7 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -304,7 +304,7 @@ jvmti_close(void *agent)
 	FILE *fp = agent;
 
 	if (!fp) {
-		warnx("jvmti: incalid fd in close_agent");
+		warnx("jvmti: invalid fd in close_agent");
 		return -1;
 	}
 
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
index bedf5d0..c53a41f 100644
--- a/tools/perf/jvmti/jvmti_agent.h
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -5,8 +5,6 @@
 #include <stdint.h>
 #include <jvmti.h>
 
-#define __unused __attribute__((unused))
-
 #if defined(__cplusplus)
 extern "C" {
 #endif
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index 5612641..6d71090 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -1,3 +1,4 @@
+#include <linux/compiler.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <string.h>
@@ -238,7 +239,7 @@ code_generated_cb(jvmtiEnv *jvmti,
 }
 
 JNIEXPORT jint JNICALL
-Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused)
 {
 	jvmtiEventCallbacks cb;
 	jvmtiCapabilities caps1;
@@ -313,7 +314,7 @@ Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
 }
 
 JNIEXPORT void JNICALL
-Agent_OnUnload(JavaVM *jvm __unused)
+Agent_OnUnload(JavaVM *jvm __maybe_unused)
 {
 	int ret;
 
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 9213a12..999a4e8 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -2,7 +2,7 @@
 
 jevents-y	+= json.o jsmn.o jevents.o
 pmu-events-y	+= pmu-events.o
-JDIR		=  pmu-events/arch/$(ARCH)
+JDIR		=  pmu-events/arch/$(SRCARCH)
 JSON		=  $(shell [ -d $(JDIR) ] &&				\
 			find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
 #
@@ -10,4 +10,4 @@
 # directory and create tables in pmu-events.c.
 #
 $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
-	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+	$(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index baa073f..bd0aabb 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -48,10 +48,6 @@
 #include "json.h"
 #include "jevents.h"
 
-#ifndef __maybe_unused
-#define __maybe_unused                  __attribute__((unused))
-#endif
-
 int verbose;
 char *prog;
 
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-record b/tools/perf/scripts/python/bin/intel-pt-events-record
new file mode 100644
index 0000000..10fe2b6
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-record
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#
+# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs
+# to be specified with appropriate config terms.
+#
+if ! echo "$@" | grep -q intel_pt ; then
+	echo "Options must include the Intel PT event e.g. -e intel_pt/pwr_evt,ptw/"
+	echo "and for power events it probably needs to be system wide i.e. -a option"
+	echo "For example: -a -e intel_pt/pwr_evt,branch=0/ sleep 1"
+	exit 1
+fi
+perf record $@
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-report b/tools/perf/scripts/python/bin/intel-pt-events-report
new file mode 100644
index 0000000..9a9c92f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: print Intel PT Power Events and PTWRITE
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
\ No newline at end of file
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
new file mode 100644
index 0000000..b19172d
--- /dev/null
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -0,0 +1,128 @@
+# intel-pt-events.py: Print Intel PT Power Events and PTWRITE
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+def trace_begin():
+	print "Intel PT Power Events and PTWRITE"
+
+def trace_end():
+	print "End"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
+
+def print_ptwrite(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	flags = data[0]
+	payload = data[1]
+	exact_ip = flags & 1
+	print "IP: %u payload: %#x" % (exact_ip, payload),
+
+def print_cbr(raw_buf):
+	data = struct.unpack_from("<BBBBII", raw_buf)
+	cbr = data[0]
+	f = (data[4] + 500) / 1000
+	p = ((cbr * 1000 / data[2]) + 5) / 10
+	print "%3u  freq: %4u MHz  (%3u%%)" % (cbr, f, p),
+
+def print_mwait(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	hints = payload & 0xff
+	extensions = (payload >> 32) & 0x3
+	print "hints: %#x extensions: %#x" % (hints, extensions),
+
+def print_pwre(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	hw = (payload >> 7) & 1
+	cstate = (payload >> 12) & 0xf
+	subcstate = (payload >> 8) & 0xf
+	print "hw: %u cstate: %u sub-cstate: %u" % (hw, cstate, subcstate),
+
+def print_exstop(raw_buf):
+	data = struct.unpack_from("<I", raw_buf)
+	flags = data[0]
+	exact_ip = flags & 1
+	print "IP: %u" % (exact_ip),
+
+def print_pwrx(raw_buf):
+	data = struct.unpack_from("<IQ", raw_buf)
+	payload = data[1]
+	deepest_cstate = payload & 0xf
+	last_cstate = (payload >> 4) & 0xf
+	wake_reason = (payload >> 8) & 0xf
+	print "deepest cstate: %u last cstate: %u wake reason: %#x" % (deepest_cstate, last_cstate, wake_reason),
+
+def print_common_start(comm, sample, name):
+	ts = sample["time"]
+	cpu = sample["cpu"]
+	pid = sample["pid"]
+	tid = sample["tid"]
+	print "%16s %5u/%-5u [%03u] %9u.%09u %7s:" % (comm, pid, tid, cpu, ts / 1000000000, ts %1000000000, name),
+
+def print_common_ip(sample, symbol, dso):
+	ip = sample["ip"]
+	print "%16x %s (%s)" % (ip, symbol, dso)
+
+def process_event(param_dict):
+        event_attr = param_dict["attr"]
+        sample     = param_dict["sample"]
+        raw_buf    = param_dict["raw_buf"]
+        comm       = param_dict["comm"]
+        name       = param_dict["ev_name"]
+
+        # Symbol and dso info are not always resolved
+        if (param_dict.has_key("dso")):
+                dso = param_dict["dso"]
+        else:
+                dso = "[unknown]"
+
+        if (param_dict.has_key("symbol")):
+                symbol = param_dict["symbol"]
+        else:
+                symbol = "[unknown]"
+
+	if name == "ptwrite":
+		print_common_start(comm, sample, name)
+		print_ptwrite(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "cbr":
+		print_common_start(comm, sample, name)
+		print_cbr(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "mwait":
+		print_common_start(comm, sample, name)
+		print_mwait(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "pwre":
+		print_common_start(comm, sample, name)
+		print_pwre(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "exstop":
+		print_common_start(comm, sample, name)
+		print_exstop(raw_buf)
+		print_common_ip(sample, symbol, dso)
+	elif name == "pwrx":
+		print_common_start(comm, sample, name)
+		print_pwrx(raw_buf)
+		print_common_ip(sample, symbol, dso)
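
The unpacking done by print_pwre()/print_pwrx() above mirrors the field layout the builtin-script synth printers use on the C side: the interesting bits sit in the low nibbles of the 8-byte payload. A quick C cross-check with an invented payload value:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t payload = 0x532;	/* invented example, not a real trace value */

	unsigned int deepest_cstate = payload & 0xf;		/* 2 */
	unsigned int last_cstate    = (payload >> 4) & 0xf;	/* 3 */
	unsigned int wake_reason    = (payload >> 8) & 0xf;	/* 5 */

	assert(deepest_cstate == 2 && last_cstate == 3 && wake_reason == 5);
	return 0;
}
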
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index af58ebc..84222bd 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -75,7 +75,7 @@
 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
 	$(Q)echo ';' >> $@
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 0dd77494..0e77b2c 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -18,6 +18,7 @@
  * permissions. All the event text files are stored there.
  */
 
+#include <debug.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
@@ -29,14 +30,11 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include "../perf.h"
-#include "util.h"
 #include <subcmd/exec-cmd.h>
 #include "tests.h"
 
 #define ENV "PERF_TEST_ATTR"
 
-extern int verbose;
-
 static char *dir;
 
 void test_attr__init(void)
@@ -138,8 +136,10 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
 {
 	int errno_saved = errno;
 
-	if (store_event(attr, pid, cpu, fd, group_fd, flags))
-		die("test attr FAILED");
+	if (store_event(attr, pid, cpu, fd, group_fd, flags)) {
+		pr_err("test attr FAILED");
+		exit(128);
+	}
 
 	errno = errno_saved;
 }
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index 1091bd4..cdf21a9 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -16,6 +16,13 @@
     def getMsg(self):
         return '\'%s\' - %s' % (self.test.path, self.msg)
 
+class Notest(Exception):
+    def __init__(self, test, arch):
+        self.arch = arch
+        self.test = test
+    def getMsg(self):
+        return '[%s] \'%s\'' % (self.arch, self.test.path)
+
 class Unsup(Exception):
     def __init__(self, test):
         self.test = test
@@ -112,6 +119,9 @@
 #     'command' - perf command name
 #     'args'    - special command arguments
 #     'ret'     - expected command return value (0 by default)
+#     'arch'    - architecture specific test (optional)
+#                 comma separated list, a '!' at the
+#                 beginning negates it.
 #
 # [eventX:base]
 #   - one or multiple instances in file
@@ -134,6 +144,12 @@
         except:
             self.ret  = 0
 
+        try:
+            self.arch  = parser.get('config', 'arch')
+            log.warning("test limitation '%s'" % self.arch)
+        except:
+            self.arch  = ''
+
         self.expect   = {}
         self.result   = {}
         log.debug("  loading expected events");
@@ -145,6 +161,31 @@
         else:
             return True
 
+    def skip_test(self, myarch):
+        # If architecture not set always run test
+        if self.arch == '':
+            # log.warning("test for arch %s is ok" % myarch)
+            return False
+
+        # Allow multiple values in assignment separated by ','
+        arch_list = self.arch.split(',')
+
+        # Handle negated list such as !s390x,ppc
+        if arch_list[0][0] == '!':
+            arch_list[0] = arch_list[0][1:]
+            log.warning("excluded architecture list %s" % arch_list)
+            for arch_item in arch_list:
+                # log.warning("test for %s arch is %s" % (arch_item, myarch))
+                if arch_item == myarch:
+                    return True
+            return False
+
+        for arch_item in arch_list:
+            # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch))
+            if arch_item == myarch:
+                return False
+        return True
+
     def load_events(self, path, events):
         parser_event = ConfigParser.SafeConfigParser()
         parser_event.read(path)
@@ -168,6 +209,11 @@
             events[section] = e
 
     def run_cmd(self, tempdir):
+        junk1, junk2, junk3, junk4, myarch = (os.uname())
+
+        if self.skip_test(myarch):
+            raise Notest(self, myarch)
+
         cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
               self.perf, self.command, tempdir, self.args)
         ret = os.WEXITSTATUS(os.system(cmd))
@@ -265,6 +311,8 @@
             Test(f, options).run()
         except Unsup, obj:
             log.warning("unsupp  %s" % obj.getMsg())
+        except Notest, obj:
+            log.warning("skipped %s" % obj.getMsg())
 
 def setup_log(verbose):
     global log
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index e7664fe..39bbb97 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -62,8 +62,7 @@ static void __test_function(volatile long *ptr)
 }
 #endif
 
-__attribute__ ((noinline))
-static int test_function(void)
+static noinline int test_function(void)
 {
 	__test_function(&the_var);
 	the_var++;
@@ -288,3 +287,17 @@ int test__bp_signal(int subtest __maybe_unused)
 	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
 		TEST_OK : TEST_FAIL;
 }
+
+bool test__bp_signal_is_supported(void)
+{
+/*
+ * powerpc does not yet have support for creating instruction
+ * breakpoints through the perf event interface.
+ * Once that support lands, this restriction can be lifted.
+ */
+#ifdef __powerpc__
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index 89f92fa..3b1ac6f 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -28,8 +28,7 @@
 
 static int overflows;
 
-__attribute__ ((noinline))
-static int test_function(void)
+static noinline int test_function(void)
 {
 	return time(NULL);
 }
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index 7230e62..b4ebc75 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -10,6 +10,15 @@
 
 #include <uapi/linux/fs.h>
 
+/*
+ * If CONFIG_PROFILE_ALL_BRANCHES is selected,
+ * 'if' is redefined after the kernel headers are included.
+ * Restore the original 'if' for the BPF object code.
+ */
+#ifdef if
+# undef if
+#endif
+
 #define FMODE_READ		0x1
 #define FMODE_WRITE		0x2
 
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9e08d29..3ccfd58 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -97,10 +97,12 @@ static struct test generic_tests[] = {
 	{
 		.desc = "Breakpoint overflow signal handler",
 		.func = test__bp_signal,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Breakpoint overflow sampling",
 		.func = test__bp_signal_overflow,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Number of exit events of a simple workload",
@@ -401,6 +403,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 		if (!perf_test__matches(t, curr, argc, argv))
 			continue;
 
+		if (t->is_supported && !t->is_supported()) {
+			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
+			continue;
+		}
+
 		pr_info("%2d: %-*s:", i, width, t->desc);
 
 		if (intlist__find(skiplist, i)) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 1f14e76..94b7c7b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -229,6 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	unsigned char buf2[BUFSZ];
 	size_t ret_len;
 	u64 objdump_addr;
+	const char *objdump_name;
+	char decomp_name[KMOD_DECOMP_LEN];
 	int ret;
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
@@ -289,9 +291,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		state->done[state->done_cnt++] = al.map->start;
 	}
 
+	objdump_name = al.map->dso->long_name;
+	if (dso__needs_decompress(al.map->dso)) {
+		if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+						 decomp_name,
+						 sizeof(decomp_name)) < 0) {
+			pr_debug("decompression failed\n");
+			return -1;
+		}
+
+		objdump_name = decomp_name;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
-	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+	if (dso__needs_decompress(al.map->dso))
+		unlink(objdump_name);
+
 	if (ret > 0) {
 		/*
 		 * The kernel maps are inaccurate - assume objdump is right in
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index dfe5c89..3e56d08 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -76,8 +76,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	return strcmp((const char *) symbol, funcs[idx]);
 }
 
-__attribute__ ((noinline))
-static int unwind_thread(struct thread *thread)
+static noinline int unwind_thread(struct thread *thread)
 {
 	struct perf_sample sample;
 	unsigned long cnt = 0;
@@ -108,8 +107,7 @@ static int unwind_thread(struct thread *thread)
 
 static int global_unwind_retval = -INT_MAX;
 
-__attribute__ ((noinline))
-static int compare(void *p1, void *p2)
+static noinline int compare(void *p1, void *p2)
 {
 	/* Any possible value should be 'thread' */
 	struct thread *thread = *(struct thread **)p1;
@@ -128,8 +126,7 @@ static int compare(void *p1, void *p2)
 	return p1 - p2;
 }
 
-__attribute__ ((noinline))
-static int krava_3(struct thread *thread)
+static noinline int krava_3(struct thread *thread)
 {
 	struct thread *array[2] = {thread, thread};
 	void *fp = &bsearch;
@@ -147,14 +144,12 @@ static int krava_3(struct thread *thread)
 	return global_unwind_retval;
 }
 
-__attribute__ ((noinline))
-static int krava_2(struct thread *thread)
+static noinline int krava_2(struct thread *thread)
 {
 	return krava_3(thread);
 }
 
-__attribute__ ((noinline))
-static int krava_1(struct thread *thread)
+static noinline int krava_1(struct thread *thread)
 {
 	return krava_2(thread);
 }
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7fad885..812a053 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1810,17 +1810,6 @@ static int test_pmu_events(void)
 	return ret;
 }
 
-static void debug_warn(const char *warn, va_list params)
-{
-	char msg[1024];
-
-	if (verbose <= 0)
-		return;
-
-	vsnprintf(msg, sizeof(msg), warn, params);
-	fprintf(stderr, " Warning: %s\n", msg);
-}
-
 int test__parse_events(int subtest __maybe_unused)
 {
 	int ret1, ret2 = 0;
@@ -1832,8 +1821,6 @@ do {							\
 		ret2 = ret1;				\
 } while (0)
 
-	set_warning_routine(debug_warn);
-
 	TEST_EVENTS(test__events);
 
 	if (test_pmu())
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 32873ec..cf00eba 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -83,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused)
 
 	evsel = perf_evlist__first(evlist);
 	evsel->attr.task = 1;
-	evsel->attr.sample_freq = 0;
+	evsel->attr.sample_freq = 1;
 	evsel->attr.inherit = 0;
 	evsel->attr.watermark = 0;
 	evsel->attr.wakeup_events = 1;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 6318596..5773638 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
 		int (*get_nr)(void);
 		const char *(*get_desc)(int subtest);
 	} subtest;
+	bool (*is_supported)(void);
 };
 
 /* Tests */
@@ -99,6 +100,8 @@ const char *test__clang_subtest_get_desc(int subtest);
 int test__clang_subtest_get_nr(void);
 int test__unit_number__scnprint(int subtest);
 
+bool test__bp_signal_is_supported(void);
+
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d990ad0..27f41f2 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -46,12 +46,15 @@ static struct annotate_browser_opt {
 	.jump_arrows	= true,
 };
 
+struct arch;
+
 struct annotate_browser {
 	struct ui_browser b;
 	struct rb_root	  entries;
 	struct rb_node	  *curr_hot;
 	struct disasm_line  *selection;
 	struct disasm_line  **offsets;
+	struct arch	    *arch;
 	int		    nr_events;
 	u64		    start;
 	int		    nr_asm_entries;
@@ -125,43 +128,57 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	int i, pcnt_width = annotate_browser__pcnt_width(ab);
 	double percent_max = 0.0;
 	char bf[256];
+	bool show_title = false;
 
 	for (i = 0; i < ab->nr_events; i++) {
 		if (bdl->samples[i].percent > percent_max)
 			percent_max = bdl->samples[i].percent;
 	}
 
+	if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) {
+		if (ab->have_cycles) {
+			if (dl->ipc == 0.0 && dl->cycles == 0)
+				show_title = true;
+		} else
+			show_title = true;
+	}
+
 	if (dl->offset != -1 && percent_max != 0.0) {
-		if (percent_max != 0.0) {
-			for (i = 0; i < ab->nr_events; i++) {
-				ui_browser__set_percent_color(browser,
-							bdl->samples[i].percent,
-							current_entry);
-				if (annotate_browser__opts.show_total_period) {
-					ui_browser__printf(browser, "%6" PRIu64 " ",
-							   bdl->samples[i].nr);
-				} else {
-					ui_browser__printf(browser, "%6.2f ",
-							   bdl->samples[i].percent);
-				}
+		for (i = 0; i < ab->nr_events; i++) {
+			ui_browser__set_percent_color(browser,
+						bdl->samples[i].percent,
+						current_entry);
+			if (annotate_browser__opts.show_total_period) {
+				ui_browser__printf(browser, "%6" PRIu64 " ",
+						   bdl->samples[i].nr);
+			} else {
+				ui_browser__printf(browser, "%6.2f ",
+						   bdl->samples[i].percent);
 			}
-		} else {
-			ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
-		ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
+
+		if (!show_title)
+			ui_browser__write_nstring(browser, " ", 7 * ab->nr_events);
+		else
+			ui_browser__printf(browser, "%*s", 7, "Percent");
 	}
 	if (ab->have_cycles) {
 		if (dl->ipc)
 			ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc);
-		else
+		else if (!show_title)
 			ui_browser__write_nstring(browser, " ", IPC_WIDTH);
+		else
+			ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC");
+
 		if (dl->cycles)
 			ui_browser__printf(browser, "%*" PRIu64 " ",
 					   CYCLES_WIDTH - 1, dl->cycles);
-		else
+		else if (!show_title)
 			ui_browser__write_nstring(browser, " ", CYCLES_WIDTH);
+		else
+			ui_browser__printf(browser, "%*s ", CYCLES_WIDTH - 1, "Cycle");
 	}
 
 	SLsmg_write_char(' ');
@@ -1056,7 +1073,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 		  (nr_pcnt - 1);
 	}
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), sizeof_bdl);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  sizeof_bdl, &browser.arch);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index e99ba86..d903fd4 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -168,7 +168,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	if (map->dso->annotate_warned)
 		return -1;
 
-	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0);
+	err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				  0, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 59addd5..ddb2c6f 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -210,6 +210,8 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
 			return 0;
 
 		ret = b->callchain->max_depth - a->callchain->max_depth;
+		if (callchain_param.order == ORDER_CALLER)
+			ret = -ret;
 	}
 	return ret;
 }
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 683f834..be1caab 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -239,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 	const char *s = strchr(ops->raw, '+');
 	const char *c = strchr(ops->raw, ',');
 
-	if (c++ != NULL)
+	/*
+	 * Skip over up to two possible operands to get to the address, e.g.:
+	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+	 */
+	if (c++ != NULL) {
 		ops->target.addr = strtoull(c, NULL, 16);
-	else
+		if (!ops->target.addr) {
+			c = strchr(c, ',');
+			if (c++ != NULL)
+				ops->target.addr = strtoull(c, NULL, 16);
+		}
+	} else {
 		ops->target.addr = strtoull(ops->raw, NULL, 16);
+	}
 
 	if (s++ != NULL) {
 		ops->target.offset = strtoull(s, NULL, 16);
@@ -257,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
+	const char *c = strchr(ops->raw, ',');
+
 	if (!ops->target.addr || ops->target.offset < 0)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
+	if (c != NULL) {
+		const char *c2 = strchr(c + 1, ',');
+
+		/* check for 3-op insn */
+		if (c2 != NULL)
+			c = c2;
+		c++;
+
+		/* mirror arch objdump's space-after-comma style */
+		if (*c == ' ')
+			c++;
+	}
+
+	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64,
+			 ins->name, c ? (int)(c - ops->raw) : 0, ops->raw,
+			 ops->target.offset);
 }
 
 static struct ins_ops jump_ops = {
@@ -1294,6 +1321,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	char linkname[PATH_MAX];
 	char *build_id_filename;
 	char *build_id_path = NULL;
+	char *pos;
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso))
@@ -1313,7 +1341,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	if (!build_id_path)
 		return -1;
 
-	dirname(build_id_path);
+	/*
+	 * The old style build-id cache path ends with XX/XXXXXXX.., while the
+	 * new style ends with XX/XXXXXXX../{elf,kallsyms,vdso}, so strip the
+	 * last path component (via dirname) only for the new style.
+	 */
+	pos = strrchr(build_id_path, '/');
+	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+		dirname(build_id_path);
 
 	if (dso__is_kcore(dso) ||
 	    readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
@@ -1344,7 +1379,9 @@ static const char *annotate__norm_arch(const char *arch_name)
 	return normalize_arch((char *)arch_name);
 }
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize)
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch)
 {
 	struct dso *dso = map->dso;
 	char command[PATH_MAX * 2];
@@ -1370,6 +1407,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	if (arch == NULL)
 		return -ENOTSUP;
 
+	if (parch)
+		*parch = arch;
+
 	if (arch->init) {
 		err = arch->init(arch);
 		if (err) {
@@ -1396,31 +1436,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 				sizeof(symfs_filename));
 		}
 	} else if (dso__needs_decompress(dso)) {
-		char tmp[PATH_MAX];
-		struct kmod_path m;
-		int fd;
-		bool ret;
+		char tmp[KMOD_DECOMP_LEN];
 
-		if (kmod_path__parse_ext(&m, symfs_filename))
-			goto out;
-
-		snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
-
-		fd = mkstemp(tmp);
-		if (fd < 0) {
-			free(m.ext);
-			goto out;
-		}
-
-		ret = decompress_to_file(m.ext, symfs_filename, fd);
-
-		if (ret)
-			pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename);
-
-		free(m.ext);
-		close(fd);
-
-		if (!ret)
+		if (dso__decompress_kmodule_path(dso, symfs_filename,
+						 tmp, sizeof(tmp)) < 0)
 			goto out;
 
 		strcpy(symfs_filename, tmp);
@@ -1429,7 +1448,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand",
+		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
 		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
@@ -1887,7 +1906,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
-	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), 0) < 0)
+	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel),
+				0, NULL) < 0)
 		return -1;
 
 	len = symbol__size(sym);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 948aa8e..2105503 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
 int symbol__alloc_hist(struct symbol *sym);
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
-int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize);
+int symbol__disassemble(struct symbol *sym, struct map *map,
+			const char *arch_name, size_t privsize,
+			struct arch **parch);
 
 enum symbol_disassemble_errno {
 	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0,
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 0daf63b..5547457 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -322,6 +322,13 @@ static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
 	return auxtrace_queues__add_buffer(queues, idx, buffer);
 }
 
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+	unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+	return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+}
+
 int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 			       struct perf_session *session,
 			       union perf_event *event, off_t data_offset,
@@ -331,6 +338,9 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 	unsigned int idx;
 	int err;
 
+	if (filter_cpu(session, event->auxtrace.cpu))
+		return 0;
+
 	buffer = zalloc(sizeof(struct auxtrace_buffer));
 	if (!buffer)
 		return -ENOMEM;
@@ -947,6 +957,8 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
 	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
+	synth_opts->ptwrites = true;
+	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
 	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
 	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
@@ -1030,6 +1042,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'x':
 			synth_opts->transactions = true;
 			break;
+		case 'w':
+			synth_opts->ptwrites = true;
+			break;
+		case 'p':
+			synth_opts->pwr_events = true;
+			break;
 		case 'e':
 			synth_opts->errors = true;
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 9f0de72..33b5e6c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -59,6 +59,8 @@ enum itrace_period_type {
  * @instructions: whether to synthesize 'instructions' events
  * @branches: whether to synthesize 'branches' events
  * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
  * @errors: whether to synthesize decoder error events
  * @dont_decode: whether to skip decoding entirely
  * @log: write a decoding log
@@ -72,6 +74,7 @@ enum itrace_period_type {
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
  * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
  */
 struct itrace_synth_opts {
 	bool			set;
@@ -79,6 +82,8 @@ struct itrace_synth_opts {
 	bool			instructions;
 	bool			branches;
 	bool			transactions;
+	bool			ptwrites;
+	bool			pwr_events;
 	bool			errors;
 	bool			dont_decode;
 	bool			log;
@@ -92,6 +97,7 @@ struct itrace_synth_opts {
 	unsigned long long	period;
 	enum itrace_period_type	period_type;
 	unsigned long		initial_skip;
+	unsigned long		*cpu_bitmap;
 };
 
 /**
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 168cc49..e0148b0 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -278,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 	return bf;
 }
 
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
-{
-	char *id_name = NULL, *ch;
-	struct stat sb;
-	char sbuild_id[SBUILD_ID_SIZE];
-
-	if (!dso->has_build_id)
-		goto err;
-
-	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-	id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
-	if (!id_name)
-		goto err;
-	if (access(id_name, F_OK))
-		goto err;
-	if (lstat(id_name, &sb) == -1)
-		goto err;
-	if ((size_t)sb.st_size > size - 1)
-		goto err;
-	if (readlink(id_name, bf, size - 1) < 0)
-		goto err;
-
-	bf[sb.st_size] = '\0';
-
-	/*
-	 * link should be:
-	 * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
-	 */
-	ch = strrchr(bf, '/');
-	if (!ch)
-		goto err;
-	if (ch - 3 < bf)
-		goto err;
-
-	free(id_name);
-	return strncmp(".ko", ch - 3, 3) == 0;
-err:
-	pr_err("Invalid build id: %s\n", id_name ? :
-					 dso->long_name ? :
-					 dso->short_name ? :
-					 "[unknown]");
-	free(id_name);
-	return false;
-}
-
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 8a89b19..96690a5 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -17,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
 				    size_t size);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 0328f29..0175765 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -5,6 +5,7 @@
 #include <subcmd/pager.h>
 #include "../ui/ui.h"
 
+#include <linux/compiler.h>
 #include <linux/string.h>
 
 #define CMD_EXEC_PATH "--exec-path"
@@ -24,6 +25,6 @@ static inline int is_absolute_path(const char *path)
 	return path[0] == '/';
 }
 
-char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __printf(1, 2);
 
 #endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 81fc29a..b4204b4 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -621,15 +621,20 @@ enum match_result {
 static enum match_result match_chain_srcline(struct callchain_cursor_node *node,
 					     struct callchain_list *cnode)
 {
-	char *left = get_srcline(cnode->ms.map->dso,
-				 map__rip_2objdump(cnode->ms.map, cnode->ip),
-				 cnode->ms.sym, true, false);
-	char *right = get_srcline(node->map->dso,
-				  map__rip_2objdump(node->map, node->ip),
-				  node->sym, true, false);
+	char *left = NULL;
+	char *right = NULL;
 	enum match_result ret = MATCH_EQ;
 	int cmp;
 
+	if (cnode->ms.map)
+		left = get_srcline(cnode->ms.map->dso,
+				 map__rip_2objdump(cnode->ms.map, cnode->ip),
+				 cnode->ms.sym, true, false);
+	if (node->map)
+		right = get_srcline(node->map->dso,
+				  map__rip_2objdump(node->map, node->ip),
+				  node->sym, true, false);
+
 	if (left && right)
 		cmp = strcmp(left, right);
 	else if (!left && right)
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 8d724f0..31a7dea 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -335,32 +335,42 @@ static int perf_parse_long(const char *value, long *ret)
 	return 0;
 }
 
-static void die_bad_config(const char *name)
+static void bad_config(const char *name)
 {
 	if (config_file_name)
-		die("bad config value for '%s' in %s", name, config_file_name);
-	die("bad config value for '%s'", name);
+		pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+	else
+		pr_warning("bad config value for '%s', ignoring...\n", name);
 }
 
-u64 perf_config_u64(const char *name, const char *value)
+int perf_config_u64(u64 *dest, const char *name, const char *value)
 {
 	long long ret = 0;
 
-	if (!perf_parse_llong(value, &ret))
-		die_bad_config(name);
-	return (u64) ret;
+	if (!perf_parse_llong(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+
+	*dest = ret;
+	return 0;
 }
 
-int perf_config_int(const char *name, const char *value)
+int perf_config_int(int *dest, const char *name, const char *value)
 {
 	long ret = 0;
-	if (!perf_parse_long(value, &ret))
-		die_bad_config(name);
-	return ret;
+	if (!perf_parse_long(value, &ret)) {
+		bad_config(name);
+		return -1;
+	}
+	*dest = ret;
+	return 0;
 }
 
 static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
 {
+	int ret;
+
 	*is_bool = 1;
 	if (!value)
 		return 1;
@@ -371,7 +381,7 @@ static int perf_config_bool_or_int(const char *name, const char *value, int *is_
 	if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
 		return 0;
 	*is_bool = 0;
-	return perf_config_int(name, value);
+	return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
 }
 
 int perf_config_bool(const char *name, const char *value)
@@ -657,8 +667,7 @@ static int perf_config_set__init(struct perf_config_set *set)
 
 	user_config = strdup(mkpath("%s/.perfconfig", home));
 	if (user_config == NULL) {
-		warning("Not enough memory to process %s/.perfconfig, "
-			"ignoring it.", home);
+		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
 		goto out;
 	}
 
@@ -671,8 +680,7 @@ static int perf_config_set__init(struct perf_config_set *set)
 	ret = 0;
 
 	if (st.st_uid && (st.st_uid != geteuid())) {
-		warning("File %s not owned by current user or root, "
-			"ignoring it.", user_config);
+		pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
 		goto out_free;
 	}
 
@@ -795,7 +803,8 @@ void perf_config_set__delete(struct perf_config_set *set)
  */
 int config_error_nonbool(const char *var)
 {
-	return error("Missing value for '%s'", var);
+	pr_err("Missing value for '%s'", var);
+	return -1;
 }
 
 void set_buildid_dir(const char *dir)
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 1a59a6b..b6bb11f 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -27,8 +27,8 @@ extern const char *config_exclusive_filename;
 typedef int (*config_fn_t)(const char *, const char *, void *);
 int perf_default_config(const char *, const char *, void *);
 int perf_config(config_fn_t fn, void *);
-int perf_config_int(const char *, const char *);
-u64 perf_config_u64(const char *, const char *);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u64(u64 *dest, const char *, const char *);
 int perf_config_bool(const char *, const char *);
 int config_error_nonbool(const char *);
 const char *perf_etc_perfconfig(void);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 89d5031..3149b70 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1444,10 +1444,8 @@ static int convert__config(const char *var, const char *value, void *cb)
 {
 	struct convert *c = cb;
 
-	if (!strcmp(var, "convert.queue-size")) {
-		c->queue_size = perf_config_u64(var, value);
-		return 0;
-	}
+	if (!strcmp(var, "convert.queue-size"))
+		return perf_config_u64(&c->queue_size, var, value);
 
 	return 0;
 }
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 8a23ea1..c818bdb 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -4,6 +4,7 @@
 
 #include <stdbool.h>
 #include <string.h>
+#include <linux/compiler.h>
 #include "event.h"
 #include "../ui/helpline.h"
 #include "../ui/progress.h"
@@ -40,16 +41,16 @@ extern int debug_data_convert;
 
 #define STRERR_BUFSIZE	128	/* For the buffer size of str_error_r */
 
-int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+int dump_printf(const char *fmt, ...) __printf(1, 2);
 void trace_event(union perf_event *event);
 
-int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
 
 void pr_stat(const char *fmt, ...);
 
-int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index a96a99d..4e7ab61 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -248,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso)
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
+static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf)
+{
+	int fd = -1;
+	struct kmod_path m;
+
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (kmod_path__parse_ext(&m, dso->long_name))
+		return -1;
+
+	if (!m.comp)
+		goto out;
+
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		dso->load_errno = errno;
+		goto out;
+	}
+
+	if (!decompress_to_file(m.ext, name, fd)) {
+		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+		close(fd);
+		fd = -1;
+	}
+
+out:
+	free(m.ext);
+	return fd;
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	unlink(tmpbuf);
+	return fd;
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	if (fd < 0) {
+		unlink(tmpbuf);
+		return -1;
+	}
+
+	strncpy(pathname, tmpbuf, len);
+	close(fd);
+	return 0;
+}
+
 /*
  * Parses kernel module specified in @path and updates
  * @m argument like:
@@ -335,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 	return 0;
 }
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine)
+{
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+	/* _KMODULE_COMP should be next to _KMODULE */
+	if (m->kmod && m->comp)
+		dso->symtab_type++;
+
+	dso__set_short_name(dso, strdup(m->name), true);
+}
+
 /*
  * Global list of open DSOs and the counter.
  */
@@ -381,7 +454,7 @@ static int do_open(char *name)
 
 static int __open_dso(struct dso *dso, struct machine *machine)
 {
-	int fd;
+	int fd = -EINVAL;
 	char *root_dir = (char *)"";
 	char *name = malloc(PATH_MAX);
 
@@ -392,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine)
 		root_dir = machine->root_dir;
 
 	if (dso__read_binary_type_filename(dso, dso->binary_type,
-					    root_dir, name, PATH_MAX)) {
-		free(name);
-		return -EINVAL;
-	}
+					    root_dir, name, PATH_MAX))
+		goto out;
 
 	if (!is_regular_file(name))
-		return -EINVAL;
+		goto out;
+
+	if (dso__needs_decompress(dso)) {
+		char newpath[KMOD_DECOMP_LEN];
+		size_t len = sizeof(newpath);
+
+		if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+			fd = -dso->load_errno;
+			goto out;
+		}
+
+		strcpy(name, newpath);
+	}
 
 	fd = do_open(name);
+
+	if (dso__needs_decompress(dso))
+		unlink(name);
+
+out:
 	free(name);
 	return fd;
 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 12350b1..bd061ba 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext);
 bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len);
+
+#define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
 
 struct kmod_path {
 	char *name;
@@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
 #define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine);
+
 /*
  * The dso__data_* external interface provides following functions:
  *   dso__data_get_fd
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 7c3fa1c..9967c87 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -252,6 +252,127 @@ enum auxtrace_error_type {
 	PERF_AUXTRACE_ERROR_MAX
 };
 
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH		(INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+	PERF_SYNTH_INTEL_PTWRITE,
+	PERF_SYNTH_INTEL_MWAIT,
+	PERF_SYNTH_INTEL_PWRE,
+	PERF_SYNTH_INTEL_EXSTOP,
+	PERF_SYNTH_INTEL_PWRX,
+	PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer to perf_sample__synth_ptr() and perf_synth__raw_data().  It also means
+ * the structure sizes are 4 bytes bigger than the raw_size, refer to
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+	u64	payload;
+};
+
+struct perf_synth_intel_mwait {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	hints		:  8,
+				reserved1	: 24,
+				extensions	:  2,
+				reserved2	: 30;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_pwre {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	reserved1	:  7,
+				hw		:  1,
+				subcstate	:  4,
+				cstate		:  4,
+				reserved2	: 48;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_exstop {
+	u32 padding;
+	union {
+		struct {
+			u32	ip		:  1,
+				reserved	: 31;
+		};
+		u32	flags;
+	};
+};
+
+struct perf_synth_intel_pwrx {
+	u32 padding;
+	u32 reserved;
+	union {
+		struct {
+			u64	deepest_cstate	:  4,
+				last_cstate	:  4,
+				wake_reason	:  4,
+				reserved1	: 52;
+		};
+		u64	payload;
+	};
+};
+
+struct perf_synth_intel_cbr {
+	u32 padding;
+	union {
+		struct {
+			u32	cbr		:  8,
+				reserved1	:  8,
+				max_nonturbo	:  8,
+				reserved2	:  8;
+		};
+		u32	flags;
+	};
+	u32 freq;
+	u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+	return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 94cea43..8d601fb 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_EVLIST_H
 #define __PERF_EVLIST_H 1
 
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/refcount.h>
 #include <linux/list.h>
@@ -34,7 +35,7 @@ struct perf_mmap {
 	refcount_t	 refcnt;
 	u64		 prev;
 	struct auxtrace_mmap auxtrace_mmap;
-	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
+	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 };
 
 static inline size_t
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e4f7902..6f4882f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -11,13 +11,17 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/bitops.h>
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <traceevent/event-parse.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
+#include <linux/compiler.h>
 #include <linux/err.h>
 #include <sys/ioctl.h>
 #include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
 #include "asm/bug.h"
 #include "callchain.h"
 #include "cgroup.h"
@@ -273,8 +277,20 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_evsel *evsel;
 
 	event_attr_init(&attr);
+	/*
+	 * sample_period lives in an unnamed union, which older compilers
+	 * such as gcc 4.4.7 cannot set via a named struct initializer.
+	 *
+	 * Just for probing the precise_ip:
+	 */
+	attr.sample_period = 1;
 
 	perf_event_attr__set_max_precise_ip(&attr);
+	/*
+	 * Now let the usual logic that sets up the perf_event_attr defaults
+	 * kick in when we return, before perf_evsel__open() is called.
+	 */
+	attr.sample_period = 0;
 
 	evsel = perf_evsel__new(&attr);
 	if (evsel == NULL)
@@ -1429,7 +1445,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 }
 
 static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
-				void *priv __attribute__((unused)))
+				void *priv __maybe_unused)
 {
 	return fprintf(fp, "  %-32s %s\n", name, val);
 }
@@ -2459,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 	return false;
 }
 
+static bool find_process(const char *name)
+{
+	size_t len = strlen(name);
+	DIR *dir;
+	struct dirent *d;
+	int ret = -1;
+
+	dir = opendir(procfs__mountpoint());
+	if (!dir)
+		return false;
+
+	/* Walk through the directory. */
+	while (ret && (d = readdir(dir)) != NULL) {
+		char path[PATH_MAX];
+		char *data;
+		size_t size;
+
+		if ((d->d_type != DT_DIR) ||
+		     !strcmp(".", d->d_name) ||
+		     !strcmp("..", d->d_name))
+			continue;
+
+		scnprintf(path, sizeof(path), "%s/%s/comm",
+			  procfs__mountpoint(), d->d_name);
+
+		if (filename__read_str(path, &data, &size))
+			continue;
+
+		ret = strncmp(name, data, len);
+		free(data);
+	}
+
+	closedir(dir);
+	return ret ? false : true;
+}
+
 int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 			      int err, char *msg, size_t size)
 {
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index e415aee..583f3a6 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -7,6 +7,7 @@
 #include "map.h"
 #include "strlist.h"
 #include "symbol.h"
+#include "srcline.h"
 
 static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
 {
@@ -168,6 +169,38 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
 			if (!print_oneline)
 				printed += fprintf(fp, "\n");
 
+			if (symbol_conf.inline_name && node->map) {
+				struct inline_node *inode;
+
+				addr = map__rip_2objdump(node->map, node->ip);
+				inode = dso__parse_addr_inlines(node->map->dso, addr);
+
+				if (inode) {
+					struct inline_list *ilist;
+
+					list_for_each_entry(ilist, &inode->val, list) {
+						if (print_arrow)
+							printed += fprintf(fp, " <-");
+
+						/* IP is same, just skip it */
+						if (print_ip)
+							printed += fprintf(fp, "%c%16s",
+									   s, "");
+						if (print_sym)
+							printed += fprintf(fp, " %s",
+									   ilist->funcname);
+						if (print_srcline)
+							printed += fprintf(fp, "\n  %s:%d",
+									   ilist->filename,
+									   ilist->line_nr);
+						if (!print_oneline)
+							printed += fprintf(fp, "\n");
+					}
+
+					inline_node__delete(inode);
+				}
+			}
+
 			if (symbol_conf.bt_stop_list &&
 			    node->sym &&
 			    strlist__has_entry(symbol_conf.bt_stop_list,
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 5980f7d..40789d8 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -11,6 +11,7 @@
  * @remark Copyright 2007 OProfile authors
  * @author Philippe Elie
  */
+#include <linux/compiler.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <getopt.h>
@@ -125,7 +126,7 @@ struct debug_line_header {
 	 * and filesize, last entry is followed by en empty string.
 	 */
 	/* follow the first program statement */
-} __attribute__((packed));
+} __packed;
 
 /* DWARF 2 spec talk only about one possible compilation unit header while
  * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
@@ -138,7 +139,7 @@ struct compilation_unit_header {
 	uhalf version;
 	uword debug_abbrev_offset;
 	ubyte pointer_size;
-} __attribute__((packed));
+} __packed;
 
 #define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 314a071..76ed7d0 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
@@ -841,7 +842,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
 
 /*
  * default get_cpuid(): nothing gets recorded
- * actual implementation must be in arch/$(ARCH)/util/header.c
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
  */
 int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
 {
@@ -1274,7 +1275,7 @@ read_event_desc(struct perf_header *ph, int fd)
 }
 
 static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
-				void *priv __attribute__((unused)))
+				void *priv __maybe_unused)
 {
 	return fprintf(fp, ", %s = %s", name, val);
 }
@@ -1469,8 +1470,16 @@ static int __event_process_build_id(struct build_id_event *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename, cpumode))
-			dso->kernel = dso_type;
+		if (dso_type != DSO_TYPE_USER) {
+			struct kmod_path m = { .name = NULL, };
+
+			if (!kmod_path__parse_name(&m, filename) && m.kmod)
+				dso__set_module_info(dso, &m, machine);
+			else
+				dso->kernel = dso_type;
+
+			free(m.name);
+		}
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
 				  sbuild_id);
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index 1c88ad6..15b9530 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -12,7 +12,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value,
 				   void *cb __maybe_unused)
 {
 	if (!strcmp(var, "help.autocorrect"))
-		autocorrect = perf_config_int(var,value);
+		return perf_config_int(&autocorrect, var,value);
 
 	return 0;
 }
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index b2834ac..218ee2b 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -866,8 +866,6 @@ static void intel_bts_print_info(u64 *arr, int start, int finish)
 		fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
 }
 
-u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
-
 int intel_bts_process_auxtrace_info(union perf_event *event,
 				    struct perf_session *session)
 {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7cf7f7a..aa1593c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
 	INTEL_PT_STATE_FUP_NO_TIP,
 };
 
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+	switch (pkt_state) {
+	case INTEL_PT_STATE_NO_PSB:
+	case INTEL_PT_STATE_NO_IP:
+	case INTEL_PT_STATE_ERR_RESYNC:
+	case INTEL_PT_STATE_IN_SYNC:
+	case INTEL_PT_STATE_TNT:
+		return true;
+	case INTEL_PT_STATE_TIP:
+	case INTEL_PT_STATE_TIP_PGD:
+	case INTEL_PT_STATE_FUP:
+	case INTEL_PT_STATE_FUP_NO_TIP:
+		return false;
+	default:
+		return true;
+	};
+}
+
 #ifdef INTEL_PT_STRICT
 #define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
 #define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
@@ -87,11 +106,13 @@ struct intel_pt_decoder {
 	const unsigned char *buf;
 	size_t len;
 	bool return_compression;
+	bool branch_enable;
 	bool mtc_insn;
 	bool pge;
 	bool have_tma;
 	bool have_cyc;
 	bool fixup_last_mtc;
+	bool have_last_ip;
 	uint64_t pos;
 	uint64_t last_ip;
 	uint64_t ip;
@@ -99,6 +120,7 @@ struct intel_pt_decoder {
 	uint64_t timestamp;
 	uint64_t tsc_timestamp;
 	uint64_t ref_timestamp;
+	uint64_t sample_timestamp;
 	uint64_t ret_addr;
 	uint64_t ctc_timestamp;
 	uint64_t ctc_delta;
@@ -119,6 +141,7 @@ struct intel_pt_decoder {
 	int pkt_len;
 	int last_packet_type;
 	unsigned int cbr;
+	unsigned int cbr_seen;
 	unsigned int max_non_turbo_ratio;
 	double max_non_turbo_ratio_fp;
 	double cbr_cyc_to_tsc;
@@ -136,9 +159,18 @@ struct intel_pt_decoder {
 	bool continuous_period;
 	bool overflow;
 	bool set_fup_tx_flags;
+	bool set_fup_ptw;
+	bool set_fup_mwait;
+	bool set_fup_pwre;
+	bool set_fup_exstop;
 	unsigned int fup_tx_flags;
 	unsigned int tx_flags;
+	uint64_t fup_ptw_payload;
+	uint64_t fup_mwait_payload;
+	uint64_t fup_pwre_payload;
+	uint64_t cbr_payload;
 	uint64_t timestamp_insn_cnt;
+	uint64_t sample_insn_cnt;
 	uint64_t stuck_ip;
 	int no_progress;
 	int stuck_ip_prd;
@@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->pgd_ip             = params->pgd_ip;
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
+	decoder->branch_enable      = params->branch_enable;
 
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
@@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
 	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+	decoder->have_last_ip = true;
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 	case INTEL_PT_PAD:
 	case INTEL_PT_VMCS:
 	case INTEL_PT_MNT:
+	case INTEL_PT_PTWRITE:
+	case INTEL_PT_PTWRITE_IP:
 		return 0;
 
 	case INTEL_PT_MTC:
@@ -675,6 +711,12 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 		break;
 
 	case INTEL_PT_TSC:
+		/*
+		 * For now, do not support using TSC packets - refer to
+		 * intel_pt_calc_cyc_to_tsc().
+		 */
+		if (data->from_mtc)
+			return 1;
 		timestamp = pkt_info->packet.payload |
 			    (data->timestamp & (0xffULL << 56));
 		if (data->from_mtc && timestamp < data->timestamp &&
@@ -733,6 +775,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
 
 	case INTEL_PT_TIP_PGD:
 	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_EXSTOP:
+	case INTEL_PT_EXSTOP_IP:
+	case INTEL_PT_MWAIT:
+	case INTEL_PT_PWRE:
+	case INTEL_PT_PWRX:
 	case INTEL_PT_OVF:
 	case INTEL_PT_BAD: /* Does not happen */
 	default:
@@ -787,6 +834,14 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
 		.cbr_cyc_to_tsc = 0,
 	};
 
+	/*
+	 * For now, do not support using TSC packets, for at least these reasons:
+	 * 1) timing might have stopped
+	 * 2) TSC packets within PSB+ can slip against CYC packets
+	 */
+	if (!from_mtc)
+		return;
+
 	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
 }
 
@@ -898,6 +953,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
 
 	decoder->tot_insn_cnt += insn_cnt;
 	decoder->timestamp_insn_cnt += insn_cnt;
+	decoder->sample_insn_cnt += insn_cnt;
 	decoder->period_insn_cnt += insn_cnt;
 
 	if (err) {
@@ -990,6 +1046,57 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
 	return err;
 }
 
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+	bool ret = false;
+
+	if (decoder->set_fup_tx_flags) {
+		decoder->set_fup_tx_flags = false;
+		decoder->tx_flags = decoder->fup_tx_flags;
+		decoder->state.type = INTEL_PT_TRANSACTION;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.flags = decoder->fup_tx_flags;
+		return true;
+	}
+	if (decoder->set_fup_ptw) {
+		decoder->set_fup_ptw = false;
+		decoder->state.type = INTEL_PT_PTW;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.ptw_payload = decoder->fup_ptw_payload;
+		return true;
+	}
+	if (decoder->set_fup_mwait) {
+		decoder->set_fup_mwait = false;
+		decoder->state.type = INTEL_PT_MWAIT_OP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.mwait_payload = decoder->fup_mwait_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_pwre) {
+		decoder->set_fup_pwre = false;
+		decoder->state.type |= INTEL_PT_PWR_ENTRY;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		decoder->state.pwre_payload = decoder->fup_pwre_payload;
+		ret = true;
+	}
+	if (decoder->set_fup_exstop) {
+		decoder->set_fup_exstop = false;
+		decoder->state.type |= INTEL_PT_EX_STOP;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.flags |= INTEL_PT_FUP_IP;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		ret = true;
+	}
+	return ret;
+}
+
 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 {
 	struct intel_pt_insn intel_pt_insn;
@@ -1003,15 +1110,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 		if (err == INTEL_PT_RETURN)
 			return 0;
 		if (err == -EAGAIN) {
-			if (decoder->set_fup_tx_flags) {
-				decoder->set_fup_tx_flags = false;
-				decoder->tx_flags = decoder->fup_tx_flags;
-				decoder->state.type = INTEL_PT_TRANSACTION;
-				decoder->state.from_ip = decoder->ip;
-				decoder->state.to_ip = 0;
-				decoder->state.flags = decoder->fup_tx_flags;
+			if (intel_pt_fup_event(decoder))
 				return 0;
-			}
 			return err;
 		}
 		decoder->set_fup_tx_flags = false;
@@ -1360,7 +1460,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 
 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
 {
-	unsigned int cbr = decoder->packet.payload;
+	unsigned int cbr = decoder->packet.payload & 0xff;
+
+	decoder->cbr_payload = decoder->packet.payload;
 
 	if (decoder->cbr == cbr)
 		return;
@@ -1417,6 +1519,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 		case INTEL_PT_TRACESTOP:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSB:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			decoder->have_tma = false;
 			intel_pt_log("ERROR: Unexpected packet\n");
 			return -EAGAIN;
@@ -1446,7 +1555,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_FUP:
 			decoder->pge = true;
-			intel_pt_set_last_ip(decoder);
+			if (decoder->packet.count)
+				intel_pt_set_last_ip(decoder);
 			break;
 
 		case INTEL_PT_MODE_TSX:
@@ -1497,6 +1607,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_MODE_TSX:
 		case INTEL_PT_BAD:
 		case INTEL_PT_PSBEND:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			intel_pt_log("ERROR: Missing TIP after FUP\n");
 			decoder->pkt_state = INTEL_PT_STATE_ERR3;
 			return -ENOENT;
@@ -1625,6 +1742,15 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 				break;
 			}
 			intel_pt_set_last_ip(decoder);
+			if (!decoder->branch_enable) {
+				decoder->ip = decoder->last_ip;
+				if (intel_pt_fup_event(decoder))
+					return 0;
+				no_tip = false;
+				break;
+			}
+			if (decoder->set_fup_mwait)
+				no_tip = true;
 			err = intel_pt_walk_fup(decoder);
 			if (err != -EAGAIN) {
 				if (err)
@@ -1650,6 +1776,8 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
 			intel_pt_clear_stack(&decoder->stack);
 			err = intel_pt_walk_psbend(decoder);
 			if (err == -EAGAIN)
@@ -1696,6 +1824,16 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_CBR:
 			intel_pt_calc_cbr(decoder);
+			if (!decoder->branch_enable &&
+			    decoder->cbr != decoder->cbr_seen) {
+				decoder->cbr_seen = decoder->cbr;
+				decoder->state.type = INTEL_PT_CBR_CHG;
+				decoder->state.from_ip = decoder->ip;
+				decoder->state.to_ip = 0;
+				decoder->state.cbr_payload =
+							decoder->packet.payload;
+				return 0;
+			}
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -1722,6 +1860,71 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 		case INTEL_PT_PAD:
 			break;
 
+		case INTEL_PT_PTWRITE_IP:
+			decoder->fup_ptw_payload = decoder->packet.payload;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_ptw = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_PTWRITE:
+			decoder->state.type = INTEL_PT_PTW;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.ptw_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_MWAIT:
+			decoder->fup_mwait_payload = decoder->packet.payload;
+			decoder->set_fup_mwait = true;
+			break;
+
+		case INTEL_PT_PWRE:
+			if (decoder->set_fup_mwait) {
+				decoder->fup_pwre_payload =
+							decoder->packet.payload;
+				decoder->set_fup_pwre = true;
+				break;
+			}
+			decoder->state.type = INTEL_PT_PWR_ENTRY;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
+		case INTEL_PT_EXSTOP_IP:
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_exstop = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+						decoder->pos);
+			}
+			goto next;
+
+		case INTEL_PT_EXSTOP:
+			decoder->state.type = INTEL_PT_EX_STOP;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
+		case INTEL_PT_PWRX:
+			decoder->state.type = INTEL_PT_PWR_EXIT;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwrx_payload = decoder->packet.payload;
+			return 0;
+
 		default:
 			return intel_pt_bug(decoder);
 		}
@@ -1730,8 +1933,9 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 
 static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
 {
-	return decoder->last_ip || decoder->packet.count == 0 ||
-	       decoder->packet.count == 3 || decoder->packet.count == 6;
+	return decoder->packet.count &&
+	       (decoder->have_last_ip || decoder->packet.count == 3 ||
+		decoder->packet.count == 6);
 }
 
 /* Walk PSB+ packets to get in sync. */
@@ -1750,6 +1954,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 			__fallthrough;
 		case INTEL_PT_TIP_PGE:
 		case INTEL_PT_TIP:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 			intel_pt_log("ERROR: Unexpected packet\n");
 			return -ENOENT;
 
@@ -1854,14 +2065,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_FUP:
-			if (decoder->overflow) {
-				if (intel_pt_have_ip(decoder))
-					intel_pt_set_ip(decoder);
-				if (decoder->ip)
-					return 0;
-			}
-			if (decoder->packet.count)
-				intel_pt_set_last_ip(decoder);
+			if (intel_pt_have_ip(decoder))
+				intel_pt_set_ip(decoder);
+			if (decoder->ip)
+				return 0;
 			break;
 
 		case INTEL_PT_MTC:
@@ -1910,6 +2117,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_PSB:
+			decoder->last_ip = 0;
+			decoder->have_last_ip = true;
+			intel_pt_clear_stack(&decoder->stack);
 			err = intel_pt_walk_psb(decoder);
 			if (err)
 				return err;
@@ -1925,6 +2135,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_VMCS:
 		case INTEL_PT_MNT:
 		case INTEL_PT_PAD:
+		case INTEL_PT_PTWRITE:
+		case INTEL_PT_PTWRITE_IP:
+		case INTEL_PT_EXSTOP:
+		case INTEL_PT_EXSTOP_IP:
+		case INTEL_PT_MWAIT:
+		case INTEL_PT_PWRE:
+		case INTEL_PT_PWRX:
 		default:
 			break;
 		}
@@ -1935,6 +2152,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
 {
 	int err;
 
+	decoder->set_fup_tx_flags = false;
+	decoder->set_fup_ptw = false;
+	decoder->set_fup_mwait = false;
+	decoder->set_fup_pwre = false;
+	decoder->set_fup_exstop = false;
+
+	if (!decoder->branch_enable) {
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->overflow = false;
+		decoder->state.type = 0; /* Do not have a sample */
+		return 0;
+	}
+
 	intel_pt_log("Scanning for full IP\n");
 	err = intel_pt_walk_to_ip(decoder);
 	if (err)
@@ -2043,6 +2273,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 
 	decoder->pge = false;
 	decoder->continuous_period = false;
+	decoder->have_last_ip = false;
 	decoder->last_ip = 0;
 	decoder->ip = 0;
 	intel_pt_clear_stack(&decoder->stack);
@@ -2051,6 +2282,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 	if (err)
 		return err;
 
+	decoder->have_last_ip = true;
 	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
 
 	err = intel_pt_walk_psb(decoder);
@@ -2069,7 +2301,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
 
 static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
 {
-	uint64_t est = decoder->timestamp_insn_cnt << 1;
+	uint64_t est = decoder->sample_insn_cnt << 1;
 
 	if (!decoder->cbr || !decoder->max_non_turbo_ratio)
 		goto out;
@@ -2077,7 +2309,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
 	est *= decoder->max_non_turbo_ratio;
 	est /= decoder->cbr;
 out:
-	return decoder->timestamp + est;
+	return decoder->sample_timestamp + est;
 }
 
 const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
@@ -2093,8 +2325,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 			err = intel_pt_sync(decoder);
 			break;
 		case INTEL_PT_STATE_NO_IP:
+			decoder->have_last_ip = false;
 			decoder->last_ip = 0;
-			/* Fall through */
+			decoder->ip = 0;
+			__fallthrough;
 		case INTEL_PT_STATE_ERR_RESYNC:
 			err = intel_pt_sync_ip(decoder);
 			break;
@@ -2130,15 +2364,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 		}
 	} while (err == -ENOLINK);
 
-	decoder->state.err = err ? intel_pt_ext_err(err) : 0;
-	decoder->state.timestamp = decoder->timestamp;
+	if (err) {
+		decoder->state.err = intel_pt_ext_err(err);
+		decoder->state.from_ip = decoder->ip;
+		decoder->sample_timestamp = decoder->timestamp;
+		decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+	} else {
+		decoder->state.err = 0;
+		if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+			decoder->cbr_seen = decoder->cbr;
+			decoder->state.type |= INTEL_PT_CBR_CHG;
+			decoder->state.cbr_payload = decoder->cbr_payload;
+		}
+		if (intel_pt_sample_time(decoder->pkt_state)) {
+			decoder->sample_timestamp = decoder->timestamp;
+			decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+		}
+	}
+
+	decoder->state.timestamp = decoder->sample_timestamp;
 	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
 	decoder->state.cr3 = decoder->cr3;
 	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
 
-	if (err)
-		decoder->state.from_ip = decoder->ip;
-
 	return &decoder->state;
 }
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e90619a..921b22e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -25,11 +25,18 @@
 #define INTEL_PT_IN_TX		(1 << 0)
 #define INTEL_PT_ABORT_TX	(1 << 1)
 #define INTEL_PT_ASYNC		(1 << 2)
+#define INTEL_PT_FUP_IP		(1 << 3)
 
 enum intel_pt_sample_type {
 	INTEL_PT_BRANCH		= 1 << 0,
 	INTEL_PT_INSTRUCTION	= 1 << 1,
 	INTEL_PT_TRANSACTION	= 1 << 2,
+	INTEL_PT_PTW		= 1 << 3,
+	INTEL_PT_MWAIT_OP	= 1 << 4,
+	INTEL_PT_PWR_ENTRY	= 1 << 5,
+	INTEL_PT_EX_STOP	= 1 << 6,
+	INTEL_PT_PWR_EXIT	= 1 << 7,
+	INTEL_PT_CBR_CHG	= 1 << 8,
 };
 
 enum intel_pt_period_type {
@@ -63,6 +70,11 @@ struct intel_pt_state {
 	uint64_t timestamp;
 	uint64_t est_timestamp;
 	uint64_t trace_nr;
+	uint64_t ptw_payload;
+	uint64_t mwait_payload;
+	uint64_t pwre_payload;
+	uint64_t pwrx_payload;
+	uint64_t cbr_payload;
 	uint32_t flags;
 	enum intel_pt_insn_op insn_op;
 	int insn_len;
@@ -87,6 +99,7 @@ struct intel_pt_params {
 	bool (*pgd_ip)(uint64_t ip, void *data);
 	void *data;
 	bool return_compression;
+	bool branch_enable;
 	uint64_t period;
 	enum intel_pt_period_type period_type;
 	unsigned max_non_turbo_ratio;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index debe751..45b64f9 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -16,6 +16,7 @@
 #ifndef INCLUDE__INTEL_PT_LOG_H__
 #define INCLUDE__INTEL_PT_LOG_H__
 
+#include <linux/compiler.h>
 #include <stdint.h>
 #include <inttypes.h>
 
@@ -34,8 +35,7 @@ void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
 void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
 				 uint64_t ip);
 
-__attribute__((format(printf, 1, 2)))
-void __intel_pt_log(const char *fmt, ...);
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
 
 #define intel_pt_log(fmt, ...) \
 	do { \
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 7528ae4..ba4c9dd 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,13 @@ static const char * const packet_name[] = {
 	[INTEL_PT_PIP]		= "PIP",
 	[INTEL_PT_OVF]		= "OVF",
 	[INTEL_PT_MNT]		= "MNT",
+	[INTEL_PT_PTWRITE]	= "PTWRITE",
+	[INTEL_PT_PTWRITE_IP]	= "PTWRITE",
+	[INTEL_PT_EXSTOP]	= "EXSTOP",
+	[INTEL_PT_EXSTOP_IP]	= "EXSTOP",
+	[INTEL_PT_MWAIT]	= "MWAIT",
+	[INTEL_PT_PWRE]		= "PWRE",
+	[INTEL_PT_PWRX]		= "PWRX",
 };
 
 const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
 	if (len < 4)
 		return INTEL_PT_NEED_MORE_BYTES;
 	packet->type = INTEL_PT_CBR;
-	packet->payload = buf[2];
+	packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
 	return 4;
 }
 
@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
 	}
 }
 
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+				struct intel_pt_pkt *packet)
+{
+	packet->count = (buf[1] >> 5) & 0x3;
+	packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+					 INTEL_PT_PTWRITE;
+
+	switch (packet->count) {
+	case 0:
+		if (len < 6)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+		return 6;
+	case 1:
+		if (len < 10)
+			return INTEL_PT_NEED_MORE_BYTES;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+		return 10;
+	default:
+		return INTEL_PT_BAD_PACKET;
+	}
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP;
+	return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+	packet->type = INTEL_PT_EXSTOP_IP;
+	return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+			      struct intel_pt_pkt *packet)
+{
+	if (len < 10)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_MWAIT;
+	packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+	return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 4)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRE;
+	memcpy_le64(&packet->payload, buf + 2, 2);
+	return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+			     struct intel_pt_pkt *packet)
+{
+	if (len < 7)
+		return INTEL_PT_NEED_MORE_BYTES;
+	packet->type = INTEL_PT_PWRX;
+	memcpy_le64(&packet->payload, buf + 2, 5);
+	return 7;
+}
+
 static int intel_pt_get_ext(const unsigned char *buf, size_t len,
 			    struct intel_pt_pkt *packet)
 {
 	if (len < 2)
 		return INTEL_PT_NEED_MORE_BYTES;
 
+	if ((buf[1] & 0x1f) == 0x12)
+		return intel_pt_get_ptwrite(buf, len, packet);
+
 	switch (buf[1]) {
 	case 0xa3: /* Long TNT */
 		return intel_pt_get_long_tnt(buf, len, packet);
@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
 		return intel_pt_get_tma(buf, len, packet);
 	case 0xC3: /* 3-byte header */
 		return intel_pt_get_3byte(buf, len, packet);
+	case 0x62: /* EXSTOP no IP */
+		return intel_pt_get_exstop(packet);
+	case 0xE2: /* EXSTOP with IP */
+		return intel_pt_get_exstop_ip(packet);
+	case 0xC2: /* MWAIT */
+		return intel_pt_get_mwait(buf, len, packet);
+	case 0x22: /* PWRE */
+		return intel_pt_get_pwre(buf, len, packet);
+	case 0xA2: /* PWRX */
+		return intel_pt_get_pwrx(buf, len, packet);
 	default:
 		return INTEL_PT_BAD_PACKET;
 	}
@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
 		ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
 			       name, payload, nr);
 		return ret;
+	case INTEL_PT_PTWRITE:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+	case INTEL_PT_PTWRITE_IP:
+		return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+	case INTEL_PT_EXSTOP:
+		return snprintf(buf, buf_len, "%s IP:0", name);
+	case INTEL_PT_EXSTOP_IP:
+		return snprintf(buf, buf_len, "%s IP:1", name);
+	case INTEL_PT_MWAIT:
+		return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+				name, payload, (unsigned int)(payload & 0xff),
+				(unsigned int)((payload >> 32) & 0x3));
+	case INTEL_PT_PWRE:
+		return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+				name, payload, !!(payload & 0x80),
+				(unsigned int)((payload >> 12) & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
+	case INTEL_PT_PWRX:
+		return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+				name, payload,
+				(unsigned int)((payload >> 4) & 0xf),
+				(unsigned int)(payload & 0xf),
+				(unsigned int)((payload >> 8) & 0xf));
 	default:
 		break;
 	}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 781bb79..73ddc3a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
 	INTEL_PT_PIP,
 	INTEL_PT_OVF,
 	INTEL_PT_MNT,
+	INTEL_PT_PTWRITE,
+	INTEL_PT_PTWRITE_IP,
+	INTEL_PT_EXSTOP,
+	INTEL_PT_EXSTOP_IP,
+	INTEL_PT_MWAIT,
+	INTEL_PT_PWRE,
+	INTEL_PT_PWRX,
 };
 
 struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 767be7c..12e3771 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@
 1: fxstor | RDGSBASE Ry (F3),(11B)
 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
 7: clflush | clflushopt (66) | sfence (11B)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 4c7718f..b58f9fd 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -81,7 +81,6 @@ struct intel_pt {
 
 	bool sample_instructions;
 	u64 instructions_sample_type;
-	u64 instructions_sample_period;
 	u64 instructions_id;
 
 	bool sample_branches;
@@ -93,6 +92,18 @@ struct intel_pt {
 	u64 transactions_sample_type;
 	u64 transactions_id;
 
+	bool sample_ptwrites;
+	u64 ptwrites_sample_type;
+	u64 ptwrites_id;
+
+	bool sample_pwr_events;
+	u64 pwr_events_sample_type;
+	u64 mwait_id;
+	u64 pwre_id;
+	u64 exstop_id;
+	u64 pwrx_id;
+	u64 cbr_id;
+
 	bool synth_needs_swap;
 
 	u64 tsc_bit;
@@ -103,6 +114,7 @@ struct intel_pt {
 	u64 cyc_bit;
 	u64 noretcomp_bit;
 	unsigned max_non_turbo_ratio;
+	unsigned cbr2khz;
 
 	unsigned long num_events;
 
@@ -668,6 +680,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
 	return true;
 }
 
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+	struct perf_evsel *evsel;
+	u64 config;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+		    (config & 1) && !(config & 0x2000))
+			return false;
+	}
+	return true;
+}
+
 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
 {
 	struct perf_evsel *evsel;
@@ -799,6 +824,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	params.walk_insn = intel_pt_walk_next_insn;
 	params.data = ptq;
 	params.return_compression = intel_pt_return_compression(pt);
+	params.branch_enable = intel_pt_branch_enable(pt);
 	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
 	params.mtc_period = intel_pt_mtc_period(pt);
 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1044,6 +1070,36 @@ static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
 		bs->nr += 1;
 }
 
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+	return pt->synth_opts.initial_skip &&
+	       pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.size = sizeof(struct perf_event_header);
+
+	if (!pt->timeless_decoding)
+		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+	sample->cpumode = PERF_RECORD_MISC_USER;
+	sample->ip = ptq->state->from_ip;
+	sample->pid = ptq->pid;
+	sample->tid = ptq->tid;
+	sample->addr = ptq->state->to_ip;
+	sample->period = 1;
+	sample->cpu = ptq->cpu;
+	sample->flags = ptq->flags;
+	sample->insn_len = ptq->insn_len;
+	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+}
+
 static int intel_pt_inject_event(union perf_event *event,
 				 struct perf_sample *sample, u64 type,
 				 bool swapped)
@@ -1052,9 +1108,35 @@ static int intel_pt_inject_event(union perf_event *event,
 	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
 }
 
-static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+				      union perf_event *event,
+				      struct perf_sample *sample, u64 type)
+{
+	if (!pt->synth_opts.inject)
+		return 0;
+
+	return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap);
+}
+
+static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
+					  union perf_event *event,
+					  struct perf_sample *sample, u64 type)
 {
 	int ret;
+
+	ret = intel_pt_opt_inject(pt, event, sample, type);
+	if (ret)
+		return ret;
+
+	ret = perf_session__deliver_synth_event(pt->session, event, sample);
+	if (ret)
+		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+	return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
@@ -1066,29 +1148,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 		return 0;
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
+	intel_pt_prep_b_sample(pt, ptq, event, &sample);
 
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
 	sample.id = ptq->pt->branches_id;
 	sample.stream_id = ptq->pt->branches_id;
-	sample.period = 1;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
 	/*
 	 * perf report cannot handle events without a branch stack when using
@@ -1105,144 +1171,251 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
 	}
 
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->branches_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
+	return intel_pt_deliver_synth_b_event(pt, event, &sample,
+					      pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+				 struct intel_pt_queue *ptq,
+				 union perf_event *event,
+				 struct perf_sample *sample)
+{
+	intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+	if (pt->synth_opts.callchain) {
+		thread_stack__sample(ptq->thread, ptq->chain,
+				     pt->synth_opts.callchain_sz, sample->ip);
+		sample->callchain = ptq->chain;
 	}
 
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
-		       ret);
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample->branch_stack = ptq->last_branch;
+	}
+}
+
+static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
+					       struct intel_pt_queue *ptq,
+					       union perf_event *event,
+					       struct perf_sample *sample,
+					       u64 type)
+{
+	int ret;
+
+	ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
+
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
 
 	return ret;
 }
 
 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 {
-	int ret;
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
+	intel_pt_prep_sample(pt, ptq, event, &sample);
 
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
 	sample.id = ptq->pt->instructions_id;
 	sample.stream_id = ptq->pt->instructions_id;
 	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 
-	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample.ip);
-		sample.callchain = ptq->chain;
-	}
-
-	if (pt->synth_opts.last_branch) {
-		intel_pt_copy_last_branch_rb(ptq);
-		sample.branch_stack = ptq->last_branch;
-	}
-
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->instructions_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
-	}
-
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
-		       ret);
-
-	if (pt->synth_opts.last_branch)
-		intel_pt_reset_last_branch_rb(ptq);
-
-	return ret;
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->instructions_sample_type);
 }
 
 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 {
-	int ret;
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
 
-	if (pt->synth_opts.initial_skip &&
-	    pt->num_events++ < pt->synth_opts.initial_skip)
+	if (intel_pt_skip_event(pt))
 		return 0;
 
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
+	intel_pt_prep_sample(pt, ptq, event, &sample);
 
-	if (!pt->timeless_decoding)
-		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
-	sample.ip = ptq->state->from_ip;
-	sample.pid = ptq->pid;
-	sample.tid = ptq->tid;
-	sample.addr = ptq->state->to_ip;
 	sample.id = ptq->pt->transactions_id;
 	sample.stream_id = ptq->pt->transactions_id;
-	sample.period = 1;
-	sample.cpu = ptq->cpu;
-	sample.flags = ptq->flags;
-	sample.insn_len = ptq->insn_len;
-	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
 
-	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample.ip);
-		sample.callchain = ptq->chain;
-	}
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->transactions_sample_type);
+}
 
-	if (pt->synth_opts.last_branch) {
-		intel_pt_copy_last_branch_rb(ptq);
-		sample.branch_stack = ptq->last_branch;
-	}
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+				   struct intel_pt_queue *ptq,
+				   union perf_event *event,
+				   struct perf_sample *sample)
+{
+	intel_pt_prep_sample(pt, ptq, event, sample);
 
-	if (pt->synth_opts.inject) {
-		ret = intel_pt_inject_event(event, &sample,
-					    pt->transactions_sample_type,
-					    pt->synth_needs_swap);
-		if (ret)
-			return ret;
-	}
+	/*
+	 * Zero IP is used to mean "trace start" but that is not the case for
+	 * power or PTWRITE events with no IP, so clear the flags.
+	 */
+	if (!sample->ip)
+		sample->flags = 0;
+}
 
-	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
-	if (ret)
-		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
-		       ret);
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_ptwrite raw;
 
-	if (pt->synth_opts.last_branch)
-		intel_pt_reset_last_branch_rb(ptq);
+	if (intel_pt_skip_event(pt))
+		return 0;
 
-	return ret;
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->ptwrites_id;
+	sample.stream_id = ptq->pt->ptwrites_id;
+
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->ptwrites_sample_type);
+}
+
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_cbr raw;
+	u32 flags;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->cbr_id;
+	sample.stream_id = ptq->pt->cbr_id;
+
+	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+	raw.flags = cpu_to_le32(flags);
+	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+	raw.reserved3 = 0;
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_mwait raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->mwait_id;
+	sample.stream_id = ptq->pt->mwait_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwre raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwre_id;
+	sample.stream_id = ptq->pt->pwre_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_exstop raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->exstop_id;
+	sample.stream_id = ptq->pt->exstop_id;
+
+	raw.flags = 0;
+	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+	struct intel_pt *pt = ptq->pt;
+	union perf_event *event = ptq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+	struct perf_synth_intel_pwrx raw;
+
+	if (intel_pt_skip_event(pt))
+		return 0;
+
+	intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+	sample.id = ptq->pt->pwrx_id;
+	sample.stream_id = ptq->pt->pwrx_id;
+
+	raw.reserved = 0;
+	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+	sample.raw_size = perf_synth__raw_size(raw);
+	sample.raw_data = perf_synth__raw_data(&raw);
+
+	return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+					    pt->pwr_events_sample_type);
 }
 
 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1296,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
 			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
 }
 
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
+			  INTEL_PT_CBR_CHG)
+
 static int intel_pt_sample(struct intel_pt_queue *ptq)
 {
 	const struct intel_pt_state *state = ptq->state;
@@ -1307,24 +1484,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 
 	ptq->have_sample = false;
 
-	if (pt->sample_instructions &&
-	    (state->type & INTEL_PT_INSTRUCTION) &&
-	    (!pt->synth_opts.initial_skip ||
-	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
+	if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
+		if (state->type & INTEL_PT_CBR_CHG) {
+			err = intel_pt_synth_cbr_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_MWAIT_OP) {
+			err = intel_pt_synth_mwait_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_ENTRY) {
+			err = intel_pt_synth_pwre_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_EX_STOP) {
+			err = intel_pt_synth_exstop_sample(ptq);
+			if (err)
+				return err;
+		}
+		if (state->type & INTEL_PT_PWR_EXIT) {
+			err = intel_pt_synth_pwrx_sample(ptq);
+			if (err)
+				return err;
+		}
+	}
+
+	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
 		err = intel_pt_synth_instruction_sample(ptq);
 		if (err)
 			return err;
 	}
 
-	if (pt->sample_transactions &&
-	    (state->type & INTEL_PT_TRANSACTION) &&
-	    (!pt->synth_opts.initial_skip ||
-	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
+	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
 		err = intel_pt_synth_transaction_sample(ptq);
 		if (err)
 			return err;
 	}
 
+	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+		err = intel_pt_synth_ptwrite_sample(ptq);
+		if (err)
+			return err;
+	}
+
 	if (!(state->type & INTEL_PT_BRANCH))
 		return 0;
 
@@ -1925,36 +2130,65 @@ static int intel_pt_event_synth(struct perf_tool *tool,
 						 NULL);
 }
 
-static int intel_pt_synth_event(struct perf_session *session,
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
 				struct perf_event_attr *attr, u64 id)
 {
 	struct intel_pt_synth intel_pt_synth;
+	int err;
+
+	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+		 name, id, (u64)attr->sample_type);
 
 	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
 	intel_pt_synth.session = session;
 
-	return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
-					   &id, intel_pt_event_synth);
+	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+					  &id, intel_pt_event_synth);
+	if (err)
+		pr_err("%s: failed to synthesize '%s' event type\n",
+		       __func__, name);
+
+	return err;
+}
+
+static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
+				    const char *name)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->id && evsel->id[0] == id) {
+			if (evsel->name)
+				zfree(&evsel->name);
+			evsel->name = strdup(name);
+			break;
+		}
+	}
+}
+
+static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
+					 struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == pt->pmu_type && evsel->ids)
+			return evsel;
+	}
+
+	return NULL;
 }
 
 static int intel_pt_synth_events(struct intel_pt *pt,
 				 struct perf_session *session)
 {
 	struct perf_evlist *evlist = session->evlist;
-	struct perf_evsel *evsel;
+	struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
 	struct perf_event_attr attr;
-	bool found = false;
 	u64 id;
 	int err;
 
-	evlist__for_each_entry(evlist, evsel) {
-		if (evsel->attr.type == pt->pmu_type && evsel->ids) {
-			found = true;
-			break;
-		}
-	}
-
-	if (!found) {
+	if (!evsel) {
 		pr_debug("There are no selected events with Intel Processor Trace data\n");
 		return 0;
 	}
@@ -1983,6 +2217,25 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 	if (!id)
 		id = 1;
 
+	if (pt->synth_opts.branches) {
+		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+		attr.sample_period = 1;
+		attr.sample_type |= PERF_SAMPLE_ADDR;
+		err = intel_pt_synth_event(session, "branches", &attr, id);
+		if (err)
+			return err;
+		pt->sample_branches = true;
+		pt->branches_sample_type = attr.sample_type;
+		pt->branches_id = id;
+		id += 1;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+	}
+
+	if (pt->synth_opts.callchain)
+		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+	if (pt->synth_opts.last_branch)
+		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
 	if (pt->synth_opts.instructions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
 		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
@@ -1990,70 +2243,90 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
 		else
 			attr.sample_period = pt->synth_opts.period;
-		pt->instructions_sample_period = attr.sample_period;
-		if (pt->synth_opts.callchain)
-			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
-		if (pt->synth_opts.last_branch)
-			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'instructions' event type\n",
-			       __func__);
+		err = intel_pt_synth_event(session, "instructions", &attr, id);
+		if (err)
 			return err;
-		}
 		pt->sample_instructions = true;
 		pt->instructions_sample_type = attr.sample_type;
 		pt->instructions_id = id;
 		id += 1;
 	}
 
+	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+	attr.sample_period = 1;
+
 	if (pt->synth_opts.transactions) {
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
-		attr.sample_period = 1;
-		if (pt->synth_opts.callchain)
-			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
-		if (pt->synth_opts.last_branch)
-			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'transactions' event type\n",
-			       __func__);
+		err = intel_pt_synth_event(session, "transactions", &attr, id);
+		if (err)
 			return err;
-		}
 		pt->sample_transactions = true;
+		pt->transactions_sample_type = attr.sample_type;
 		pt->transactions_id = id;
+		intel_pt_set_event_name(evlist, id, "transactions");
 		id += 1;
-		evlist__for_each_entry(evlist, evsel) {
-			if (evsel->id && evsel->id[0] == pt->transactions_id) {
-				if (evsel->name)
-					zfree(&evsel->name);
-				evsel->name = strdup("transactions");
-				break;
-			}
-		}
 	}
 
-	if (pt->synth_opts.branches) {
-		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
-		attr.sample_period = 1;
-		attr.sample_type |= PERF_SAMPLE_ADDR;
-		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
-		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
-		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
-			 id, (u64)attr.sample_type);
-		err = intel_pt_synth_event(session, &attr, id);
-		if (err) {
-			pr_err("%s: failed to synthesize 'branches' event type\n",
-			       __func__);
+	attr.type = PERF_TYPE_SYNTH;
+	attr.sample_type |= PERF_SAMPLE_RAW;
+
+	if (pt->synth_opts.ptwrites) {
+		attr.config = PERF_SYNTH_INTEL_PTWRITE;
+		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+		if (err)
 			return err;
-		}
-		pt->sample_branches = true;
-		pt->branches_sample_type = attr.sample_type;
-		pt->branches_id = id;
+		pt->sample_ptwrites = true;
+		pt->ptwrites_sample_type = attr.sample_type;
+		pt->ptwrites_id = id;
+		intel_pt_set_event_name(evlist, id, "ptwrite");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events) {
+		pt->sample_pwr_events = true;
+		pt->pwr_events_sample_type = attr.sample_type;
+
+		attr.config = PERF_SYNTH_INTEL_CBR;
+		err = intel_pt_synth_event(session, "cbr", &attr, id);
+		if (err)
+			return err;
+		pt->cbr_id = id;
+		intel_pt_set_event_name(evlist, id, "cbr");
+		id += 1;
+	}
+
+	if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
+		attr.config = PERF_SYNTH_INTEL_MWAIT;
+		err = intel_pt_synth_event(session, "mwait", &attr, id);
+		if (err)
+			return err;
+		pt->mwait_id = id;
+		intel_pt_set_event_name(evlist, id, "mwait");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRE;
+		err = intel_pt_synth_event(session, "pwre", &attr, id);
+		if (err)
+			return err;
+		pt->pwre_id = id;
+		intel_pt_set_event_name(evlist, id, "pwre");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_EXSTOP;
+		err = intel_pt_synth_event(session, "exstop", &attr, id);
+		if (err)
+			return err;
+		pt->exstop_id = id;
+		intel_pt_set_event_name(evlist, id, "exstop");
+		id += 1;
+
+		attr.config = PERF_SYNTH_INTEL_PWRX;
+		err = intel_pt_synth_event(session, "pwrx", &attr, id);
+		if (err)
+			return err;
+		pt->pwrx_id = id;
+		intel_pt_set_event_name(evlist, id, "pwrx");
+		id += 1;
 	}
 
 	pt->synth_needs_swap = evsel->needs_swap;
@@ -2322,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
 		intel_pt_log("Maximum non-turbo ratio %u\n",
 			     pt->max_non_turbo_ratio);
+		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
 	}
 
 	if (pt->synth_opts.calls)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d97e014..5de2b86 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -572,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 		if (dso == NULL)
 			goto out_unlock;
 
-		if (machine__is_host(machine))
-			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-		else
-			dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-
-		/* _KMODULE_COMP should be next to _KMODULE */
-		if (m->kmod && m->comp)
-			dso->symtab_type++;
-
-		dso__set_short_name(dso, strdup(m->name), true);
+		dso__set_module_info(dso, m, machine);
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
@@ -1218,10 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine)
 	 */
 	map_groups__fixup_end(&machine->kmaps);
 
-	if (machine__get_running_kernel_start(machine, &name, &addr)) {
-	} else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
-		machine__destroy_kernel_maps(machine);
-		return -1;
+	if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+		if (name &&
+		    maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+			machine__destroy_kernel_maps(machine);
+			return -1;
+		}
 	}
 
 	return 0;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index ea7f450..389e972 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -2,6 +2,7 @@
 #define __PMU_H
 
 #include <linux/bitmap.h>
+#include <linux/compiler.h>
 #include <linux/perf_event.h>
 #include <stdbool.h>
 #include "evsel.h"
@@ -83,8 +84,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
 		      bool long_desc, bool details_flag);
 bool pmu_have_event(const char *pname, const char *name);
 
-int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
-			...) __attribute__((format(scanf, 3, 4)));
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
 
 int perf_pmu__test(void);
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 84e7e69..a2670e9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
 					   struct map *map, unsigned long offs)
 {
 	struct symbol *sym;
-	u64 addr = tp->address + tp->offset - offs;
+	u64 addr = tp->address - offs;
 
 	sym = map__find_symbol(map, addr);
 	if (!sym)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 3738426..5812947 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -1,6 +1,7 @@
 #ifndef _PROBE_EVENT_H
 #define _PROBE_EVENT_H
 
+#include <linux/compiler.h>
 #include <stdbool.h>
 #include "intlist.h"
 
@@ -171,8 +172,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct symbol *sym);
 
 /* If there is no space to write, returns -E2BIG. */
-int e_snprintf(char *str, size_t size, const char *format, ...)
-	__attribute__((format(printf, 3, 4)));
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX	1024
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 9d92af7..57b7a00 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -28,6 +28,7 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/compiler.h>
 #include <linux/time64.h>
 
 #include "../../perf.h"
@@ -84,7 +85,7 @@ struct tables {
 
 static struct tables tables_global;
 
-static void handler_call_die(const char *handler_name) NORETURN;
+static void handler_call_die(const char *handler_name) __noreturn;
 static void handler_call_die(const char *handler_name)
 {
 	PyErr_Print();
@@ -1219,7 +1220,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# be retrieved using Python functions of the form "
 		"common_*(context).\n");
 
-	fprintf(ofp, "# See the perf-trace-python Documentation for the list "
+	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
 	fprintf(ofp, "import os\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7dc1096..d19c40a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 
 		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
 			pr_err("File does not contain CPU events. "
-			       "Remove -c option to proceed.\n");
+			       "Remove -C option to proceed.\n");
 			return -1;
 		}
 	}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5762ae4..8b327c9 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2532,12 +2532,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
 			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
 				if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
-					error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+					pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
 				else
-					error("Invalid --sort key: `%s'", tok);
+					pr_err("Invalid --sort key: `%s'", tok);
 				break;
 			} else if (ret == -ESRCH) {
-				error("Unknown --sort key: `%s'", tok);
+				pr_err("Unknown --sort key: `%s'", tok);
 				break;
 			}
 		}
@@ -2594,7 +2594,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
 		return 0;
 
 	if (sort_order[1] == '\0') {
-		error("Invalid --sort key: `+'");
+		pr_err("Invalid --sort key: `+'");
 		return -EINVAL;
 	}
 
@@ -2604,7 +2604,7 @@ static int setup_sort_order(struct perf_evlist *evlist)
 	 */
 	if (asprintf(&new_sort_order, "%s,%s",
 		     get_default_sort_order(evlist), sort_order + 1) < 0) {
-		error("Not enough memory to set up --sort");
+		pr_err("Not enough memory to set up --sort");
 		return -ENOMEM;
 	}
 
@@ -2668,7 +2668,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 
 	str = strdup(sort_keys);
 	if (str == NULL) {
-		error("Not enough memory to setup sort keys");
+		pr_err("Not enough memory to setup sort keys");
 		return -ENOMEM;
 	}
 
@@ -2678,7 +2678,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 	if (!is_strict_order(field_order)) {
 		str = setup_overhead(str);
 		if (str == NULL) {
-			error("Not enough memory to setup overhead keys");
+			pr_err("Not enough memory to setup overhead keys");
 			return -ENOMEM;
 		}
 	}
@@ -2834,10 +2834,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
 			tok; tok = strtok_r(NULL, ", ", &tmp)) {
 		ret = output_field_add(list, tok);
 		if (ret == -EINVAL) {
-			error("Invalid --fields key: `%s'", tok);
+			pr_err("Invalid --fields key: `%s'", tok);
 			break;
 		} else if (ret == -ESRCH) {
-			error("Unknown --fields key: `%s'", tok);
+			pr_err("Unknown --fields key: `%s'", tok);
 			break;
 		}
 	}
@@ -2877,7 +2877,7 @@ static int __setup_output_field(void)
 
 	strp = str = strdup(field_order);
 	if (str == NULL) {
-		error("Not enough memory to setup output fields");
+		pr_err("Not enough memory to setup output fields");
 		return -ENOMEM;
 	}
 
@@ -2885,7 +2885,7 @@ static int __setup_output_field(void)
 		strp++;
 
 	if (!strlen(strp)) {
-		error("Invalid --fields key: `+'");
+		pr_err("Invalid --fields key: `+'");
 		goto out;
 	}
 
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index df051a5..ebc88a7 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -56,7 +56,10 @@ static int inline_list__append(char *filename, char *funcname, int line_nr,
 		}
 	}
 
-	list_add_tail(&ilist->list, &node->val);
+	if (callchain_param.order == ORDER_CALLEE)
+		list_add_tail(&ilist->list, &node->val);
+	else
+		list_add(&ilist->list, &node->val);
 
 	return 0;
 }
@@ -200,12 +203,14 @@ static void addr2line_cleanup(struct a2l_data *a2l)
 
 #define MAX_INLINE_NEST 1024
 
-static void inline_list__reverse(struct inline_node *node)
+static int inline_list__append_dso_a2l(struct dso *dso,
+				       struct inline_node *node)
 {
-	struct inline_list *ilist, *n;
+	struct a2l_data *a2l = dso->a2l;
+	char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL;
+	char *filename = a2l->filename ? strdup(a2l->filename) : NULL;
 
-	list_for_each_entry_safe_reverse(ilist, n, &node->val, list)
-		list_move_tail(&ilist->list, &node->val);
+	return inline_list__append(filename, funcname, a2l->line, node, dso);
 }
 
 static int addr2line(const char *dso_name, u64 addr,
@@ -230,36 +235,36 @@ static int addr2line(const char *dso_name, u64 addr,
 
 	bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
 
-	if (a2l->found && unwind_inlines) {
+	if (!a2l->found)
+		return 0;
+
+	if (unwind_inlines) {
 		int cnt = 0;
 
+		if (node && inline_list__append_dso_a2l(dso, node))
+			return 0;
+
 		while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
 					     &a2l->funcname, &a2l->line) &&
 		       cnt++ < MAX_INLINE_NEST) {
 
 			if (node != NULL) {
-				if (inline_list__append(strdup(a2l->filename),
-							strdup(a2l->funcname),
-							a2l->line, node,
-							dso) != 0)
+				if (inline_list__append_dso_a2l(dso, node))
 					return 0;
+				// found at least one inline frame
+				ret = 1;
 			}
 		}
-
-		if ((node != NULL) &&
-		    (callchain_param.order != ORDER_CALLEE)) {
-			inline_list__reverse(node);
-		}
 	}
 
-	if (a2l->found && a2l->filename) {
-		*file = strdup(a2l->filename);
+	if (file) {
+		*file = a2l->filename ? strdup(a2l->filename) : NULL;
+		ret = *file ? 1 : 0;
+	}
+
+	if (line)
 		*line = a2l->line;
 
-		if (*file)
-			ret = 1;
-	}
-
 	return ret;
 }
 
@@ -278,8 +283,6 @@ void dso__free_a2l(struct dso *dso)
 static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 	struct dso *dso)
 {
-	char *file = NULL;
-	unsigned int line = 0;
 	struct inline_node *node;
 
 	node = zalloc(sizeof(*node));
@@ -291,7 +294,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 	INIT_LIST_HEAD(&node->val);
 	node->addr = addr;
 
-	if (!addr2line(dso_name, addr, &file, &line, dso, TRUE, node))
+	if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node))
 		goto out_free_inline_node;
 
 	if (list_empty(&node->val))
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc6..719d6cb 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c581744..53b9a99 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae2..7522bf1 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 318424e..802d743 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -42,6 +42,7 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <string.h>
+#include <linux/compiler.h>
 #include <sys/types.h>
 
 extern char strbuf_slopbuf[];
@@ -85,8 +86,7 @@ static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
 	return strbuf_add(sb, s, strlen(s));
 }
 
-__attribute__((format(printf,2,3)))
-int strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
 
 /* XXX: if read fails, any partial read is undone */
 ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e7ee47f..502505c 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -637,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata)
 	return 0;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name,
-			      enum dso_binary_type type)
-{
-	int fd = -1;
-	char tmpbuf[] = "/tmp/perf-kmod-XXXXXX";
-	struct kmod_path m;
-
-	if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		return -1;
-
-	if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		name = dso->long_name;
-
-	if (kmod_path__parse_ext(&m, name) || !m.comp)
-		return -1;
-
-	fd = mkstemp(tmpbuf);
-	if (fd < 0) {
-		dso->load_errno = errno;
-		goto out;
-	}
-
-	if (!decompress_to_file(m.ext, name, fd)) {
-		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
-		close(fd);
-		fd = -1;
-	}
-
-	unlink(tmpbuf);
-
-out:
-	free(m.ext);
-	return fd;
-}
-
 bool symsrc__possibly_runtime(struct symsrc *ss)
 {
 	return ss->dynsym || ss->opdsec;
@@ -705,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	int fd;
 
 	if (dso__needs_decompress(dso)) {
-		fd = decompress_kmodule(dso, name, type);
+		fd = dso__decompress_kmodule_fd(dso, name);
 		if (fd < 0)
 			return -1;
+
+		type = dso->symtab_type;
 	} else {
 		fd = open(name, O_RDONLY);
 		if (fd < 0) {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 8f2b068..e7a98db 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1562,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map)
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
-	if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		if (dso__build_id_is_kmod(dso, name, PATH_MAX))
-			kmod = true;
-
 	if (syms_ss)
 		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
 	else
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 746bbee..e0a6e9a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -24,7 +24,7 @@
 #include <errno.h>
 
 #include "../perf.h"
-#include "util.h"
+#include "debug.h"
 #include "trace-event.h"
 
 #include "sane_ctype.h"
@@ -150,7 +150,7 @@ void parse_ftrace_printk(struct pevent *pevent,
 	while (line) {
 		addr_str = strtok_r(line, ":", &fmt);
 		if (!addr_str) {
-			warning("printk format with empty entry");
+			pr_warning("printk format with empty entry");
 			break;
 		}
 		addr = strtoull(addr_str, NULL, 16);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index f90e11a..7755a5e0 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -39,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip,
 		return 0;
 
 	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start)
+			mod = 0;
+	}
+
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
 				      dso->long_name, -1, al->map->start,
@@ -168,12 +176,24 @@ frame_callback(Dwfl_Frame *state, void *arg)
 {
 	struct unwind_info *ui = arg;
 	Dwarf_Addr pc;
+	bool isactivation;
 
 	if (!dwfl_frame_pc(state, &pc, NULL)) {
 		pr_err("%s", dwfl_errmsg(-1));
 		return DWARF_CB_ABORT;
 	}
 
+	/* Report the module before we query for isactivation */
+	report_module(pc, ui);
+
+	if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+		pr_err("%s", dwfl_errmsg(-1));
+		return DWARF_CB_ABORT;
+	}
+
+	if (!isactivation)
+		--pc;
+
 	return entry(pc, ui) || !(--ui->max_stack) ?
 	       DWARF_CB_ABORT : DWARF_CB_OK;
 }
@@ -220,7 +240,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 
 	err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
 
-	if (err && !ui->max_stack)
+	if (err && ui->max_stack != max_stack)
 		err = 0;
 
 	/*
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index f8455be..672c2ada 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -692,6 +692,17 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 
 		while (!ret && (unw_step(&c) > 0) && i < max_stack) {
 			unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+			/*
+			 * Decrement the IP for any non-activation frames.
+			 * This is required to properly find the srcline
+			 * for caller frames.
+			 * See also the documentation for dwfl_frame_pc(),
+			 * which this code tries to replicate.
+			 */
+			if (unw_is_signal_frame(&c) <= 0)
+				--ips[i];
+
 			++i;
 		}
 
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 996046a..6cc9d98 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -9,75 +9,17 @@
 #include "util.h"
 #include "debug.h"
 
-static void report(const char *prefix, const char *err, va_list params)
-{
-	char msg[1024];
-	vsnprintf(msg, sizeof(msg), err, params);
-	fprintf(stderr, " %s%s\n", prefix, msg);
-}
-
-static NORETURN void usage_builtin(const char *err)
+static __noreturn void usage_builtin(const char *err)
 {
 	fprintf(stderr, "\n Usage: %s\n", err);
 	exit(129);
 }
 
-static NORETURN void die_builtin(const char *err, va_list params)
-{
-	report(" Fatal: ", err, params);
-	exit(128);
-}
-
-static void error_builtin(const char *err, va_list params)
-{
-	report(" Error: ", err, params);
-}
-
-static void warn_builtin(const char *warn, va_list params)
-{
-	report(" Warning: ", warn, params);
-}
-
 /* If we are in a dlopen()ed .so write to a global variable would segfault
  * (ugh), so keep things static. */
-static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*error_routine)(const char *err, va_list params) = error_builtin;
-static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-
-void set_warning_routine(void (*routine)(const char *err, va_list params))
-{
-	warn_routine = routine;
-}
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
 
 void usage(const char *err)
 {
 	usage_routine(err);
 }
-
-void die(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	die_builtin(err, params);
-	va_end(params);
-}
-
-int error(const char *err, ...)
-{
-	va_list params;
-
-	va_start(params, err);
-	error_routine(err, params);
-	va_end(params);
-	return -1;
-}
-
-void warning(const char *warn, ...)
-{
-	va_list params;
-
-	va_start(params, warn);
-	warn_routine(warn, params);
-	va_end(params);
-}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 28c9f33..988111e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -343,43 +343,6 @@ int perf_event_paranoid(void)
 
 	return value;
 }
-
-bool find_process(const char *name)
-{
-	size_t len = strlen(name);
-	DIR *dir;
-	struct dirent *d;
-	int ret = -1;
-
-	dir = opendir(procfs__mountpoint());
-	if (!dir)
-		return false;
-
-	/* Walk through the directory. */
-	while (ret && (d = readdir(dir)) != NULL) {
-		char path[PATH_MAX];
-		char *data;
-		size_t size;
-
-		if ((d->d_type != DT_DIR) ||
-		     !strcmp(".", d->d_name) ||
-		     !strcmp("..", d->d_name))
-			continue;
-
-		scnprintf(path, sizeof(path), "%s/%s/comm",
-			  procfs__mountpoint(), d->d_name);
-
-		if (filename__read_str(path, &data, &size))
-			continue;
-
-		ret = strncmp(name, data, len);
-		free(data);
-	}
-
-	closedir(dir);
-	return ret ? false : true;
-}
-
 static int
 fetch_ubuntu_kernel_version(unsigned int *puint)
 {
@@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
 	size_t line_len = 0;
 	char *ptr, *line = NULL;
 	int version, patchlevel, sublevel, err;
-	FILE *vsig = fopen("/proc/version_signature", "r");
+	FILE *vsig;
 
+	if (!puint)
+		return 0;
+
+	vsig = fopen("/proc/version_signature", "r");
 	if (!vsig) {
 		pr_debug("Open /proc/version_signature failed: %s\n",
 			 strerror(errno));
@@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
 		goto errout;
 	}
 
-	if (puint)
-		*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
 	err = 0;
 errout:
 	free(line);
@@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
 		str[str_size - 1] = '\0';
 	}
 
+	if (!puint || int_ver_ready)
+		return 0;
+
 	err = sscanf(utsname.release, "%d.%d.%d",
 		     &version, &patchlevel, &sublevel);
 
@@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
 		return -1;
 	}
 
-	if (puint && !int_ver_ready)
-		*puint = (version << 16) + (patchlevel << 8) + sublevel;
+	*puint = (version << 16) + (patchlevel << 8) + sublevel;
 	return 0;
 }
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 5dfb9bb..2c9e58a 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,7 +1,6 @@
 #ifndef GIT_COMPAT_UTIL_H
 #define GIT_COMPAT_UTIL_H
 
-#define _ALL_SOURCE 1
 #define _BSD_SOURCE 1
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
@@ -11,24 +10,12 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <linux/compiler.h>
 #include <linux/types.h>
 
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN
-#ifndef __attribute__
-#define __attribute__(x)
-#endif
-#endif
-
 /* General helper functions */
-void usage(const char *err) NORETURN;
-void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-
-void set_warning_routine(void (*routine)(const char *err, va_list params));
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
 
 static inline void *zalloc(size_t size)
 {
@@ -57,8 +44,6 @@ int hex2u64(const char *ptr, u64 *val);
 extern unsigned int page_size;
 extern int cacheline_size;
 
-bool find_process(const char *name);
-
 int fetch_kernel_version(unsigned int *puint,
 			 char *str, size_t str_sz);
 #define KVER_VERSION(x)		(((x) >> 16) & 0xff)
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
new file mode 100644
index 0000000..cba3d99
--- /dev/null
+++ b/tools/power/acpi/.gitignore
@@ -0,0 +1,4 @@
+acpidbg
+acpidump
+ec
+include
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 64cae1a..e1f75a1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
 
-union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
 		u64 rev, u64 func, union acpi_object *argv4)
 {
 	union acpi_object *obj = ERR_PTR(-ENXIO);
@@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
 	rcu_read_lock();
 	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
 	if (ops)
-		obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4);
+		obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
 	rcu_read_unlock();
 
 	if (IS_ERR(obj))
-		return acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+		return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
 	return obj;
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index c218717..28859da 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle;
 union acpi_object *result;
 
 static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4)
+		const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
 {
 	if (handle != &nfit_ctl_handle)
 		return ERR_PTR(-ENXIO);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index f54c003..d3d63dd 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -13,6 +13,7 @@
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
 #include <linux/list.h>
+#include <linux/uuid.h>
 #include <linux/ioport.h>
 #include <linux/spinlock_types.h>
 
@@ -36,7 +37,8 @@ typedef void *acpi_handle;
 
 typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
 typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
-		const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4);
+		 const guid_t *guid, u64 rev, u64 func,
+		 union acpi_object *argv4);
 void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 		unsigned long size);
 void __wrap_iounmap(volatile void __iomem *addr);
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index 19d0604..487cbfb 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -1,23 +1,42 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
-#include <asm/byteorder.h>
+#include <linux/swab.h>
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __bpf_ntohs(x)		__builtin_bswap16(x)
-# define __bpf_htons(x)		__builtin_bswap16(x)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-# define __bpf_ntohs(x)		(x)
-# define __bpf_htons(x)		(x)
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the one the user specifies
+ * (bpfel/bpfeb). __BYTE_ORDER__ is therefore defined by
+ * the compiler; we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note that LLVM's BPF target has different __builtin_bswapX()
+ * semantics: it maps to BPF_ALU | BPF_END | BPF_TO_BE in
+ * both the bpfel and bpfeb case, which means that below we
+ * map to cpu_to_be16(). We could use it unconditionally in
+ * the BPF case, but better not to rely on it, so that this
+ * header can be used from both the application and the BPF
+ * program side, which use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___constant_swab16(x)
+# define __bpf_constant_htons(x)	___constant_swab16(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
 #else
-# error "Fix your __BYTE_ORDER?!"
+# error "Fix your compiler's __BYTE_ORDER__?!"
 #endif
 
 #define bpf_htons(x)				\
 	(__builtin_constant_p(x) ?		\
-	 __constant_htons(x) : __bpf_htons(x))
+	 __bpf_constant_htons(x) : __bpf_htons(x))
 #define bpf_ntohs(x)				\
 	(__builtin_constant_p(x) ?		\
-	 __constant_ntohs(x) : __bpf_ntohs(x))
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
 
-#endif
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3773562..0ff8c55 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -49,6 +49,7 @@
 #define MAX_NR_MAPS	4
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
 struct bpf_test {
 	const char *descr;
@@ -2615,6 +2616,30 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"direct packet access: test17 (pruning, alignment)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+			BPF_JMP_A(-6),
+		},
+		.errstr = "misaligned packet access off 2+15+-4 size 4",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3341,6 +3366,70 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
 	{
+		"alu ops on ptr_to_map_value_or_null, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
 		"invalid memory access with multiple map_lookup_elem calls",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 10),
@@ -3660,6 +3749,72 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_context access",
 	},
 	{
+		"leak pointer into ctx 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 2 },
+		.errstr_unpriv = "R2 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into ctx 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.errstr_unpriv = "R2 leaks addr into ctx",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into map val",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr_unpriv = "R6 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
 		"helper access to map: full range",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -4937,7 +5092,149 @@ static struct bpf_test tests[] = {
 		.fixup_map_in_map = { 3 },
 		.errstr = "R1 type=map_value_or_null expected=map_ptr",
 		.result = REJECT,
-	}
+	},
+	{
+		"ld_abs: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"ld_ind: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -5059,9 +5356,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	do_test_fixup(test, prog, map_fds);
 
-	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				   prog, prog_len, "GPL", 0, bpf_vlog,
-				   sizeof(bpf_vlog));
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog));
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
new file mode 100644
index 0000000..f4d1ff7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# description: Register/unregister many kprobe events
+
+# The ftrace fentry skip size depends on the machine architecture.
+# Currently HAVE_KPROBES_ON_FTRACE is defined on x86 and powerpc.
+case `uname -m` in
+  x86_64|i[3456]86) OFFS=5;;
+  ppc*) OFFS=4;;
+  *) OFFS=0;;
+esac
+
+echo "Setup up to 256 kprobes"
+grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||:
+
+echo 1 > events/kprobes/enable
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index a676d3e..13f5198 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -305,7 +305,7 @@
 	echo "Running remote perf test $WITH DMA"
 	write_file "" $REMOTE_PERF/run
 	echo -n "  "
-	read_file $LOCAL_PERF/run
+	read_file $REMOTE_PERF/run
 	echo "  Passed"
 
 	_modprobe -r ntb_perf
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index d9c49f4..e79ccd6 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -42,12 +42,12 @@ int test_body(void)
 	printf("Check DSCR TM context switch: ");
 	fflush(stdout);
 	for (;;) {
-		rv = 1;
 		asm __volatile__ (
 			/* set a known value into the DSCR */
 			"ld      3, %[dscr1];"
 			"mtspr   %[sprn_dscr], 3;"
 
+			"li      %[rv], 1;"
 			/* start and suspend a transaction */
 			"tbegin.;"
 			"beq     1f;"
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index eee31e2..70fca31 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -27,7 +27,7 @@
 
 cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
 awk	'
-BEGIN	{
+{
 		print "if grep -q \"" $0 "\" < '"$T/.config"'";
 		print "then";
 		print "\t:";
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 00cb0db..c29f2ec 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -45,7 +45,7 @@
 trap 'rm -rf $T' 0
 mkdir $T
 
-grep -v 'CONFIG_[A-Z]*_TORTURE_TEST' < ${config_template} > $T/config
+grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config
 cat << ___EOF___ >> $T/config
 CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
 CONFIG_VIRTIO_PCI=y
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 3b3c1b6..50091de 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -296,10 +296,7 @@
 then
 	git status >> $resdir/$ds/testid.txt
 	git rev-parse HEAD >> $resdir/$ds/testid.txt
-	if ! git diff HEAD > $T/git-diff 2>&1
-	then
-		cp $T/git-diff $resdir/$ds
-	fi
+	git diff HEAD >> $resdir/$ds/testid.txt
 fi
 ___EOF___
 awk < $T/cfgcpu.pack \
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index a3a1a05..6a0b9f6 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -9,6 +9,8 @@
 TREE09
 SRCU-N
 SRCU-P
+SRCU-t
+SRCU-u
 TINY01
 TINY02
 TASKS01
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
new file mode 100644
index 0000000..84a7d51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 1a087c3..2da8b49 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -5,4 +5,4 @@
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
-CONFIG_RCU_EXPERT=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
index 4837430..ab7ccd3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -2,7 +2,11 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
-#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 0000000..6c78022
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 0000000..238bfe3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 0000000..6bc24e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,9 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 0000000..84a7d51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index a59f768..d867426 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -6,10 +6,9 @@
 CONFIG_HZ_PERIODIC=y
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_TRACE=y
 CONFIG_PROVE_LOCKING=y
-CONFIG_PROVE_RCU_REPEATEDLY=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
-CONFIG_PREEMPT_COUNT=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 359cb25..b5b5397 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -10,12 +10,9 @@
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_MAXSMP=y
+CONFIG_CPUMASK_OFFSTACK=y
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ZERO=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index adc3abc..1d14e13 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1 +1,5 @@
 rcutorture.torture_type=rcu_bh maxcpus=8
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcu_nocbs=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index c1ab592..35e639e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -18,9 +18,6 @@
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
deleted file mode 100644
index 917d251..0000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ /dev/null
@@ -1,21 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=n
-CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=y
-#CHECK#CONFIG_PREEMPT_RCU=y
-CONFIG_HZ_PERIODIC=n
-CONFIG_NO_HZ_IDLE=y
-CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=y
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_NOCB_CPU=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=n
-CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 3b93ee5..2dc31b1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -14,9 +14,5 @@
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=y
-CONFIG_RCU_KTHREAD_PRIO=2
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 120c0c8..5d2cc0b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -1 +1,5 @@
 rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 5af758e..27d2269 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -15,11 +15,7 @@
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=4
 CONFIG_RCU_FANOUT_LEAF=3
-CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
 CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index d4cdc0d..2dde0d9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -13,12 +13,8 @@
 CONFIG_RCU_FANOUT=6
 CONFIG_RCU_FANOUT_LEAF=6
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_NONE=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index 15b3e1a..c7fd050 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -1,2 +1,5 @@
 rcutorture.torture_type=sched
 rcupdate.rcu_self_test_sched=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 4cb02bd..05a4eec 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -18,8 +18,6 @@
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index dd90f28..ad18b52 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -2,3 +2,6 @@
 rcupdate.rcu_self_test_bh=1
 rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index b12a3ea..0f4759f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,6 +1,5 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=16
-CONFIG_CPUMASK_OFFSTACK=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
@@ -9,16 +8,11 @@
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_NO_HZ_FULL_ALL=n
-CONFIG_NO_HZ_FULL_SYSIDLE=y
 CONFIG_RCU_FAST_NO_HZ=n
 CONFIG_RCU_TRACE=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_RCU_FANOUT=2
 CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 099cc63..fb1c763 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -15,7 +15,6 @@
 CONFIG_RCU_FANOUT=3
 CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ALL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
deleted file mode 100644
index 2ad13f0..0000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ /dev/null
@@ -1,21 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=16
-CONFIG_PREEMPT_NONE=n
-CONFIG_PREEMPT_VOLUNTARY=n
-CONFIG_PREEMPT=y
-#CHECK#CONFIG_PREEMPT_RCU=y
-CONFIG_HZ_PERIODIC=n
-CONFIG_NO_HZ_IDLE=y
-CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=y
-CONFIG_HOTPLUG_CPU=n
-CONFIG_SUSPEND=n
-CONFIG_HIBERNATION=n
-CONFIG_RCU_FANOUT=3
-CONFIG_RCU_FANOUT_LEAF=2
-CONFIG_RCU_NOCB_CPU=y
-CONFIG_RCU_NOCB_CPU_ALL=y
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_BOOST=n
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index fb066dc..1bd8efc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -2,3 +2,4 @@
 rcupdate.rcu_self_test=1
 rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
+rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
new file mode 100644
index 0000000..fb05ef5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -0,0 +1,16 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
index a312f67..721cfda 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -7,7 +7,6 @@
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=n
 CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
index 985fb17..7629f5d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -8,7 +8,6 @@
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_RCU_TRACE=n
 CONFIG_HOTPLUG_CPU=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 24396ae..a75b169 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -18,7 +18,6 @@
 
 	In common code tested by TREE_RCU test cases.
 
-CONFIG_NO_HZ_FULL_SYSIDLE
 CONFIG_RCU_NOCB_CPU
 
 	Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 364801b..9ad3f89 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -9,28 +9,20 @@
 CONFIG_HOTPLUG_CPU -- Do half.  (Every second.)
 CONFIG_HZ_PERIODIC -- Do one.
 CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
-CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
-CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
 CONFIG_PREEMPT -- Do half.  (First three and #8.)
 CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
 CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
-CONFIG_PROVE_RCU_REPEATEDLY -- Do one.
 CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
-CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
 CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
 CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
-CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
-CONFIG_RCU_NOCB_CPU -- Do three, see below.
-CONFIG_RCU_NOCB_CPU_ALL -- Do one.
-CONFIG_RCU_NOCB_CPU_NONE -- Do one.
-CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
+CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
+CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
+	rcu_nocbs=0, and one with all rcu_nocbs CPUs.
 CONFIG_RCU_TRACE -- Do half.
 CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
 CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
 CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios.
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios.
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios.
 
 RCU-bh: Do one with PREEMPT and one with !PREEMPT.
 RCU-sched: Do one with PREEMPT but not BOOST.
@@ -52,10 +44,6 @@
 
 	Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
 
-CONFIG_NO_HZ_FULL_SYSIDLE_SMALL
-
-	Defer until Frederic uses this.
-
 CONFIG_PREEMPT_COUNT
 CONFIG_PREEMPT_RCU
 
@@ -78,30 +66,16 @@
 
 	Always used in KVM testing.
 
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
-
-	Inspection suffices, ignore.
-
 CONFIG_PREEMPT_RCU
 CONFIG_TREE_RCU
 CONFIG_TINY_RCU
 
 	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
 
-CONFIG_SPARSE_RCU_POINTER
-
-	Makes sense only for sparse runs, not for kernel builds.
-
 CONFIG_SRCU
 CONFIG_TASKS_RCU
 
 	Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
 
-CONFIG_RCU_TRACE
-
-	Implied by CONFIG_RCU_TRACE for Tree RCU.
-
 
 boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
index 8ff8904..c9e8bc5 100755
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -1,4 +1,4 @@
-#!/bin/awk -f
+#!/usr/bin/awk -f
 
 # Modify SRCU for formal verification. The first argument should be srcu.h and
 # the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
diff --git a/usr/Kconfig b/usr/Kconfig
index c0c4850..ad0543e 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -220,6 +220,7 @@
 endchoice
 
 config INITRAMFS_COMPRESSION
+	depends on INITRAMFS_SOURCE!=""
 	string
 	default ""      if INITRAMFS_COMPRESSION_NONE
 	default ".gz"   if INITRAMFS_COMPRESSION_GZIP
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 32c3295..8794036 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -22,7 +22,7 @@
 #include <asm/kvm_hyp.h>
 
 #define vtr_to_max_lr_idx(v)		((v) & 0xf)
-#define vtr_to_nr_pre_bits(v)		(((u32)(v) >> 26) + 1)
+#define vtr_to_nr_pre_bits(v)		((((u32)(v) >> 26) & 7) + 1)
 
 static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
 {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index a2d6324..e2e5eff 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -879,6 +879,9 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pmd_t *pmd;
 
 	pud = stage2_get_pud(kvm, cache, addr);
+	if (!pud)
+		return NULL;
+
 	if (stage2_pud_none(*pud)) {
 		if (!cache)
 			return NULL;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index 0a4283e..63e0bbd 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -226,7 +226,13 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		val = vmcr.ctlr;
+		val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT;
+		val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT;
+		val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT;
+		val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT;
+		val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT;
+		val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT;
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
@@ -267,7 +273,13 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 
 	switch (addr & 0xff) {
 	case GIC_CPU_CTRL:
-		vmcr.ctlr = val;
+		vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0);
+		vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1);
+		vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl);
+		vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn);
+		vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR);
+		vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS);
+
 		break;
 	case GIC_CPU_PRIMASK:
 		/*
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 504b4bd..e4187e5 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -177,7 +177,18 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 	u32 vmcr;
 
-	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) &
+		GICH_VMCR_ENABLE_GRP0_MASK;
+	vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) &
+		GICH_VMCR_ENABLE_GRP1_MASK;
+	vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) &
+		GICH_VMCR_ACK_CTL_MASK;
+	vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) &
+		GICH_VMCR_FIQ_EN_MASK;
+	vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) &
+		GICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) &
+		GICH_VMCR_EOI_MODE_MASK;
 	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
 		GICH_VMCR_ALIAS_BINPOINT_MASK;
 	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
@@ -195,8 +206,19 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
-			GICH_VMCR_CTRL_SHIFT;
+	vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >>
+		GICH_VMCR_ENABLE_GRP0_SHIFT;
+	vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >>
+		GICH_VMCR_ENABLE_GRP1_SHIFT;
+	vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >>
+		GICH_VMCR_ACK_CTL_SHIFT;
+	vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >>
+		GICH_VMCR_FIQ_EN_SHIFT;
+	vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >>
+		GICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >>
+		GICH_VMCR_EOI_MODE_SHIFT;
+
 	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
 			GICH_VMCR_ALIAS_BINPOINT_SHIFT;
 	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 6fe3f00..030248e 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -159,15 +159,24 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) <<
-		 ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
-	vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) &
+			ICH_VMCR_ACK_CTL_MASK;
+		vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) &
+			ICH_VMCR_FIQ_EN_MASK;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1, the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcr = ICH_VMCR_FIQ_EN_MASK;
+	}
+
+	vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
+	vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
 	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
 	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
 	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
@@ -180,17 +189,27 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u32 vmcr;
 
 	vmcr = cpu_if->vgic_vmcr;
 
-	/*
-	 * Ignore the FIQen bit, because GIC emulation always implies
-	 * SRE=1 which means the vFIQEn bit is also RES1.
-	 */
-	vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) <<
-			ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
-	vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+		vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >>
+			ICH_VMCR_ACK_CTL_SHIFT;
+		vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >>
+			ICH_VMCR_FIQ_EN_SHIFT;
+	} else {
+		/*
+		 * When emulating GICv3 on GICv3 with SRE=1, the
+		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
+		 */
+		vmcrp->fiqen = 1;
+		vmcrp->ackctl = 0;
+	}
+
+	vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
+	vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT;
 	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
 	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
 	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index da83e4c..bba7fa2 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -111,14 +111,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
  * registers regardless of the hardware backed GIC used.
  */
 struct vgic_vmcr {
-	u32	ctlr;
+	u32	grpen0;
+	u32	grpen1;
+
+	u32	ackctl;
+	u32	fiqen;
+	u32	cbpr;
+	u32	eoim;
+
 	u32	abpr;
 	u32	bpr;
 	u32	pmr;  /* Priority mask field in the GICC_PMR and
 		       * ICC_PMR_EL1 priority field format */
-	/* Below member variable are valid only for GICv3 */
-	u32	grpen0;
-	u32	grpen1;
 };
 
 struct vgic_reg_attr {
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a8d5403..9120edf 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -184,7 +184,7 @@ int __attribute__((weak)) kvm_arch_set_irq_inatomic(
  * Called with wqh->lock held and interrupts disabled
  */
 static int
-irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(wait, struct kvm_kernel_irqfd, wait);