Blame - kernel/locking/qspinlock.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: 5fc8c311b8fe59d46decc2c5a049ce6e860a07b8 [file] [log] [blame]

Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	1	/*
				2	* Queued spinlock
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
				15	* (C) Copyright 2013-2014 Red Hat, Inc.
				16	* (C) Copyright 2015 Intel Corp.
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	17	* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	18	*
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	19	* Authors: Waiman Long <waiman.long@hpe.com>
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	20	* Peter Zijlstra <peterz@infradead.org>
				21	*/
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	22
				23	#ifndef _GEN_PV_LOCK_SLOWPATH
				24
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	25	#include <linux/smp.h>
				26	#include <linux/bug.h>
				27	#include <linux/cpumask.h>
				28	#include <linux/percpu.h>
				29	#include <linux/hardirq.h>
				30	#include <linux/mutex.h>
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	31	#include <asm/byteorder.h>
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	32	#include <asm/qspinlock.h>
				33
				34	/*
				35	* The basic principle of a queue-based spinlock can best be understood
				36	* by studying a classic queue-based spinlock implementation called the
				37	* MCS lock. The paper below provides a good description for this kind
				38	* of lock.
				39	*
				40	* http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
				41	*
				42	* This queued spinlock implementation is based on the MCS lock, however to make
				43	* it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
				44	* API, we must modify it somehow.
				45	*
				46	* In particular; where the traditional MCS lock consists of a tail pointer
				47	* (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
				48	* unlock the next pending (next->locked), we compress both these: {tail,
				49	* next->locked} into a single u32 value.
				50	*
				51	* Since a spinlock disables recursion of its own context and there is a limit
				52	* to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
				53	* are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
				54	* we can encode the tail by combining the 2-bit nesting level with the cpu
				55	* number. With one byte for the lock value and 3 bytes for the tail, only a
				56	* 32-bit word is now needed. Even though we only need 1 bit for the lock,
				57	* we extend it to a full byte to achieve better performance for architectures
				58	* that support atomic byte write.
				59	*
				60	* We also change the first spinner to spin on the lock bit instead of its
				61	* node; whereby avoiding the need to carry a node from lock to unlock, and
				62	* preserving existing lock API. This also makes the unlock code simpler and
				63	* faster.
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	64	*
				65	* N.B. The current implementation only supports architectures that allow
				66	* atomic operations on smaller 8-bit and 16-bit data types.
				67	*
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	68	*/
				69
				70	#include "mcs_spinlock.h"
				71
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	72	#ifdef CONFIG_PARAVIRT_SPINLOCKS
				73	#define MAX_NODES 8
				74	#else
				75	#define MAX_NODES 4
				76	#endif
				77
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	78	/*
				79	* Per-CPU queue node structures; we can never have more than 4 nested
				80	* contexts: task, softirq, hardirq, nmi.
				81	*
				82	* Exactly fits one 64-byte cacheline on a 64-bit architecture.
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	83	*
				84	* PV doubles the storage and uses the second cacheline for PV state.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	85	*/
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	86	static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	87
				88	/*
				89	* We must be able to distinguish between no-tail and the tail at 0:0,
				90	* therefore increment the cpu number by one.
				91	*/
				92
				93	static inline u32 encode_tail(int cpu, int idx)
				94	{
				95	u32 tail;
				96
				97	#ifdef CONFIG_DEBUG_SPINLOCK
				98	BUG_ON(idx > 3);
				99	#endif
				100	tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
				101	tail \|= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
				102
				103	return tail;
				104	}
				105
				106	static inline struct mcs_spinlock *decode_tail(u32 tail)
				107	{
				108	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
				109	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
				110
				111	return per_cpu_ptr(&mcs_nodes[idx], cpu);
				112	}
				113
/* Mask covering both the locked and the pending part of the lock word. */
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
				115
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	116	/*
				117	* By using the whole 2nd least significant byte for the pending bit, we
				118	* can allow better optimization of the lock acquisition for the pending
				119	* bit holder.
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	120	*
				121	* This internal structure is also used by the set_locked function which
				122	* is not restricted to _Q_PENDING_BITS == 8.
					*
					* The union overlays three views on the same storage: the atomic_t
					* val, a byte view (locked, pending) and a 16-bit view
					* (locked_pending, tail). The member order of the byte and 16-bit
					* views is mirrored between the __LITTLE_ENDIAN branch and the
					* #else branch.
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	123	*/
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	124	struct __qspinlock {
				125	union {
				126	atomic_t val;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	127	#ifdef __LITTLE_ENDIAN
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	128	struct {
				129	u8 locked;
				130	u8 pending;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	131	};
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	132	struct {
				133	u16 locked_pending;
				134	u16 tail;
				135	};
				136	#else
				137	struct {
				138	u16 tail;
				139	u16 locked_pending;
				140	};
				141	struct {
				142	u8 reserved[2]; /* same two bytes as the u16 tail in the view above */
				143	u8 pending;
				144	u8 locked;
				145	};
				146	#endif
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	147	};
				148	};
				149
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	150	#if _Q_PENDING_BITS == 8
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	151	/**
				152	* clear_pending_set_locked - take ownership and clear the pending bit.
				153	* @lock: Pointer to queued spinlock structure
				154	*
				155	* ,1,0 -> ,0,1
				156	*
				157	* Lock stealing is not allowed if this function is used.
				158	*/
				159	static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
				160	{
				161	struct __qspinlock l = (void )lock;
				162
				163	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
				164	}
				165
				166	/*
				167	* xchg_tail - Put in the new queue tail code word & retrieve previous one
				168	* @lock : Pointer to queued spinlock structure
				169	* @tail : The new queue tail code word
				170	* Return: The previous queue tail code word
				171	*
				172	* xchg(lock, tail)
				173	*
				174	* p,, -> n,, ; prev = xchg(lock, node)
				175	*/
				176	static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
				177	{
				178	struct __qspinlock l = (void )lock;
				179
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	180	/*
				181	* Use release semantics to make sure that the MCS node is properly
				182	* initialized before changing the tail code.
				183	*/
				184	return (u32)xchg_release(&l->tail,
				185	tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	186	}
				187
				188	#else /* _Q_PENDING_BITS == 8 */
				189
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	190	/**
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	191	* clear_pending_set_locked - take ownership and clear the pending bit.
				192	* @lock: Pointer to queued spinlock structure
				193	*
				194	* ,1,0 -> ,0,1
				195	*/
				196	static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
				197	{
				198	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
				199	}
				200
				201	/**
				202	* xchg_tail - Put in the new queue tail code word & retrieve previous one
				203	* @lock : Pointer to queued spinlock structure
				204	* @tail : The new queue tail code word
				205	* Return: The previous queue tail code word
				206	*
				207	* xchg(lock, tail)
				208	*
				209	* p,, -> n,, ; prev = xchg(lock, node)
				210	*/
				211	static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
				212	{
				213	u32 old, new, val = atomic_read(&lock->val);
				214
				215	for (;;) {
				216	new = (val & _Q_LOCKED_PENDING_MASK) \| tail;
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	217	/*
				218	* Use release semantics to make sure that the MCS node is
				219	* properly initialized before changing the tail code.
				220	*/
				221	old = atomic_cmpxchg_release(&lock->val, val, new);
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	222	if (old == val)
				223	break;
				224
				225	val = old;
				226	}
				227	return old;
				228	}
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	229	#endif /* _Q_PENDING_BITS == 8 */
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	230
				231	/**
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	232	* set_locked - Set the lock bit and own the lock
				233	* @lock: Pointer to queued spinlock structure
				234	*
				235	* ,,0 -> *,0,1
				236	*/
				237	static __always_inline void set_locked(struct qspinlock *lock)
				238	{
				239	struct __qspinlock l = (void )lock;
				240
				241	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
				242	}
				243
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	244
				245	/*
				246	* Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
				247	* all the PV callbacks.
				248	*/
				249
				250	static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
Waiman Long	cd0272f	2015-11-09 19:09:27 -0500	[diff] [blame]	251	static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
				252	struct mcs_spinlock *prev) { }
Waiman Long	75d2270	2015-07-11 16:36:52 -0400	[diff] [blame]	253	static __always_inline void __pv_kick_node(struct qspinlock *lock,
				254	struct mcs_spinlock *node) { }
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	255	static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
				256	struct mcs_spinlock *node)
				257	{ return 0; }
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	258
				259	#define pv_enabled() false
				260
				261	#define pv_init_node __pv_init_node
				262	#define pv_wait_node __pv_wait_node
				263	#define pv_kick_node __pv_kick_node
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	264	#define pv_wait_head_or_lock __pv_wait_head_or_lock
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	265
				266	#ifdef CONFIG_PARAVIRT_SPINLOCKS
				267	#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
				268	#endif
				269
Peter Zijlstra	2c61002	2016-06-08 10:19:51 +0200	[diff] [blame]	270	/*
				271	* queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
				272	* issuing an _unordered_ store to set _Q_LOCKED_VAL.
				273	*
				274	* This means that the store can be delayed, but no later than the
				275	* store-release from the unlock. This means that simply observing
				276	* _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
				277	*
				278	* There are two paths that can issue the unordered store:
				279	*
				280	* (1) clear_pending_set_locked(): *,1,0 -> *,0,1
				281	*
				282	* (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
				283	* atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
				284	*
				285	* However, in both cases we have other !0 state we've set before to queue
				286	* ourselves:
				287	*
				288	* For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
				289	* load is constrained by that ACQUIRE to not pass before that, and thus must
				290	* observe the store.
				291	*
				292	* For (2) we have a more interesting scenario. We enqueue ourselves using
				293	* xchg_tail(), which ends up being a RELEASE. This in itself is not
				294	* sufficient, however that is followed by an smp_cond_acquire() on the same
				295	* word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
				296	* guarantees we must observe that store.
				297	*
				298	* Therefore both cases have other !0 state that is observable before the
				299	* unordered locked byte store comes through. This means we can use that to
				300	* wait for the lock store, and then wait for an unlock.
				301	*/
				302	#ifndef queued_spin_unlock_wait
				303	void queued_spin_unlock_wait(struct qspinlock *lock)
				304	{
				305	u32 val;
				306
				307	for (;;) {
				308	val = atomic_read(&lock->val);
				309
				310	if (!val) /* not locked, we're done */
				311	goto done;
				312
				313	if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
				314	break;
				315
				316	/* not locked, but pending, wait until we observe the lock */
				317	cpu_relax();
				318	}
				319
				320	/* any unlock is good */
				321	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
				322	cpu_relax();
				323
				324	done:
				325	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
				326	}
				327	EXPORT_SYMBOL(queued_spin_unlock_wait);
				328	#endif
				329
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	330	#endif /* _GEN_PV_LOCK_SLOWPATH */
				331
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	332	/**
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	333	* queued_spin_lock_slowpath - acquire the queued spinlock
				334	* @lock: Pointer to queued spinlock structure
				335	* @val: Current value of the queued spinlock 32-bit word
				336	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	337	* (queue tail, pending bit, lock value)
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	338	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	339	* fast : slow : unlock
				340	* : :
				341	* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (,,0)
				342	* : \| ^--------.------. / :
				343	* : v \ \ \| :
				344	* pending : (0,1,1) +--> (0,1,0) \ \| :
				345	* : \| ^--' \| \| :
				346	* : v \| \| :
				347	* uncontended : (n,x,y) +--> (n,0,0) --' \| :
				348	* queue : \| ^--' \| :
				349	* : v \| :
				350	* contended : (,x,y) +--> (,0,0) ---> (*,0,1) -' :
				351	* queue : ^--' :
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	352	*/
				353	void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
				354	{
				355	struct mcs_spinlock prev, next, *node;
				356	u32 new, old, tail;
				357	int idx;
				358
				359	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
				360
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	361	if (pv_enabled())
				362	goto queue;
				363
Peter Zijlstra	43b3f02	2015-09-04 17:25:23 +0200	[diff] [blame]	364	if (virt_spin_lock(lock))
Peter Zijlstra (Intel)	2aa79af	2015-04-24 14:56:36 -0400	[diff] [blame]	365	return;
				366
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	367	/*
				368	* wait for in-progress pending->locked hand-overs
				369	*
				370	* 0,1,0 -> 0,0,1
				371	*/
				372	if (val == _Q_PENDING_VAL) {
				373	while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
				374	cpu_relax();
				375	}
				376
				377	/*
				378	* trylock \|\| pending
				379	*
				380	* 0,0,0 -> 0,0,1 ; trylock
				381	* 0,0,1 -> 0,1,1 ; pending
				382	*/
				383	for (;;) {
				384	/*
				385	* If we observe any contention; queue.
				386	*/
				387	if (val & ~_Q_LOCKED_MASK)
				388	goto queue;
				389
				390	new = _Q_LOCKED_VAL;
				391	if (val == new)
				392	new \|= _Q_PENDING_VAL;
				393
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	394	/*
				395	* Acquire semantic is required here as the function may
				396	* return immediately if the lock was free.
				397	*/
				398	old = atomic_cmpxchg_acquire(&lock->val, val, new);
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	399	if (old == val)
				400	break;
				401
				402	val = old;
				403	}
				404
				405	/*
				406	* we won the trylock
				407	*/
				408	if (new == _Q_LOCKED_VAL)
				409	return;
				410
				411	/*
				412	* we're pending, wait for the owner to go away.
				413	*
				414	* ,1,1 -> ,1,0
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	415	*
				416	* this wait loop must be a load-acquire such that we match the
				417	* store-release that clears the locked bit and create lock
				418	* sequentiality; this is because not all clear_pending_set_locked()
				419	* implementations imply full barriers.
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	420	*/
Waiman Long	cb037fd	2015-12-10 15:17:44 -0500	[diff] [blame]	421	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	422
				423	/*
				424	* take ownership and clear the pending bit.
				425	*
				426	* ,1,0 -> ,0,1
				427	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	428	clear_pending_set_locked(lock);
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	429	return;
				430
				431	/*
				432	* End of pending bit optimistic spinning and beginning of MCS
				433	* queuing.
				434	*/
				435	queue:
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	436	node = this_cpu_ptr(&mcs_nodes[0]);
				437	idx = node->count++;
				438	tail = encode_tail(smp_processor_id(), idx);
				439
				440	node += idx;
				441	node->locked = 0;
				442	node->next = NULL;
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	443	pv_init_node(node);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	444
				445	/*
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	446	* We touched a (possibly) cold cacheline in the per-cpu queue node;
				447	* attempt the trylock once more in the hope someone let go while we
				448	* weren't watching.
				449	*/
				450	if (queued_spin_trylock(lock))
				451	goto release;
				452
				453	/*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	454	* We have already touched the queueing cacheline; don't bother with
				455	* pending stuff.
				456	*
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	457	* p,, -> n,,
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	458	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	459	old = xchg_tail(lock, tail);
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	460	next = NULL;
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	461
				462	/*
				463	* if there was a previous node; link it and wait until reaching the
				464	* head of the waitqueue.
				465	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	466	if (old & _Q_TAIL_MASK) {
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	467	prev = decode_tail(old);
				468	WRITE_ONCE(prev->next, node);
				469
Waiman Long	cd0272f	2015-11-09 19:09:27 -0500	[diff] [blame]	470	pv_wait_node(node, prev);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	471	arch_mcs_spin_lock_contended(&node->locked);
Waiman Long	81b5598	2015-11-09 19:09:22 -0500	[diff] [blame]	472
				473	/*
				474	* While waiting for the MCS lock, the next pointer may have
				475	* been set by another lock waiter. We optimistically load
				476	* the next pointer & prefetch the cacheline for writing
				477	* to reduce latency in the upcoming MCS unlock operation.
				478	*/
				479	next = READ_ONCE(node->next);
				480	if (next)
				481	prefetchw(next);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	482	}
				483
				484	/*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	485	* we're at the head of the waitqueue, wait for the owner & pending to
				486	* go away.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	487	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	488	* ,x,y -> ,0,0
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	489	*
				490	* this wait loop must use a load-acquire such that we match the
				491	* store-release that clears the locked bit and create lock
				492	* sequentiality; this is because the set_locked() function below
				493	* does not imply a full barrier.
				494	*
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	495	* The PV pv_wait_head_or_lock function, if active, will acquire
				496	* the lock and return a non-zero value. So we have to skip the
Waiman Long	cb037fd	2015-12-10 15:17:44 -0500	[diff] [blame]	497	* smp_cond_acquire() call. As the next PV queue head hasn't been
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	498	* designated yet, there is no way for the locked value to become
				499	* _Q_SLOW_VAL. So both the set_locked() and the
				500	* atomic_cmpxchg_relaxed() calls will be safe.
				501	*
				502	* If PV isn't active, 0 will be returned instead.
				503	*
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	504	*/
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	505	if ((val = pv_wait_head_or_lock(lock, node)))
				506	goto locked;
				507
Peter Zijlstra	b3e0b1b	2015-10-16 14:39:38 +0200	[diff] [blame]	508	smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	509
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	510	locked:
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	511	/*
				512	* claim the lock:
				513	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	514	* n,0,0 -> 0,0,1 : lock, uncontended
				515	* ,0,0 -> ,0,1 : lock, contended
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	516	*
				517	* If the queue head is the only one in the queue (lock value == tail),
				518	* clear the tail code and grab the lock. Otherwise, we only need
				519	* to grab the lock.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	520	*/
				521	for (;;) {
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	522	/* In the PV case we might already have _Q_LOCKED_VAL set */
				523	if ((val & _Q_TAIL_MASK) != tail) {
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	524	set_locked(lock);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	525	break;
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	526	}
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	527	/*
Waiman Long	cb037fd	2015-12-10 15:17:44 -0500	[diff] [blame]	528	* The smp_cond_acquire() call above has provided the necessary
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	529	* acquire semantics required for locking. At most two
				530	* iterations of this loop may be ran.
				531	*/
				532	old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	533	if (old == val)
				534	goto release; /* No contention */
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	535
				536	val = old;
				537	}
				538
				539	/*
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	540	* contended path; wait for next if not observed yet, release.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	541	*/
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	542	if (!next) {
				543	while (!(next = READ_ONCE(node->next)))
				544	cpu_relax();
				545	}
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	546
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	547	arch_mcs_spin_unlock_contended(&next->locked);
Waiman Long	75d2270	2015-07-11 16:36:52 -0400	[diff] [blame]	548	pv_kick_node(lock, next);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	549
				550	release:
				551	/*
				552	* release the node
				553	*/
				554	this_cpu_dec(mcs_nodes[0].count);
				555	}
				556	EXPORT_SYMBOL(queued_spin_lock_slowpath);
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	557
				558	/*
				559	* Generate the paravirt code for queued_spin_lock_slowpath().
					*
					* On the first pass only (and only with CONFIG_PARAVIRT_SPINLOCKS),
					* pv_enabled() is switched to true, the native pv_*() hook macros are
					* dropped, queued_spin_lock_slowpath is renamed to
					* __pv_queued_spin_lock_slowpath, and this file includes itself again.
					* _GEN_PV_LOCK_SLOWPATH stops the second pass from recursing and from
					* re-emitting the section guarded by #ifndef _GEN_PV_LOCK_SLOWPATH.
					* NOTE(review): qspinlock_paravirt.h presumably supplies the PV
					* versions of the undefined hooks - not visible here, confirm.
				560	*/
				561	#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
				562	#define _GEN_PV_LOCK_SLOWPATH
				563
				564	#undef pv_enabled
				565	#define pv_enabled() true
				566
				567	#undef pv_init_node
				568	#undef pv_wait_node
				569	#undef pv_kick_node
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	570	#undef pv_wait_head_or_lock
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	571
				572	#undef queued_spin_lock_slowpath
				573	#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
				574
				575	#include "qspinlock_paravirt.h"
				576	#include "qspinlock.c"
				577
				578	#endif