Blame - kernel/locking/qspinlock.c - SHIFTPHONES/mainline/linux

blob: 348c8cec104266e245773e38a97d15c436e1a5e0 [file] [log] [blame]

Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	1	/*
				2	* Queued spinlock
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
				15	* (C) Copyright 2013-2014 Red Hat, Inc.
				16	* (C) Copyright 2015 Intel Corp.
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	17	* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	18	*
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	19	* Authors: Waiman Long <waiman.long@hpe.com>
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	20	* Peter Zijlstra <peterz@infradead.org>
				21	*/
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	22
				23	#ifndef _GEN_PV_LOCK_SLOWPATH
				24
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	25	#include <linux/smp.h>
				26	#include <linux/bug.h>
				27	#include <linux/cpumask.h>
				28	#include <linux/percpu.h>
				29	#include <linux/hardirq.h>
				30	#include <linux/mutex.h>
Stafford Horne	5671360	2017-07-08 04:56:58 +0900	[diff] [blame]	31	#include <linux/prefetch.h>
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	32	#include <asm/byteorder.h>
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	33	#include <asm/qspinlock.h>
				34
				35	/*
				36	* The basic principle of a queue-based spinlock can best be understood
				37	* by studying a classic queue-based spinlock implementation called the
				38	* MCS lock. The paper below provides a good description for this kind
				39	* of lock.
				40	*
				41	* http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
				42	*
				43	* This queued spinlock implementation is based on the MCS lock, however to make
				44	* it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
				45	* API, we must modify it somehow.
				46	*
				47	* In particular; where the traditional MCS lock consists of a tail pointer
				48	* (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
				49	* unlock the next pending (next->locked), we compress both these: {tail,
				50	* next->locked} into a single u32 value.
				51	*
				52	* Since a spinlock disables recursion of its own context and there is a limit
				53	* to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
				54	* are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
				55	* we can encode the tail by combining the 2-bit nesting level with the cpu
				56	* number. With one byte for the lock value and 3 bytes for the tail, only a
				57	* 32-bit word is now needed. Even though we only need 1 bit for the lock,
				58	* we extend it to a full byte to achieve better performance for architectures
				59	* that support atomic byte write.
				60	*
				61	* We also change the first spinner to spin on the lock bit instead of its
				62	* node; whereby avoiding the need to carry a node from lock to unlock, and
				63	* preserving existing lock API. This also makes the unlock code simpler and
				64	* faster.
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	65	*
				66	* N.B. The current implementation only supports architectures that allow
				67	* atomic operations on smaller 8-bit and 16-bit data types.
				68	*
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	69	*/
				70
				71	#include "mcs_spinlock.h"
				72
/*
 * Number of per-CPU MCS queue nodes: 4 for a native-only build, 8 when
 * paravirt spinlock support is configured in.
 */
#ifndef CONFIG_PARAVIRT_SPINLOCKS
#define MAX_NODES	4
#else
#define MAX_NODES	8
#endif
				78
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	79	/*
				80	* Per-CPU queue node structures; we can never have more than 4 nested
				81	* contexts: task, softirq, hardirq, nmi.
				82	*
				83	* Exactly fits one 64-byte cacheline on a 64-bit architecture.
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	84	*
				85	* PV doubles the storage and uses the second cacheline for PV state.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	86	*/
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	87	static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	88
				89	/*
				90	* We must be able to distinguish between no-tail and the tail at 0:0,
				91	* therefore increment the cpu number by one.
				92	*/
				93
Peter Zijlstra	8d53fa1	2016-06-08 09:12:30 +0200	[diff] [blame]	94	static inline __pure u32 encode_tail(int cpu, int idx)
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	95	{
				96	u32 tail;
				97
				98	#ifdef CONFIG_DEBUG_SPINLOCK
				99	BUG_ON(idx > 3);
				100	#endif
				101	tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
				102	tail \|= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
				103
				104	return tail;
				105	}
				106
Peter Zijlstra	8d53fa1	2016-06-08 09:12:30 +0200	[diff] [blame]	107	static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	108	{
				109	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
				110	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
				111
				112	return per_cpu_ptr(&mcs_nodes[idx], cpu);
				113	}
				114
/* Mask covering both the locked and the pending fields of the lock word. */
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
				116
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	117	/*
				118	* By using the whole 2nd least significant byte for the pending bit, we
				119	* can allow better optimization of the lock acquisition for the pending
				120	* bit holder.
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	121	*
				122	* This internal structure is also used by the set_locked function which
				123	* is not restricted to _Q_PENDING_BITS == 8.
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	124	*/
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	125	struct __qspinlock {
				126	union {
				127	atomic_t val;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	128	#ifdef __LITTLE_ENDIAN
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	129	struct {
				130	u8 locked;
				131	u8 pending;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	132	};
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	133	struct {
				134	u16 locked_pending;
				135	u16 tail;
				136	};
				137	#else
				138	struct {
				139	u16 tail;
				140	u16 locked_pending;
				141	};
				142	struct {
				143	u8 reserved[2];
				144	u8 pending;
				145	u8 locked;
				146	};
				147	#endif
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	148	};
				149	};
				150
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	151	#if _Q_PENDING_BITS == 8
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	152	/**
				153	* clear_pending_set_locked - take ownership and clear the pending bit.
				154	* @lock: Pointer to queued spinlock structure
				155	*
				156	* ,1,0 -> ,0,1
				157	*
				158	* Lock stealing is not allowed if this function is used.
				159	*/
				160	static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
				161	{
				162	struct __qspinlock l = (void )lock;
				163
				164	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
				165	}
				166
				167	/*
				168	* xchg_tail - Put in the new queue tail code word & retrieve previous one
				169	* @lock : Pointer to queued spinlock structure
				170	* @tail : The new queue tail code word
				171	* Return: The previous queue tail code word
				172	*
Paul E. McKenney	548095d	2017-10-09 11:22:50 -0700	[diff] [blame]	173	* xchg(lock, tail), which heads an address dependency
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	174	*
				175	* p,, -> n,, ; prev = xchg(lock, node)
				176	*/
				177	static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
				178	{
				179	struct __qspinlock l = (void )lock;
				180
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	181	/*
				182	* Use release semantics to make sure that the MCS node is properly
				183	* initialized before changing the tail code.
				184	*/
				185	return (u32)xchg_release(&l->tail,
				186	tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	187	}
				188
				189	#else /* _Q_PENDING_BITS == 8 */
				190
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	191	/**
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	192	* clear_pending_set_locked - take ownership and clear the pending bit.
				193	* @lock: Pointer to queued spinlock structure
				194	*
				195	* ,1,0 -> ,0,1
				196	*/
				197	static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
				198	{
				199	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
				200	}
				201
				202	/**
				203	* xchg_tail - Put in the new queue tail code word & retrieve previous one
				204	* @lock : Pointer to queued spinlock structure
				205	* @tail : The new queue tail code word
				206	* Return: The previous queue tail code word
				207	*
				208	* xchg(lock, tail)
				209	*
				210	* p,, -> n,, ; prev = xchg(lock, node)
				211	*/
				212	static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
				213	{
				214	u32 old, new, val = atomic_read(&lock->val);
				215
				216	for (;;) {
				217	new = (val & _Q_LOCKED_PENDING_MASK) \| tail;
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	218	/*
				219	* Use release semantics to make sure that the MCS node is
				220	* properly initialized before changing the tail code.
				221	*/
				222	old = atomic_cmpxchg_release(&lock->val, val, new);
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	223	if (old == val)
				224	break;
				225
				226	val = old;
				227	}
				228	return old;
				229	}
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	230	#endif /* _Q_PENDING_BITS == 8 */
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	231
				232	/**
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	233	* set_locked - Set the lock bit and own the lock
				234	* @lock: Pointer to queued spinlock structure
				235	*
				236	* ,,0 -> *,0,1
				237	*/
				238	static __always_inline void set_locked(struct qspinlock *lock)
				239	{
				240	struct __qspinlock l = (void )lock;
				241
				242	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
				243	}
				244
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	245
/*
 * Generate the native code for queued_spin_lock_slowpath(); provide NOPs for
 * all the PV callbacks.
 */
				250
				251	static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
Waiman Long	cd0272f	2015-11-09 19:09:27 -0500	[diff] [blame]	252	static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
				253	struct mcs_spinlock *prev) { }
Waiman Long	75d2270	2015-07-11 16:36:52 -0400	[diff] [blame]	254	static __always_inline void __pv_kick_node(struct qspinlock *lock,
				255	struct mcs_spinlock *node) { }
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	256	static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
				257	struct mcs_spinlock *node)
				258	{ return 0; }
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	259
				260	#define pv_enabled() false
				261
				262	#define pv_init_node __pv_init_node
				263	#define pv_wait_node __pv_wait_node
				264	#define pv_kick_node __pv_kick_node
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	265	#define pv_wait_head_or_lock __pv_wait_head_or_lock
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	266
				267	#ifdef CONFIG_PARAVIRT_SPINLOCKS
				268	#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
				269	#endif
				270
				271	#endif /* _GEN_PV_LOCK_SLOWPATH */
				272
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	273	/**
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	274	* queued_spin_lock_slowpath - acquire the queued spinlock
				275	* @lock: Pointer to queued spinlock structure
				276	* @val: Current value of the queued spinlock 32-bit word
				277	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	278	* (queue tail, pending bit, lock value)
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	279	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	280	* fast : slow : unlock
				281	* : :
				282	* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (,,0)
				283	* : \| ^--------.------. / :
				284	* : v \ \ \| :
				285	* pending : (0,1,1) +--> (0,1,0) \ \| :
				286	* : \| ^--' \| \| :
				287	* : v \| \| :
				288	* uncontended : (n,x,y) +--> (n,0,0) --' \| :
				289	* queue : \| ^--' \| :
				290	* : v \| :
				291	* contended : (,x,y) +--> (,0,0) ---> (*,0,1) -' :
				292	* queue : ^--' :
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	293	*/
				294	void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
				295	{
				296	struct mcs_spinlock prev, next, *node;
				297	u32 new, old, tail;
				298	int idx;
				299
				300	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
				301
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	302	if (pv_enabled())
				303	goto queue;
				304
Peter Zijlstra	43b3f02	2015-09-04 17:25:23 +0200	[diff] [blame]	305	if (virt_spin_lock(lock))
Peter Zijlstra (Intel)	2aa79af	2015-04-24 14:56:36 -0400	[diff] [blame]	306	return;
				307
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	308	/*
				309	* wait for in-progress pending->locked hand-overs
				310	*
				311	* 0,1,0 -> 0,0,1
				312	*/
				313	if (val == _Q_PENDING_VAL) {
				314	while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
				315	cpu_relax();
				316	}
				317
				318	/*
				319	* trylock \|\| pending
				320	*
				321	* 0,0,0 -> 0,0,1 ; trylock
				322	* 0,0,1 -> 0,1,1 ; pending
				323	*/
				324	for (;;) {
				325	/*
				326	* If we observe any contention; queue.
				327	*/
				328	if (val & ~_Q_LOCKED_MASK)
				329	goto queue;
				330
				331	new = _Q_LOCKED_VAL;
				332	if (val == new)
				333	new \|= _Q_PENDING_VAL;
				334
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	335	/*
				336	* Acquire semantic is required here as the function may
				337	* return immediately if the lock was free.
				338	*/
				339	old = atomic_cmpxchg_acquire(&lock->val, val, new);
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	340	if (old == val)
				341	break;
				342
				343	val = old;
				344	}
				345
				346	/*
				347	* we won the trylock
				348	*/
				349	if (new == _Q_LOCKED_VAL)
				350	return;
				351
				352	/*
				353	* we're pending, wait for the owner to go away.
				354	*
				355	* ,1,1 -> ,1,0
Peter Zijlstra (Intel)	69f9cae	2015-04-24 14:56:34 -0400	[diff] [blame]	356	*
				357	* this wait loop must be a load-acquire such that we match the
				358	* store-release that clears the locked bit and create lock
				359	* sequentiality; this is because not all clear_pending_set_locked()
				360	* implementations imply full barriers.
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	361	*/
Peter Zijlstra	1f03e8d	2016-04-04 10:57:12 +0200	[diff] [blame]	362	smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	363
				364	/*
				365	* take ownership and clear the pending bit.
				366	*
				367	* ,1,0 -> ,0,1
				368	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	369	clear_pending_set_locked(lock);
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	370	return;
				371
				372	/*
				373	* End of pending bit optimistic spinning and beginning of MCS
				374	* queuing.
				375	*/
				376	queue:
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	377	node = this_cpu_ptr(&mcs_nodes[0]);
				378	idx = node->count++;
				379	tail = encode_tail(smp_processor_id(), idx);
				380
				381	node += idx;
				382	node->locked = 0;
				383	node->next = NULL;
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	384	pv_init_node(node);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	385
				386	/*
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	387	* We touched a (possibly) cold cacheline in the per-cpu queue node;
				388	* attempt the trylock once more in the hope someone let go while we
				389	* weren't watching.
				390	*/
				391	if (queued_spin_trylock(lock))
				392	goto release;
				393
				394	/*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	395	* We have already touched the queueing cacheline; don't bother with
				396	* pending stuff.
				397	*
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	398	* p,, -> n,,
Peter Zijlstra	8d53fa1	2016-06-08 09:12:30 +0200	[diff] [blame]	399	*
				400	* RELEASE, such that the stores to @node must be complete.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	401	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	402	old = xchg_tail(lock, tail);
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	403	next = NULL;
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	404
				405	/*
				406	* if there was a previous node; link it and wait until reaching the
				407	* head of the waitqueue.
				408	*/
Waiman Long	6403bd7	2015-04-24 14:56:33 -0400	[diff] [blame]	409	if (old & _Q_TAIL_MASK) {
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	410	prev = decode_tail(old);
Peter Zijlstra	8d53fa1	2016-06-08 09:12:30 +0200	[diff] [blame]	411
Will Deacon	95bcade	2018-02-13 13:22:56 +0000	[diff] [blame^]	412	/*
				413	* We must ensure that the stores to @node are observed before
				414	* the write to prev->next. The address dependency from
				415	* xchg_tail is not sufficient to ensure this because the read
				416	* component of xchg_tail is unordered with respect to the
				417	* initialisation of @node.
				418	*/
				419	smp_store_release(&prev->next, node);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	420
Waiman Long	cd0272f	2015-11-09 19:09:27 -0500	[diff] [blame]	421	pv_wait_node(node, prev);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	422	arch_mcs_spin_lock_contended(&node->locked);
Waiman Long	81b5598	2015-11-09 19:09:22 -0500	[diff] [blame]	423
				424	/*
				425	* While waiting for the MCS lock, the next pointer may have
				426	* been set by another lock waiter. We optimistically load
				427	* the next pointer & prefetch the cacheline for writing
				428	* to reduce latency in the upcoming MCS unlock operation.
				429	*/
				430	next = READ_ONCE(node->next);
				431	if (next)
				432	prefetchw(next);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	433	}
				434
				435	/*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	436	* we're at the head of the waitqueue, wait for the owner & pending to
				437	* go away.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	438	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	439	* ,x,y -> ,0,0
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	440	*
				441	* this wait loop must use a load-acquire such that we match the
				442	* store-release that clears the locked bit and create lock
				443	* sequentiality; this is because the set_locked() function below
				444	* does not imply a full barrier.
				445	*
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	446	* The PV pv_wait_head_or_lock function, if active, will acquire
				447	* the lock and return a non-zero value. So we have to skip the
Peter Zijlstra	1f03e8d	2016-04-04 10:57:12 +0200	[diff] [blame]	448	* smp_cond_load_acquire() call. As the next PV queue head hasn't been
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	449	* designated yet, there is no way for the locked value to become
				450	* _Q_SLOW_VAL. So both the set_locked() and the
				451	* atomic_cmpxchg_relaxed() calls will be safe.
				452	*
				453	* If PV isn't active, 0 will be returned instead.
				454	*
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	455	*/
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	456	if ((val = pv_wait_head_or_lock(lock, node)))
				457	goto locked;
				458
Peter Zijlstra	1f03e8d	2016-04-04 10:57:12 +0200	[diff] [blame]	459	val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	460
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	461	locked:
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	462	/*
				463	* claim the lock:
				464	*
Peter Zijlstra (Intel)	c1fb159	2015-04-24 14:56:32 -0400	[diff] [blame]	465	* n,0,0 -> 0,0,1 : lock, uncontended
				466	* ,0,0 -> ,0,1 : lock, contended
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	467	*
				468	* If the queue head is the only one in the queue (lock value == tail),
				469	* clear the tail code and grab the lock. Otherwise, we only need
				470	* to grab the lock.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	471	*/
				472	for (;;) {
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	473	/* In the PV case we might already have _Q_LOCKED_VAL set */
				474	if ((val & _Q_TAIL_MASK) != tail) {
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	475	set_locked(lock);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	476	break;
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	477	}
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	478	/*
Peter Zijlstra	1f03e8d	2016-04-04 10:57:12 +0200	[diff] [blame]	479	* The smp_cond_load_acquire() call above has provided the
				480	* necessary acquire semantics required for locking. At most
				481	* two iterations of this loop may be ran.
Waiman Long	64d816c	2015-11-09 19:09:21 -0500	[diff] [blame]	482	*/
				483	old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	484	if (old == val)
				485	goto release; /* No contention */
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	486
				487	val = old;
				488	}
				489
				490	/*
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	491	* contended path; wait for next if not observed yet, release.
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	492	*/
Waiman Long	aa68744	2015-11-09 19:09:23 -0500	[diff] [blame]	493	if (!next) {
				494	while (!(next = READ_ONCE(node->next)))
				495	cpu_relax();
				496	}
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	497
Waiman Long	2c83e8e	2015-04-24 14:56:35 -0400	[diff] [blame]	498	arch_mcs_spin_unlock_contended(&next->locked);
Waiman Long	75d2270	2015-07-11 16:36:52 -0400	[diff] [blame]	499	pv_kick_node(lock, next);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	500
				501	release:
				502	/*
				503	* release the node
				504	*/
Pan Xinhui	0dceeaf	2016-06-14 14:37:27 +0800	[diff] [blame]	505	__this_cpu_dec(mcs_nodes[0].count);
Waiman Long	a33fda3	2015-04-24 14:56:30 -0400	[diff] [blame]	506	}
				507	EXPORT_SYMBOL(queued_spin_lock_slowpath);
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	508
/*
 * Generate the paravirt code for queued_spin_lock_slowpath().
 */
				512	#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
				513	#define _GEN_PV_LOCK_SLOWPATH
				514
				515	#undef pv_enabled
				516	#define pv_enabled() true
				517
				518	#undef pv_init_node
				519	#undef pv_wait_node
				520	#undef pv_kick_node
Waiman Long	1c4941f	2015-11-10 16:18:56 -0500	[diff] [blame]	521	#undef pv_wait_head_or_lock
Waiman Long	a23db28	2015-04-24 14:56:37 -0400	[diff] [blame]	522
				523	#undef queued_spin_lock_slowpath
				524	#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
				525
				526	#include "qspinlock_paravirt.h"
				527	#include "qspinlock.c"
				528
				529	#endif