diff --git a/MAINTAINERS b/MAINTAINERS
index fbd6f52..1c32f8a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6403,7 +6403,7 @@
 M:	Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M:	"David S. Miller" <davem@davemloft.net>
-M:	Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+M:	Masami Hiramatsu <mhiramat@kernel.org>
 S:	Maintained
 F:	Documentation/kprobes.txt
 F:	include/linux/kprobes.h
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index df4f369..506c353 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -559,25 +559,25 @@
 #endif
 
 /**
- * fetch_or - perform *ptr |= mask and return old value of *ptr
- * @ptr: pointer to value
- * @mask: mask to OR on the value
- *
- * cmpxchg based fetch_or, macro so it works for different integer types
+ * atomic_fetch_or - perform *p |= mask and return old value of *p
+ * @p: pointer to atomic_t
+ * @mask: mask to OR on the atomic_t
  */
-#ifndef fetch_or
-#define fetch_or(ptr, mask)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (mask));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
-#endif
+#ifndef atomic_fetch_or
+static inline int atomic_fetch_or(atomic_t *p, int mask)
+{
+	int seen = atomic_read(p);
+	int expected;
 
+	/* Retry until cmpxchg finds *p unchanged since we last looked. */
+	do {
+		expected = seen;
+		seen = atomic_cmpxchg(p, expected, expected | mask);
+	} while (seen != expected);
+
+	return seen;
+}
+#endif
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 60bba7e..52c4847 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -720,7 +720,7 @@
 	struct task_cputime cputime_expires;
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 
 	struct list_head cpu_timers[3];
@@ -1549,7 +1549,7 @@
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index aa9f104..621fa8a 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -1 +1,5 @@
 #include <linux/compiler.h>
+
+#ifndef __always_inline		/* fallback if not already defined */
+#define __always_inline inline
+#endif /* __always_inline */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 53ab2f8..2324ba5 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2000,6 +2000,77 @@
 }
 
 /*
+ * Returns the next chain_key iteration
+ */
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+{
+	u64 next_key = iterate_chain_key(chain_key, class_idx);
+
+	/* No newline here: callers print the lock description next. */
+	printk(" class_idx:%d -> chain_key:%016Lx",
+		class_idx, (unsigned long long)next_key);
+	return next_key;
+}
+
+static void
+print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next)
+{
+	int nr_held = curr->lockdep_depth;
+	u64 key = 0;
+	int i;
+
+	/* +1: hlock_next is printed below in addition to the held locks. */
+	printk("depth: %u\n", nr_held + 1);
+	for (i = get_first_held_lock(curr, hlock_next); i < nr_held; i++) {
+		struct held_lock *held = &curr->held_locks[i];
+
+		key = print_chain_key_iteration(held->class_idx, key);
+		print_lock(held);
+	}
+
+	print_chain_key_iteration(hlock_next->class_idx, key);
+	print_lock(hlock_next);
+}
+
+static void print_chain_keys_chain(struct lock_chain *chain)
+{
+	u64 key = 0;
+	int i;
+
+	printk("depth: %u\n", chain->depth);
+	for (i = 0; i < chain->depth; i++) {
+		int class_id = chain_hlocks[chain->base + i];
+
+		/* Stored ids are class_idx - 1; the key is built from class_idx. */
+		key = print_chain_key_iteration(class_id + 1, key);
+		print_lock_name(&lock_classes[class_id]);
+		printk("\n");
+	}
+}
+
+static void __maybe_unused print_collision(struct task_struct *curr,
+		struct held_lock *hlock_next, struct lock_chain *chain)
+{
+	/* __maybe_unused: its callers are only conditionally compiled. */
+	printk("\n");
+	printk("======================\n");
+	printk("[chain_key collision ]\n");
+	print_kernel_ident();
+	printk("----------------------\n");
+	printk("%s/%d: ", current->comm, task_pid_nr(current));
+	printk("Hash chain already cached but the contents don't match!\n");
+
+	printk("Held locks:");
+	print_chain_keys_held_locks(curr, hlock_next);
+
+	printk("Locks in cached chain:");
+	print_chain_keys_chain(chain);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+}
+
+/*
  * Checks whether the chain and the current held locks are consistent
  * in depth and also in content. If they are not it most likely means
  * that there was a collision during the calculation of the chain_key.
@@ -2014,14 +2085,18 @@
 
 	i = get_first_held_lock(curr, hlock);
 
-	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) {
+		print_collision(curr, hlock, chain);
 		return 0;
+	}
 
 	for (j = 0; j < chain->depth - 1; j++, i++) {
 		id = curr->held_locks[i].class_idx - 1;
 
-		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
+			print_collision(curr, hlock, chain);
 			return 0;
+		}
 	}
 #endif
 	return 1;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d8465ee..8b489fc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -321,6 +321,24 @@
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
+/*
+ * cmpxchg() based fetch_or: OR @mask into *@ptr and return the old value.
+ * Kept as a macro so it works for different integer types; @ptr and @mask
+ * are each evaluated exactly once.
+ */
+#define fetch_or(ptr, mask)						\
+({									\
+	typeof(ptr) _ptr = (ptr);					\
+	typeof(mask) _mask = (mask);					\
+	typeof(*_ptr) _old = *_ptr, _val;				\
+									\
+	do {								\
+		_val = _old;						\
+		_old = cmpxchg(_ptr, _val, _val | _mask);		\
+	} while (_old != _val);						\
+	_old;								\
+})
+
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 084b79f..58e3310 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -157,52 +157,50 @@
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
-static unsigned long tick_dep_mask;
+static atomic_t tick_dep_mask;
 
-static void trace_tick_dependency(unsigned long dep)
+static bool check_tick_dependency(atomic_t *dep)
 {
-	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+	int val = atomic_read(dep);	/* one read serves every test below */
+
+	if (val & TICK_DEP_MASK_POSIX_TIMER) {
 		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+	if (val & TICK_DEP_MASK_PERF_EVENTS) {
 		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_SCHED) {
+	if (val & TICK_DEP_MASK_SCHED) {
 		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
 		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+		return true;
+	}
+
+	return false;	/* none of the four bits tested above is set */
 }
 
 static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (tick_dep_mask) {
-		trace_tick_dependency(tick_dep_mask);
+	if (check_tick_dependency(&tick_dep_mask))	/* also emits the trace */
 		return false;
-	}
 
-	if (ts->tick_dep_mask) {
-		trace_tick_dependency(ts->tick_dep_mask);
+	if (check_tick_dependency(&ts->tick_dep_mask))
 		return false;
-	}
 
-	if (current->tick_dep_mask) {
-		trace_tick_dependency(current->tick_dep_mask);
+	if (check_tick_dependency(&current->tick_dep_mask))
 		return false;
-	}
 
-	if (current->signal->tick_dep_mask) {
-		trace_tick_dependency(current->signal->tick_dep_mask);
+	if (check_tick_dependency(&current->signal->tick_dep_mask))
 		return false;
-	}
 
 	return true;
 }
@@ -259,12 +257,12 @@
 	preempt_enable();
 }
 
-static void tick_nohz_dep_set_all(unsigned long *dep,
+static void tick_nohz_dep_set_all(atomic_t *dep,
 				  enum tick_dep_bits bit)
 {
-	unsigned long prev;
+	int prev;	/* mask value before @bit was set */
 
-	prev = fetch_or(dep, BIT_MASK(bit));
+	prev = atomic_fetch_or(dep, BIT(bit));
 	if (!prev)
 		tick_nohz_full_kick_all();
 }
@@ -280,7 +278,7 @@
 
 void tick_nohz_dep_clear(enum tick_dep_bits bit)
 {
-	clear_bit(bit, &tick_dep_mask);
+	atomic_andnot(BIT(bit), &tick_dep_mask);
 }
 
 /*
@@ -289,12 +287,12 @@
  */
 void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
 {
-	unsigned long prev;
+	int prev;
 	struct tick_sched *ts;
 
 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
 	if (!prev) {
 		preempt_disable();
 		/* Perf needs local kick that is NMI safe */
@@ -313,7 +311,7 @@
 {
 	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	clear_bit(bit, &ts->tick_dep_mask);
+	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
 }
 
 /*
@@ -331,7 +329,7 @@
 
 void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-	clear_bit(bit, &tsk->tick_dep_mask);
+	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
 }
 
 /*
@@ -345,7 +343,7 @@
 
 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
 {
-	clear_bit(bit, &sig->tick_dep_mask);
+	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
 }
 
 /*
@@ -366,7 +364,8 @@
 	ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->tick_stopped) {
-		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+		if (atomic_read(&current->tick_dep_mask) ||
+		    atomic_read(&current->signal->tick_dep_mask))
 			tick_nohz_full_kick();
 	}
 out:
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index eb4e325..bf38226 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,7 +60,7 @@
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
-	unsigned long			tick_dep_mask;
+	atomic_t			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 5334ad9..1069d96 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -3,7 +3,7 @@
 make &> /dev/null
 
 for i in `ls tests/*.c`; do
-	testname=$(basename -s .c "$i")
+	testname=$(basename "$i" .c)
 	gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
 	echo -ne "$testname... "
 	if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
@@ -11,11 +11,13 @@
 	else
 		echo "FAILED!"
 	fi
-	rm tests/$testname
+	if [ -f "tests/$testname" ]; then	# absent e.g. when gcc failed
+		rm "tests/$testname"
+	fi
 done
 
 for i in `ls tests/*.c`; do
-	testname=$(basename -s .c "$i")
+	testname=$(basename "$i" .c)
 	gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
 	echo -ne "(PRELOAD) $testname... "
 	if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
@@ -23,5 +25,7 @@
 	else
 		echo "FAILED!"
 	fi
-	rm tests/$testname
+	if [ -f "tests/$testname" ]; then	# absent e.g. when gcc failed
+		rm "tests/$testname"
+	fi
 done
