percpu: add __percpu for sparse.

We have to make __kernel "__attribute__((address_space(0)))" so we can
cast to it.

tj: * put_cpu_var() update.

    * Annotations added to dynamic allocator interface.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f965f83..2c0d31a 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -30,8 +30,12 @@
 	preempt_disable();				\
 	&__get_cpu_var(var); }))
 
+/*
+ * The weird & is necessary because sparse considers (void)(var) to be
+ * a direct dereference of percpu variable (var).
+ */
 #define put_cpu_var(var) do {				\
-	(void)(var);					\
+	(void)&(var);					\
 	preempt_enable();				\
 } while (0)
 
@@ -130,9 +134,9 @@
  */
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
 
-extern void *__alloc_reserved_percpu(size_t size, size_t align);
-extern void *__alloc_percpu(size_t size, size_t align);
-extern void free_percpu(void *__pdata);
+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_percpu(size_t size, size_t align);
+extern void free_percpu(void __percpu *__pdata);
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
@@ -142,7 +146,7 @@
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size, size_t align)
+static inline void __percpu *__alloc_percpu(size_t size, size_t align)
 {
 	/*
 	 * Can't easily make larger alignment work with kmalloc.  WARN
@@ -153,7 +157,7 @@
 	return kzalloc(size, GFP_KERNEL);
 }
 
-static inline void free_percpu(void *p)
+static inline void free_percpu(void __percpu *p)
 {
 	kfree(p);
 }
@@ -168,7 +172,7 @@
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	\
-	(typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
+	(typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
 
 /*
  * Optional methods for optimized non-lvalue per-cpu variable access.