[S390] Convert s390 to GENERIC_CLOCKEVENTS.

This way we get rid of s390's NO_IDLE_HZ and use the generic dynticks
variant instead. In addition we get high resolution timers for free.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index da6ea64..f6a68e1 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -43,6 +43,9 @@
 config GENERIC_TIME
 	def_bool y
 
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
 config GENERIC_BUG
 	bool
 	depends on BUG
@@ -73,6 +76,8 @@
 
 comment "Processor type and features"
 
+source "kernel/time/Kconfig"
+
 config 64BIT
 	bool "64 bit kernel"
 	help
@@ -487,25 +492,6 @@
 
 source kernel/Kconfig.hz
 
-config NO_IDLE_HZ
-	bool "No HZ timer ticks in idle"
-	help
-	  Switches the regular HZ timer off when the system is going idle.
-	  This helps z/VM to detect that the Linux system is idle. VM can
-	  then "swap-out" this guest which reduces memory usage. It also
-	  reduces the overhead of idle systems.
-
-	  The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer.
-	  hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ
-	  timer is active.
-
-config NO_IDLE_HZ_INIT
-	bool "HZ timer in idle off by default"
-	depends on NO_IDLE_HZ
-	help
-	  The HZ timer is switched off in idle by default. That means the
-	  HZ timer is already disabled at boot time.
-
 config S390_HYPFS_FS
 	bool "s390 hypervisor file system support"
 	select SYS_HYPERVISOR
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index eb768ce8..df03324 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -36,6 +36,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/utsname.h>
+#include <linux/tick.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -167,9 +168,10 @@
 void cpu_idle(void)
 {
 	for (;;) {
+		tick_nohz_stop_sched_tick();
 		while (!need_resched())
 			default_idle();
-
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 3a8772d..947d8c7 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -120,12 +120,9 @@
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 	s390_idle_check();
-	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-		/**
-		 * Make sure that the i/o interrupt did not "overtake"
-		 * the last HZ timer interrupt.
-		 */
-		account_ticks(S390_lowcore.int_clock);
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
 	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
         index = ext_hash(code);
 	for (p = ext_int_hash[index]; p; p = p->next) {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 22040d0..7141147 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -428,7 +428,7 @@
 	lc->io_new_psw.mask = psw_kernel_bits;
 	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
 	lc->ipl_device = S390_lowcore.ipl_device;
-	lc->jiffy_timer = -1LL;
+	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
 	lc->async_stack = (unsigned long)
 		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 925f9dc..17c4de9 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -30,7 +30,7 @@
 #include <linux/timex.h>
 #include <linux/notifier.h>
 #include <linux/clocksource.h>
-
+#include <linux/clockchips.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/s390_ext.h>
@@ -57,9 +57,9 @@
 
 static ext_int_info_t ext_int_info_cc;
 static ext_int_info_t ext_int_etr_cc;
-static u64 init_timer_cc;
 static u64 jiffies_timer_cc;
-static u64 xtime_cc;
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -95,162 +95,40 @@
 #define s390_do_profile()	do { ; } while(0)
 #endif /* CONFIG_PROFILING */
 
-/*
- * Advance the per cpu tick counter up to the time given with the
- * "time" argument. The per cpu update consists of accounting
- * the virtual cpu time, calling update_process_times and calling
- * the profiling hook. If xtime is before time it is advanced as well.
- */
-void account_ticks(u64 time)
+void clock_comparator_work(void)
 {
-	__u32 ticks;
-	__u64 tmp;
+	struct clock_event_device *cd;
 
-	/* Calculate how many ticks have passed. */
-	if (time < S390_lowcore.jiffy_timer)
-		return;
-	tmp = time - S390_lowcore.jiffy_timer;
-	if (tmp >= 2*CLK_TICKS_PER_JIFFY) {  /* more than two ticks ? */
-		ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1;
-		S390_lowcore.jiffy_timer +=
-			CLK_TICKS_PER_JIFFY * (__u64) ticks;
-	} else if (tmp >= CLK_TICKS_PER_JIFFY) {
-		ticks = 2;
-		S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY;
-	} else {
-		ticks = 1;
-		S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY;
-	}
-
-#ifdef CONFIG_SMP
-	/*
-	 * Do not rely on the boot cpu to do the calls to do_timer.
-	 * Spread it over all cpus instead.
-	 */
-	write_seqlock(&xtime_lock);
-	if (S390_lowcore.jiffy_timer > xtime_cc) {
-		__u32 xticks;
-		tmp = S390_lowcore.jiffy_timer - xtime_cc;
-		if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
-			xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
-			xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY;
-		} else {
-			xticks = 1;
-			xtime_cc += CLK_TICKS_PER_JIFFY;
-		}
-		do_timer(xticks);
-	}
-	write_sequnlock(&xtime_lock);
-#else
-	do_timer(ticks);
-#endif
-
-	while (ticks--)
-		update_process_times(user_mode(get_irq_regs()));
-
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	cd = &__get_cpu_var(comparators);
+	cd->event_handler(cd);
 	s390_do_profile();
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
-
-#ifdef CONFIG_NO_IDLE_HZ_INIT
-int sysctl_hz_timer = 0;
-#else
-int sysctl_hz_timer = 1;
-#endif
-
 /*
- * Stop the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
+ * Fixup the clock comparator.
  */
-static void stop_hz_timer(void)
+static void fixup_clock_comparator(unsigned long long delta)
 {
-	unsigned long flags;
-	unsigned long seq, next;
-	__u64 timer, todval;
-	int cpu = smp_processor_id();
-
-	if (sysctl_hz_timer != 0)
+	/* If nobody is waiting there's nothing to fix. */
+	if (S390_lowcore.clock_comparator == -1ULL)
 		return;
-
-	cpu_set(cpu, nohz_cpu_mask);
-
-	/*
-	 * Leave the clock comparator set up for the next timer
-	 * tick if either rcu or a softirq is pending.
-	 */
-	if (rcu_needs_cpu(cpu) || local_softirq_pending()) {
-		cpu_clear(cpu, nohz_cpu_mask);
-		return;
-	}
-
-	/*
-	 * This cpu is going really idle. Set up the clock comparator
-	 * for the next event.
-	 */
-	next = next_timer_interrupt();
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-	todval = -1ULL;
-	/* Be careful about overflows. */
-	if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) {
-		timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY;
-		if (timer >= jiffies_timer_cc)
-			todval = timer;
-	}
-	set_clock_comparator(todval);
+	S390_lowcore.clock_comparator += delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-/*
- * Start the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
- */
-static void start_hz_timer(void)
+static int s390_next_event(unsigned long delta,
+			   struct clock_event_device *evt)
 {
-	if (!cpu_isset(smp_processor_id(), nohz_cpu_mask))
-		return;
-	account_ticks(get_clock());
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
-	cpu_clear(smp_processor_id(), nohz_cpu_mask);
+	S390_lowcore.clock_comparator = get_clock() + delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return 0;
 }
 
-static int nohz_idle_notify(struct notifier_block *self,
-			    unsigned long action, void *hcpu)
+static void s390_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
 {
-	switch (action) {
-	case S390_CPU_IDLE:
-		stop_hz_timer();
-		break;
-	case S390_CPU_NOT_IDLE:
-		start_hz_timer();
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block nohz_idle_nb = {
-	.notifier_call = nohz_idle_notify,
-};
-
-static void __init nohz_init(void)
-{
-	if (register_idle_notifier(&nohz_idle_nb))
-		panic("Couldn't register idle notifier");
-}
-
-#endif
-
-/*
- * Set up per cpu jiffy timer and set the clock comparator.
- */
-static void setup_jiffy_timer(void)
-{
-	/* Set up clock comparator to next jiffy. */
-	S390_lowcore.jiffy_timer =
-		jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY;
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 /*
@@ -259,7 +137,26 @@
  */
 void init_cpu_timer(void)
 {
-	setup_jiffy_timer();
+	struct clock_event_device *cd;
+	int cpu;
+
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	cpu = smp_processor_id();
+	cd = &per_cpu(comparators, cpu);
+	cd->name		= "comparator";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->mult		= 16777;
+	cd->shift		= 12;
+	cd->min_delta_ns	= 1;
+	cd->max_delta_ns	= LONG_MAX;
+	cd->rating		= 400;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= s390_next_event;
+	cd->set_mode		= s390_set_mode;
+
+	clockevents_register_device(cd);
 
 	/* Enable clock comparator timer interrupt. */
 	__ctl_set_bit(0,11);
@@ -270,8 +167,6 @@
 
 static void clock_comparator_interrupt(__u16 code)
 {
-	/* set clock comparator for next tick */
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 static void etr_reset(void);
@@ -316,8 +211,9 @@
  */
 void __init time_init(void)
 {
+	u64 init_timer_cc;
+
 	init_timer_cc = reset_tod_clock();
-	xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY;
 	jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
 
 	/* set xtime */
@@ -342,10 +238,6 @@
 	/* Enable TOD clock interrupts on the boot cpu. */
 	init_cpu_timer();
 
-#ifdef CONFIG_NO_IDLE_HZ
-	nohz_init();
-#endif
-
 #ifdef CONFIG_VIRT_TIMER
 	vtime_init();
 #endif
@@ -699,53 +591,49 @@
 }
 
 /*
- * The time is "clock". xtime is what we think the time is.
+ * The time is "clock". old is what we think the time is.
  * Adjust the value by a multiple of jiffies and add the delta to ntp.
  * "delay" is an approximation how long the synchronization took. If
  * the time correction is positive, then "delay" is subtracted from
  * the time difference and only the remaining part is passed to ntp.
  */
-static void etr_adjust_time(unsigned long long clock, unsigned long long delay)
+static unsigned long long etr_adjust_time(unsigned long long old,
+					  unsigned long long clock,
+					  unsigned long long delay)
 {
 	unsigned long long delta, ticks;
 	struct timex adjust;
 
-	/*
-	 * We don't have to take the xtime lock because the cpu
-	 * executing etr_adjust_time is running disabled in
-	 * tasklet context and all other cpus are looping in
-	 * etr_sync_cpu_start.
-	 */
-	if (clock > xtime_cc) {
+	if (clock > old) {
 		/* It is later than we thought. */
-		delta = ticks = clock - xtime_cc;
+		delta = ticks = clock - old;
 		delta = ticks = (delta < delay) ? 0 : delta - delay;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc + delta;
-		jiffies_timer_cc = jiffies_timer_cc + delta;
-		xtime_cc = xtime_cc + delta;
 		adjust.offset = ticks * (1000000 / HZ);
 	} else {
 		/* It is earlier than we thought. */
-		delta = ticks = xtime_cc - clock;
+		delta = ticks = old - clock;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc - delta;
-		jiffies_timer_cc = jiffies_timer_cc - delta;
-		xtime_cc = xtime_cc - delta;
+		delta = -delta;
 		adjust.offset = -ticks * (1000000 / HZ);
 	}
+	jiffies_timer_cc += delta;
 	if (adjust.offset != 0) {
 		printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
 		       adjust.offset);
 		adjust.modes = ADJ_OFFSET_SINGLESHOT;
 		do_adjtimex(&adjust);
 	}
+	return delta;
 }
 
+static struct {
+	int in_sync;
+	unsigned long long fixup_cc;
+} etr_sync;
+
 static void etr_sync_cpu_start(void *dummy)
 {
-	int *in_sync = dummy;
-
 	etr_enable_sync_clock();
 	/*
 	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
@@ -753,7 +641,7 @@
 	 * __udelay will stop the cpu on an enabled wait psw until the
 	 * TOD is running again.
 	 */
-	while (*in_sync == 0) {
+	while (etr_sync.in_sync == 0) {
 		__udelay(1);
 		/*
 		 * A different cpu changes *in_sync. Therefore use
@@ -761,14 +649,14 @@
 		 */
 		barrier();
 	}
-	if (*in_sync != 1)
+	if (etr_sync.in_sync != 1)
 		/* Didn't work. Clear per-cpu in sync bit again. */
 		etr_disable_sync_clock(NULL);
 	/*
 	 * This round of TOD syncing is done. Set the clock comparator
 	 * to the next tick and let the processor continue.
 	 */
-	setup_jiffy_timer();
+	fixup_clock_comparator(etr_sync.fixup_cc);
 }
 
 static void etr_sync_cpu_end(void *dummy)
@@ -783,8 +671,8 @@
 static int etr_sync_clock(struct etr_aib *aib, int port)
 {
 	struct etr_aib *sync_port;
-	unsigned long long clock, delay;
-	int in_sync, follows;
+	unsigned long long clock, old_clock, delay, delta;
+	int follows;
 	int rc;
 
 	/* Check if the current aib is adjacent to the sync port aib. */
@@ -799,9 +687,9 @@
 	 * successfully synced the clock. smp_call_function will
 	 * return after all other cpus are in etr_sync_cpu_start.
 	 */
-	in_sync = 0;
+	memset(&etr_sync, 0, sizeof(etr_sync));
 	preempt_disable();
-	smp_call_function(etr_sync_cpu_start,&in_sync,0,0);
+	smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
 	local_irq_disable();
 	etr_enable_sync_clock();
 
@@ -809,6 +697,7 @@
 	__ctl_set_bit(14, 21);
 	__ctl_set_bit(0, 29);
 	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+	old_clock = get_clock();
 	if (set_clock(clock) == 0) {
 		__udelay(1);	/* Wait for the clock to start. */
 		__ctl_clear_bit(0, 29);
@@ -817,16 +706,17 @@
 		/* Adjust Linux timing variables. */
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		etr_adjust_time(clock, delay);
-		setup_jiffy_timer();
+		delta = etr_adjust_time(old_clock, clock, delay);
+		etr_sync.fixup_cc = delta;
+		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
 		if (!etr_aib_follows(sync_port, aib, port)) {
 			/* Didn't work. */
 			etr_disable_sync_clock(NULL);
-			in_sync = -EAGAIN;
+			etr_sync.in_sync = -EAGAIN;
 			rc = -EAGAIN;
 		} else {
-			in_sync = 1;
+			etr_sync.in_sync = 1;
 			rc = 0;
 		}
 	} else {
@@ -834,7 +724,7 @@
 		__ctl_clear_bit(0, 29);
 		__ctl_clear_bit(14, 21);
 		etr_disable_sync_clock(NULL);
-		in_sync = -EAGAIN;
+		etr_sync.in_sync = -EAGAIN;
 		rc = -EAGAIN;
 	}
 	local_irq_enable();
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 70f2a86..eae21a8 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -34,7 +34,7 @@
  */
 void __udelay(unsigned long usecs)
 {
-	u64 end, time, jiffy_timer = 0;
+	u64 end, time, old_cc = 0;
 	unsigned long flags, cr0, mask, dummy;
 	int irq_context;
 
@@ -43,8 +43,8 @@
 		local_bh_disable();
 	local_irq_save(flags);
 	if (raw_irqs_disabled_flags(flags)) {
-		jiffy_timer = S390_lowcore.jiffy_timer;
-		S390_lowcore.jiffy_timer = -1ULL - (4096 << 12);
+		old_cc = S390_lowcore.clock_comparator;
+		S390_lowcore.clock_comparator = -1ULL;
 		__ctl_store(cr0, 0, 0);
 		dummy = (cr0 & 0xffff00e0) | 0x00000800;
 		__ctl_load(dummy , 0, 0);
@@ -55,8 +55,8 @@
 
 	end = get_clock() + ((u64) usecs << 12);
 	do {
-		time = end < S390_lowcore.jiffy_timer ?
-			end : S390_lowcore.jiffy_timer;
+		time = end < S390_lowcore.clock_comparator ?
+			end : S390_lowcore.clock_comparator;
 		set_clock_comparator(time);
 		trace_hardirqs_on();
 		__load_psw_mask(mask);
@@ -65,10 +65,10 @@
 
 	if (raw_irqs_disabled_flags(flags)) {
 		__ctl_load(cr0, 0, 0);
-		S390_lowcore.jiffy_timer = jiffy_timer;
+		S390_lowcore.clock_comparator = old_cc;
 	}
 	if (!irq_context)
 		_local_bh_enable();
-	set_clock_comparator(S390_lowcore.jiffy_timer);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	local_irq_restore(flags);
 }