Revert "time: Remove xtime_cache"
This reverts commit 7bc7d637452383d56ba4368d4336b0dde1bb476d, as
requested by John Stultz. Quoting John:
"Petr Titěra reported an issue where he saw odd atime regressions with
2.6.33 where there were a full second worth of nanoseconds in the
nanoseconds field.
He also reviewed the time code and narrowed down the problem: unhandled
overflow of the nanosecond field caused by rounding up the
sub-nanosecond accumulated time.
Details:
* At the end of update_wall_time(), we currently round up the
sub-nanosecond portion of accumulated time when storing it into xtime.
This was added to avoid time inconsistencies caused when the
sub-nanosecond portion was truncated when storing into xtime.
Unfortunately we don't handle the possible second overflow caused by
that rounding.
* Previously the xtime_cache code hid this overflow by normalizing the
xtime value when storing into the xtime_cache.
* We could try to handle the second overflow after the rounding up, but
since this affects the timekeeping's internal state, this would further
complicate the next accumulation cycle, causing small errors in ntp
steering. As much as I'd like to get rid of it, the xtime_cache code is
known to work.
* The correct fix is really to include the sub-nanosecond portion in the
timekeeping accessor function, so we don't need to round up at during
accumulation. This would greatly simplify the accumulation code.
Unfortunately, we can't do this safely until the last three
non-GENERIC_TIME arches (sparc32, arm, cris) are converted (those
patches are in -mm) and we kill off the spots where arches set xtime
directly. This is all 2.6.34 material, so I think reverting the
xtime_cache change is the best approach for now.
Many thanks to Petr for both reporting and finding the issue!"
Reported-by: Petr Titěra <P.Titera@century.cz>
Requested-by: john stultz <johnstul@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index af4135f..7faaa32 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,6 +165,13 @@
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
+static struct timespec xtime_cache __attribute__ ((aligned (16)));
+void update_xtime_cache(u64 nsec)
+{
+ xtime_cache = xtime;
+ timespec_add_ns(&xtime_cache, nsec);
+}
+
/* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
@@ -325,6 +332,8 @@
xtime = *tv;
+ update_xtime_cache(0);
+
timekeeper.ntp_error = 0;
ntp_clear();
@@ -550,6 +559,7 @@
}
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
+ update_xtime_cache(0);
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -583,6 +593,7 @@
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts);
}
+ update_xtime_cache(0);
/* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
@@ -722,6 +733,7 @@
timekeeper.ntp_error_shift;
}
+
/**
* logarithmic_accumulation - shifted accumulation of cycles
*
@@ -765,6 +777,7 @@
return offset;
}
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -774,6 +787,7 @@
{
struct clocksource *clock;
cycle_t offset;
+ u64 nsecs;
int shift = 0, maxshift;
/* Make sure we're fully resumed: */
@@ -839,6 +853,9 @@
timekeeper.ntp_error += timekeeper.xtime_nsec <<
timekeeper.ntp_error_shift;
+ nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
+ update_xtime_cache(nsecs);
+
/* check to see if there is a new clocksource to use */
update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
@@ -875,13 +892,13 @@
unsigned long get_seconds(void)
{
- return xtime.tv_sec;
+ return xtime_cache.tv_sec;
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
- return xtime;
+ return xtime_cache;
}
struct timespec current_kernel_time(void)
@@ -891,7 +908,8 @@
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime;
+
+ now = xtime_cache;
} while (read_seqretry(&xtime_lock, seq));
return now;
@@ -905,7 +923,8 @@
do {
seq = read_seqbegin(&xtime_lock);
- now = xtime;
+
+ now = xtime_cache;
mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq));