Message ID | 1495856035-6622-5-git-send-email-john.stultz@linaro.org
---|---
State | New
Series | Fixes for two recently found timekeeping bugs
Quoting John Stultz (2017-05-27 04:33:55)
> Now that we fixed the sub-ns handling for CLOCK_MONOTONIC_RAW,
> remove the duplicative tk->raw_time.tv_nsec, which can be
> stored in tk->tkr_raw.xtime_nsec (similarly to how it's handled
> for monotonic time).

This patch breaks tkr_raw pretty badly. It is now accumulating 2x faster
than tkr_mono, with the occasional wacky jump between two reads.

e.g.:

@@ -838,6 +840,11 @@ ktime_t ktime_get_raw(void)

 	} while (read_seqcount_retry(&tk_core.seq, seq));

+	pr_err("%s: raw.base=%llu, mono.base=%llu: nsecs=%llu, mono.nsecs=%llu\n",
+	       __func__,
+	       ktime_to_ns(base), ktime_to_ns(tk->tkr_mono.base),
+	       nsecs, timekeeping_get_ns(&tk->tkr_mono));
+

gave

ktime_get_raw: raw.base=40255680121, mono.base=39940532364: nsecs=261321742, mono.nsecs=318630514
ktime_get_raw: raw.base=40339680124, mono.base=39940532364: nsecs=345836800, mono.nsecs=403109209

https://bugs.freedesktop.org/show_bug.cgi?id=102336

The patch still reverts cleanly and, as I understand it, was not part of
the bugfixes but a cleanup afterwards, so it can be reapplied at leisure.

> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Miroslav Lichvar <mlichvar@redhat.com>
> Cc: Richard Cochran <richardcochran@gmail.com>
> Cc: Prarit Bhargava <prarit@redhat.com>
> Cc: Stephen Boyd <stephen.boyd@linaro.org>
> Cc: Kevin Brodsky <kevin.brodsky@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Daniel Mentz <danielmentz@google.com>
> Signed-off-by: John Stultz <john.stultz@linaro.org>
> ---
>  arch/arm64/kernel/vdso.c            |  6 ++---
>  include/linux/timekeeper_internal.h |  4 ++--
>  kernel/time/timekeeping.c           | 47 ++++++++++++++++++++-----------------
>  3 files changed, 29 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> index d0cb007..7492d90 100644
> --- a/arch/arm64/kernel/vdso.c
> +++ b/arch/arm64/kernel/vdso.c
> @@ -220,10 +220,8 @@ void update_vsyscall(struct timekeeper *tk)
>  	if (!use_syscall) {
>  		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
>  		vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
> -		vdso_data->raw_time_sec = tk->raw_time.tv_sec;
> -		vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
> -					    tk->tkr_raw.shift) +
> -					   tk->tkr_raw.xtime_nsec;
> +		vdso_data->raw_time_sec = tk->raw_sec;
> +		vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
>  		vdso_data->xtime_clock_sec = tk->xtime_sec;
>  		vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
>  		vdso_data->cs_mono_mult = tk->tkr_mono.mult;
> diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
> index 528cc86..8abf6df 100644
> --- a/include/linux/timekeeper_internal.h
> +++ b/include/linux/timekeeper_internal.h
> @@ -52,7 +52,7 @@ struct tk_read_base {
>   * @clock_was_set_seq:	The sequence number of clock was set events
>   * @cs_was_changed_seq:	The sequence number of clocksource change events
>   * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
> - * @raw_time:		Monotonic raw base time in timespec64 format
> + * @raw_sec:		CLOCK_MONOTONIC_RAW time in seconds
>   * @cycle_interval:	Number of clock cycles in one NTP interval
>   * @xtime_interval:	Number of clock shifted nano seconds in one NTP
>   *			interval.
> @@ -94,7 +94,7 @@ struct timekeeper {
>  	unsigned int		clock_was_set_seq;
>  	u8			cs_was_changed_seq;
>  	ktime_t			next_leap_ktime;
> -	struct timespec64	raw_time;
> +	u64			raw_sec;
>
>  	/* The following members are for timekeeping internal use */
>  	u64			cycle_interval;
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 35d3ba3..0c3b8c1 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -72,6 +72,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
>  		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
>  		tk->xtime_sec++;
>  	}
> +	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
> +		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
> +		tk->raw_sec++;
> +	}
>  }
>
>  static inline struct timespec64 tk_xtime(struct timekeeper *tk)
> @@ -287,12 +291,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
>  	/* if changing clocks, convert xtime_nsec shift units */
>  	if (old_clock) {
>  		int shift_change = clock->shift - old_clock->shift;
> -		if (shift_change < 0)
> +		if (shift_change < 0) {
>  			tk->tkr_mono.xtime_nsec >>= -shift_change;
> -		else
> +			tk->tkr_raw.xtime_nsec >>= -shift_change;
> +		} else {
>  			tk->tkr_mono.xtime_nsec <<= shift_change;
> +			tk->tkr_raw.xtime_nsec <<= shift_change;
> +		}
>  	}
> -	tk->tkr_raw.xtime_nsec = 0;
>
>  	tk->tkr_mono.shift = clock->shift;
>  	tk->tkr_raw.shift = clock->shift;
> @@ -617,9 +623,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
>  	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
>  	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
>
> -	/* Update the monotonic raw base */
> -	tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
> -
>  	/*
>  	 * The sum of the nanoseconds portions of xtime and
>  	 * wall_to_monotonic can be greater/equal one second. Take
> @@ -629,6 +632,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
>  	if (nsec >= NSEC_PER_SEC)
>  		seconds++;
>  	tk->ktime_sec = seconds;
> +
> +	/* Update the monotonic raw base */
> +	seconds = tk->raw_sec;
> +	nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
> +	tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
>  }
>
>  /* must hold timekeeper_lock */
> @@ -670,7 +678,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
>  static void timekeeping_forward_now(struct timekeeper *tk)
>  {
>  	u64 cycle_now, delta;
> -	u64 nsec;
>
>  	cycle_now = tk_clock_read(&tk->tkr_mono);
>  	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
> @@ -682,10 +689,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
>  	/* If arch requires, add in get_arch_timeoffset() */
>  	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
>
> -	tk_normalize_xtime(tk);
>
> -	nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
> -	timespec64_add_ns(&tk->raw_time, nsec);
> +	tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
> +
> +	/* If arch requires, add in get_arch_timeoffset() */
> +	tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
> +
> +	tk_normalize_xtime(tk);
>  }
>
>  /**
> @@ -1371,19 +1381,18 @@ int timekeeping_notify(struct clocksource *clock)
>  void getrawmonotonic64(struct timespec64 *ts)
>  {
>  	struct timekeeper *tk = &tk_core.timekeeper;
> -	struct timespec64 ts64;
>  	unsigned long seq;
>  	u64 nsecs;
>
>  	do {
>  		seq = read_seqcount_begin(&tk_core.seq);
> +		ts->tv_sec = tk->raw_sec;
>  		nsecs = timekeeping_get_ns(&tk->tkr_raw);
> -		ts64 = tk->raw_time;
>
>  	} while (read_seqcount_retry(&tk_core.seq, seq));
>
> -	timespec64_add_ns(&ts64, nsecs);
> -	*ts = ts64;
> +	ts->tv_nsec = 0;
> +	timespec64_add_ns(ts, nsecs);
>  }
>  EXPORT_SYMBOL(getrawmonotonic64);
>
> @@ -1507,8 +1516,7 @@ void __init timekeeping_init(void)
>  	tk_setup_internals(tk, clock);
>
>  	tk_set_xtime(tk, &now);
> -	tk->raw_time.tv_sec = 0;
> -	tk->raw_time.tv_nsec = 0;
> +	tk->raw_sec = 0;
>  	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
>  		boot = tk_xtime(tk);
>
> @@ -2009,17 +2017,12 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
>  	*clock_set |= accumulate_nsecs_to_secs(tk);
>
>  	/* Accumulate raw time */
> -	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec
> -					<< tk->tkr_raw.shift;
>  	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
>  	nsecps = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
>  	while (tk->tkr_raw.xtime_nsec >= nsecps) {
>  		tk->tkr_raw.xtime_nsec -= nsecps;
> -		tk->raw_time.tv_sec++;
> +		tk->raw_sec++;
>  	}
> -	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
> -	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec
> -					<< tk->tkr_raw.shift;
>
>  	/* Accumulate error between NTP and clock interval */
>  	tk->ntp_error += tk->ntp_tick << shift;
> --
> 2.7.4
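To make the 2x figure concrete, here is the arithmetic on the two ktime_get_raw log lines in Chris's report, treating each read as base + nsecs (values copied verbatim from the logs):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* values taken from the two ktime_get_raw log lines above */
	uint64_t raw0  = 40255680121ULL + 261321742ULL;  /* raw.base + nsecs */
	uint64_t raw1  = 40339680124ULL + 345836800ULL;
	uint64_t mono0 = 39940532364ULL + 318630514ULL;  /* mono.base + mono.nsecs */
	uint64_t mono1 = 39940532364ULL + 403109209ULL;

	/* prints: raw delta = 168515061 ns, mono delta = 84478695 ns */
	printf("raw delta = %llu ns, mono delta = %llu ns\n",
	       (unsigned long long)(raw1 - raw0),
	       (unsigned long long)(mono1 - mono0));
	return 0;
}
```

Between the two reads, raw time advanced 168,515,061 ns while monotonic time advanced 84,478,695 ns, a ratio of roughly 1.99, matching the "accumulating 2x faster" observation.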
On Fri, Aug 25, 2017 at 6:40 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Quoting John Stultz (2017-05-27 04:33:55)
>> Now that we fixed the sub-ns handling for CLOCK_MONOTONIC_RAW,
>> remove the duplicative tk->raw_time.tv_nsec, which can be
>> stored in tk->tkr_raw.xtime_nsec (similarly to how it's handled
>> for monotonic time).
>
> This patch breaks tkr_raw pretty badly. It is now accumulating 2x faster
> than tkr_mono, with the occasional wacky jump between two reads.
>
> e.g.:
>
> @@ -838,6 +840,11 @@ ktime_t ktime_get_raw(void)
>
>  	} while (read_seqcount_retry(&tk_core.seq, seq));
>
> +	pr_err("%s: raw.base=%llu, mono.base=%llu: nsecs=%llu, mono.nsecs=%llu\n",
> +	       __func__,
> +	       ktime_to_ns(base), ktime_to_ns(tk->tkr_mono.base),
> +	       nsecs, timekeeping_get_ns(&tk->tkr_mono));
> +
>
> gave
> ktime_get_raw: raw.base=40255680121, mono.base=39940532364: nsecs=261321742, mono.nsecs=318630514
> ktime_get_raw: raw.base=40339680124, mono.base=39940532364: nsecs=345836800, mono.nsecs=403109209
>
> https://bugs.freedesktop.org/show_bug.cgi?id=102336
>
> The patch still reverts cleanly and, as I understand it, was not part of
> the bugfixes but a cleanup afterwards, so it can be reapplied at leisure.

Thanks for the bug report!

It's odd, as I'm not seeing such behavior in my testing (using the
raw_skew or change_skew tests in kselftest/timers). Can you try running
those tests to see if they fail on your hardware?

The bug report says it is BYT specific, but I'm not sure what that
means. Are there any hardware-specific details you can share? What
clocksource is being used? Can you send a dmesg log?

I'll look over the code again to see if I can catch anything by review.
Worst case, if we can't get any traction on this in a day or so, I'll
submit a revert.

thanks
-john
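For reference, a minimal userspace check in the spirit of the raw_skew test John mentions might look like the sketch below; this is an illustration, not the actual tools/testing/selftests/timers/raw_skew.c. Note that, as John's follow-up explains, the regression was confined to the in-kernel ktime_get_raw() path, so a clock_gettime()-based check like this one would not have caught it:

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static long long ts_ns(const struct timespec *ts)
{
	return (long long)ts->tv_sec * 1000000000LL + ts->tv_nsec;
}

int main(void)
{
	struct timespec m0, r0, m1, r1;

	/* sample both clocks around a sleep and compare the intervals */
	clock_gettime(CLOCK_MONOTONIC, &m0);
	clock_gettime(CLOCK_MONOTONIC_RAW, &r0);
	sleep(2);
	clock_gettime(CLOCK_MONOTONIC, &m1);
	clock_gettime(CLOCK_MONOTONIC_RAW, &r1);

	long long mono = ts_ns(&m1) - ts_ns(&m0);
	long long raw  = ts_ns(&r1) - ts_ns(&r0);

	/* small skew is normal (NTP slews CLOCK_MONOTONIC but not _RAW);
	 * a raw interval ~2x the mono interval would indicate breakage */
	printf("mono=%lld ns raw=%lld ns skew=%lld ns\n",
	       mono, raw, raw - mono);
	return 0;
}
```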
On Fri, Aug 25, 2017 at 11:55 AM, John Stultz <john.stultz@linaro.org> wrote:
> I'll look over the code again to see if I can catch anything by review.
> Worst case, if we can't get any traction on this in a day or so, I'll
> submit a revert.

I think I found the issue. In tk_update_ktime_data() I add raw_sec and
the shifted-down tk->tkr_raw.xtime_nsec into the base. But we already
add tk->tkr_raw.xtime_nsec to the offset and shift it all down in
timekeeping_delta_to_ns(), called from ktime_get_raw(), so we are
effectively accumulating the nsecs portion faster than we should.

This only crops up for internal ktime_get_raw() users, not for
getrawmonotonic64(), which uses the timespec generation method rather
than the ktime method; that is why this wasn't seen by the userspace
time tests.

I'll send a patch for testing shortly.

thanks
-john
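The double accounting John describes can be modeled in a few lines of standalone C with made-up mult/shift values (a toy model, not kernel code): the posted tk_update_ktime_data() folds the shifted-down xtime_nsec into tkr_raw.base, and the read path then adds xtime_nsec in again on top of the cycle delta:

```c
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	const uint64_t shift = 8, mult = 256;	/* hypothetical: 1 cycle == 1 ns */
	uint64_t raw_sec = 0;
	uint64_t xtime_nsec = 100000000ULL << shift;	/* 100 ms, kept shifted */
	uint64_t delta = 50000000;			/* 50 ms worth of cycles */

	/* tk_update_ktime_data() as posted: base already folds in xtime_nsec */
	uint64_t base = raw_sec * NSEC_PER_SEC + (xtime_nsec >> shift);

	/* timekeeping_delta_to_ns(): xtime_nsec is added in a second time */
	uint64_t nsecs = (delta * mult + xtime_nsec) >> shift;

	/* ktime_get_raw() returns base + nsecs: 250 ms reported, 150 ms real */
	printf("reported %llu ns, really elapsed %llu ns\n",
	       (unsigned long long)(base + nsecs),
	       (unsigned long long)150000000ULL);
	return 0;
}
```

With 100 ms already accumulated in xtime_nsec and 50 ms of new cycles, the reader sees 250 ms instead of 150 ms. The overshoot equals the xtime_nsec portion, which grows between accumulation ticks, so raw time runs fast, and it changes discontinuously when xtime_nsec is renormalized, which plausibly also explains the jumps Chris observed.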
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index d0cb007..7492d90 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -220,10 +220,8 @@ void update_vsyscall(struct timekeeper *tk)
 	if (!use_syscall) {
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
-		vdso_data->raw_time_sec = tk->raw_time.tv_sec;
-		vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
-					    tk->tkr_raw.shift) +
-					   tk->tkr_raw.xtime_nsec;
+		vdso_data->raw_time_sec = tk->raw_sec;
+		vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
 		vdso_data->xtime_clock_sec = tk->xtime_sec;
 		vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
 		vdso_data->cs_mono_mult = tk->tkr_mono.mult;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 528cc86..8abf6df 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -52,7 +52,7 @@ struct tk_read_base {
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_time:		Monotonic raw base time in timespec64 format
+ * @raw_sec:		CLOCK_MONOTONIC_RAW time in seconds
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
  *			interval.
@@ -94,7 +94,7 @@ struct timekeeper {
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
 	ktime_t			next_leap_ktime;
-	struct timespec64	raw_time;
+	u64			raw_sec;

 	/* The following members are for timekeeping internal use */
 	u64			cycle_interval;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 35d3ba3..0c3b8c1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -72,6 +72,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
 		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
 		tk->xtime_sec++;
 	}
+	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
+		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+		tk->raw_sec++;
+	}
 }

 static inline struct timespec64 tk_xtime(struct timekeeper *tk)
@@ -287,12 +291,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	/* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
 		int shift_change = clock->shift - old_clock->shift;
-		if (shift_change < 0)
+		if (shift_change < 0) {
 			tk->tkr_mono.xtime_nsec >>= -shift_change;
-		else
+			tk->tkr_raw.xtime_nsec >>= -shift_change;
+		} else {
 			tk->tkr_mono.xtime_nsec <<= shift_change;
+			tk->tkr_raw.xtime_nsec <<= shift_change;
+		}
 	}
-	tk->tkr_raw.xtime_nsec = 0;

 	tk->tkr_mono.shift = clock->shift;
 	tk->tkr_raw.shift = clock->shift;
@@ -617,9 +623,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
 	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

-	/* Update the monotonic raw base */
-	tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
-
 	/*
 	 * The sum of the nanoseconds portions of xtime and
 	 * wall_to_monotonic can be greater/equal one second. Take
@@ -629,6 +632,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	if (nsec >= NSEC_PER_SEC)
 		seconds++;
 	tk->ktime_sec = seconds;
+
+	/* Update the monotonic raw base */
+	seconds = tk->raw_sec;
+	nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
+	tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 }

 /* must hold timekeeper_lock */
@@ -670,7 +678,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
 	u64 cycle_now, delta;
-	u64 nsec;

 	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
@@ -682,10 +689,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	/* If arch requires, add in get_arch_timeoffset() */
 	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;

-	tk_normalize_xtime(tk);

-	nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
-	timespec64_add_ns(&tk->raw_time, nsec);
+	tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+
+	/* If arch requires, add in get_arch_timeoffset() */
+	tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
+
+	tk_normalize_xtime(tk);
 }

 /**
@@ -1371,19 +1381,18 @@ int timekeeping_notify(struct clocksource *clock)
 void getrawmonotonic64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	struct timespec64 ts64;
 	unsigned long seq;
 	u64 nsecs;

 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
+		ts->tv_sec = tk->raw_sec;
 		nsecs = timekeeping_get_ns(&tk->tkr_raw);
-		ts64 = tk->raw_time;

 	} while (read_seqcount_retry(&tk_core.seq, seq));

-	timespec64_add_ns(&ts64, nsecs);
-	*ts = ts64;
+	ts->tv_nsec = 0;
+	timespec64_add_ns(ts, nsecs);
 }
 EXPORT_SYMBOL(getrawmonotonic64);

@@ -1507,8 +1516,7 @@ void __init timekeeping_init(void)
 	tk_setup_internals(tk, clock);

 	tk_set_xtime(tk, &now);
-	tk->raw_time.tv_sec = 0;
-	tk->raw_time.tv_nsec = 0;
+	tk->raw_sec = 0;
 	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
 		boot = tk_xtime(tk);

@@ -2009,17 +2017,12 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
 	*clock_set |= accumulate_nsecs_to_secs(tk);

 	/* Accumulate raw time */
-	tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec
-					<< tk->tkr_raw.shift;
 	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
 	nsecps = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
 	while (tk->tkr_raw.xtime_nsec >= nsecps) {
 		tk->tkr_raw.xtime_nsec -= nsecps;
-		tk->raw_time.tv_sec++;
+		tk->raw_sec++;
 	}
-	tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
-	tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec
-					<< tk->tkr_raw.shift;

 	/* Accumulate error between NTP and clock interval */
 	tk->ntp_error += tk->ntp_tick << shift;
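For readers new to this code, the following standalone sketch (hypothetical clocksource numbers, not kernel code) models the shifted-nanosecond bookkeeping the patch moves CLOCK_MONOTONIC_RAW onto: xtime_nsec holds nanoseconds pre-shifted left by the clocksource shift so sub-ns remainders survive each accumulation, and whole seconds are carried out into raw_sec, mirroring tk_normalize_xtime() and logarithmic_accumulation() above:

```c
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

struct raw_clock {
	uint64_t raw_sec;	/* whole seconds, like tk->raw_sec */
	uint64_t xtime_nsec;	/* nanoseconds << shift, keeps sub-ns bits */
	uint32_t mult;		/* scale: ns = cycles * mult >> shift */
	uint32_t shift;
};

/* Accumulate a cycle delta, carrying whole seconds into raw_sec */
static void raw_accumulate(struct raw_clock *c, uint64_t cycles)
{
	const uint64_t nsecps = NSEC_PER_SEC << c->shift;

	c->xtime_nsec += cycles * c->mult;	/* stays shifted by c->shift */
	while (c->xtime_nsec >= nsecps) {
		c->xtime_nsec -= nsecps;
		c->raw_sec++;
	}
}

int main(void)
{
	/* hypothetical 24 MHz counter: ~41.667 ns per cycle, which is not
	 * representable as a whole number of ns without the shift */
	struct raw_clock c = { .mult = 2730667, .shift = 16 };
	int i;

	for (i = 0; i < 48; i++)
		raw_accumulate(&c, 12000000);	/* 0.5 s of cycles per call */

	/* prints 24.000002929 */
	printf("%llu.%09llu\n",
	       (unsigned long long)c.raw_sec,
	       (unsigned long long)(c.xtime_nsec >> c.shift));
	return 0;
}
```

The ~122 ns/s residue in the output is the rounding error of the chosen mult, carried forward at sub-ns precision instead of being truncated on every tick, which is the point of keeping xtime_nsec in shifted form.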
Now that we fixed the sub-ns handling for CLOCK_MONOTONIC_RAW, remove
the duplicative tk->raw_time.tv_nsec, which can be stored in
tk->tkr_raw.xtime_nsec (similarly to how it's handled for monotonic
time).

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Daniel Mentz <danielmentz@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/arm64/kernel/vdso.c            |  6 ++---
 include/linux/timekeeper_internal.h |  4 ++--
 kernel/time/timekeeping.c           | 47 ++++++++++++++++++++-----------------
 3 files changed, 29 insertions(+), 28 deletions(-)

--
2.7.4