Skip to content

Commit e2d977c

Browse files
committed
timekeeping: Provide multi-timestamp accessor to NMI safe timekeeper
printk wants to store various timestamps (MONOTONIC, REALTIME, BOOTTIME) to make correlation of dmesg from several systems easier. Provide an interface to retrieve all three timestamps in one go. There are some caveats: 1) Boot time and late sleep time injection Boot time is a racy access on 32bit systems if the sleep time injection happens late during resume and not in timekeeping_resume(). That could be avoided by expanding struct tk_read_base with boot offset for 32bit and adding more overhead to the update. As this is a hard to observe once per resume event which can be filtered with reasonable effort using the accurate mono/real timestamps, it's probably not worth the trouble. Aside of that it might be possible on 32 and 64 bit to observe the following when the sleep time injection happens late: CPU 0 CPU 1 timekeeping_resume() ktime_get_fast_timestamps() mono, real = __ktime_get_real_fast() inject_sleep_time() update boot offset boot = mono + bootoffset; That means that boot time already has the sleep time adjustment, but real time does not. On the next readout both are in sync again. Preventing this for 64bit is not really feasible without destroying the careful cache layout of the timekeeper because the sequence count and struct tk_read_base would then need two cache lines instead of one. 2) Suspend/resume timestamps Access to the time keeper clock source is disabled across the innermost steps of suspend/resume. The accessors still work, but the timestamps are frozen until time keeping is resumed which happens very early. For regular suspend/resume there is no observable difference vs. sched clock, but it might affect some of the nasty low level debug printks. OTOH, access to sched clock is not guaranteed across suspend/resume on all systems either so it depends on the hardware in use. If that turns out to be a real problem then this could be mitigated by using sched clock in a similar way as during early boot.
But it's not as trivial as on early boot because it needs some careful protection against the clock monotonic timestamp jumping backwards on resume. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Petr Mladek <pmladek@suse.com> Link: https://lore.kernel.org/r/20200814115512.159981360@linutronix.de
1 parent 71419b3 commit e2d977c

2 files changed

Lines changed: 80 additions & 11 deletions

File tree

include/linux/timekeeping.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,18 @@ extern bool timekeeping_rtc_skipresume(void);
222222

223223
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
224224

225+
/*
226+
* struct ktime_timestanps - Simultaneous mono/boot/real timestamps
227+
* @mono: Monotonic timestamp
228+
* @boot: Boottime timestamp
229+
* @real: Realtime timestamp
230+
*/
231+
struct ktime_timestamps {
232+
u64 mono;
233+
u64 boot;
234+
u64 real;
235+
};
236+
225237
/**
226238
* struct system_time_snapshot - simultaneous raw/real time capture with
227239
* counter value
@@ -280,6 +292,9 @@ extern int get_device_system_crosststamp(
280292
*/
281293
extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
282294

295+
/* NMI safe mono/boot/realtime timestamps */
296+
extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap);
297+
283298
/*
284299
* Persistent clock related interfaces
285300
*/

kernel/time/timekeeping.c

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -530,40 +530,94 @@ u64 notrace ktime_get_boot_fast_ns(void)
530530
}
531531
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
532532

533-
534533
/*
535534
* See comment for __ktime_get_fast_ns() vs. timestamp ordering
536535
*/
537-
static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
536+
static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
538537
{
539538
struct tk_read_base *tkr;
539+
u64 basem, baser, delta;
540540
unsigned int seq;
541-
u64 now;
542541

543542
do {
544543
seq = raw_read_seqcount_latch(&tkf->seq);
545544
tkr = tkf->base + (seq & 0x01);
546-
now = ktime_to_ns(tkr->base_real);
545+
basem = ktime_to_ns(tkr->base);
546+
baser = ktime_to_ns(tkr->base_real);
547547

548-
now += timekeeping_delta_to_ns(tkr,
549-
clocksource_delta(
550-
tk_clock_read(tkr),
551-
tkr->cycle_last,
552-
tkr->mask));
548+
delta = timekeeping_delta_to_ns(tkr,
549+
clocksource_delta(tk_clock_read(tkr),
550+
tkr->cycle_last, tkr->mask));
553551
} while (read_seqcount_retry(&tkf->seq, seq));
554552

555-
return now;
553+
if (mono)
554+
*mono = basem + delta;
555+
return baser + delta;
556556
}
557557

558558
/**
559559
* ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
560560
*/
561561
u64 ktime_get_real_fast_ns(void)
562562
{
563-
return __ktime_get_real_fast_ns(&tk_fast_mono);
563+
return __ktime_get_real_fast(&tk_fast_mono, NULL);
564564
}
565565
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
566566

567+
/**
568+
* ktime_get_fast_timestamps: - NMI safe timestamps
569+
* @snapshot: Pointer to timestamp storage
570+
*
571+
* Stores clock monotonic, boottime and realtime timestamps.
572+
*
573+
* Boot time is a racy access on 32bit systems if the sleep time injection
574+
* happens late during resume and not in timekeeping_resume(). That could
575+
* be avoided by expanding struct tk_read_base with boot offset for 32bit
576+
* and adding more overhead to the update. As this is a hard to observe
577+
* once per resume event which can be filtered with reasonable effort using
578+
* the accurate mono/real timestamps, it's probably not worth the trouble.
579+
*
580+
* Aside of that it might be possible on 32 and 64 bit to observe the
581+
* following when the sleep time injection happens late:
582+
*
583+
* CPU 0 CPU 1
584+
* timekeeping_resume()
585+
* ktime_get_fast_timestamps()
586+
* mono, real = __ktime_get_real_fast()
587+
* inject_sleep_time()
588+
* update boot offset
589+
* boot = mono + bootoffset;
590+
*
591+
* That means that boot time already has the sleep time adjustment, but
592+
* real time does not. On the next readout both are in sync again.
593+
*
594+
* Preventing this for 64bit is not really feasible without destroying the
595+
* careful cache layout of the timekeeper because the sequence count and
596+
* struct tk_read_base would then need two cache lines instead of one.
597+
*
598+
* Access to the time keeper clock source is disabled accross the innermost
599+
* steps of suspend/resume. The accessors still work, but the timestamps
600+
* are frozen until time keeping is resumed which happens very early.
601+
*
602+
* For regular suspend/resume there is no observable difference vs. sched
603+
* clock, but it might affect some of the nasty low level debug printks.
604+
*
605+
* OTOH, access to sched clock is not guaranteed accross suspend/resume on
606+
* all systems either so it depends on the hardware in use.
607+
*
608+
* If that turns out to be a real problem then this could be mitigated by
609+
* using sched clock in a similar way as during early boot. But it's not as
610+
* trivial as on early boot because it needs some careful protection
611+
* against the clock monotonic timestamp jumping backwards on resume.
612+
*/
613+
void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
614+
{
615+
struct timekeeper *tk = &tk_core.timekeeper;
616+
617+
snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
618+
snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
619+
}
620+
567621
/**
568622
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
569623
* @tk: Timekeeper to snapshot.

0 commit comments

Comments
 (0)