From f685b18be0bbf08899b84818174de19bfc8b53e4 Mon Sep 17 00:00:00 2001
From: "Eric S. Raymond"
Date: Thu, 26 Feb 2015 17:33:55 -0500
Subject: [PATCH] Refactor the SHM refclock driver and add protective memory barriers.

This change factors the SHM segment querying out of shm_timer() into a
new function, shm_query(), which isolates everything that depends on
the SHM segment data layout.  The new function fills in a struct
containing the extracted data and returns a useful status code.  Age
control and the delta check remain in shm_timer().

The memory_barrier() function needs a C11 extension from stdatomic.h;
it is guarded by STD_ATOMIC_H.  If the compiler does not have this
feature, but does compile memcpy to an uninterruptible bitblt
instruction, update atomicity should still be guaranteed.  If neither
of these is true, the pre-existing count check will still yield a
CLASH status when a concurrent write might screw up a read.

The shm_query() code has been tested and works as part of the new
ntpmon utility in GPSD.  The integration with the revised shm_timer()
code has not been tested but should be straightforward to understand
and validate.
---
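A note for reviewers on the fence guard (not part of the commit message):
the STD_ATOMIC_H conditional in memory_barrier() is assumed to be defined
by the build machinery once <stdatomic.h> has been pulled in.  Purely as
an illustration of the intended fallback behaviour, a self-contained guard
keyed off the standard C11 feature-test macros could look like the sketch
below; the shm_fence name is invented for the example.

    /* Sketch only: a C11 fence when the compiler provides one, otherwise a
     * no-op, in which case correctness falls back on memcpy() atomicity or
     * the count check described above.
     */
    #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && \
        !defined(__STDC_NO_ATOMICS__)
    #include <stdatomic.h>
    #define shm_fence() atomic_thread_fence(memory_order_seq_cst)
    #else
    #define shm_fence() do { } while (0)   /* no fence available */
    #endif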
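The CLASH path depends on the writer bumping the count field around each
update and raising valid last.  For illustration only, a hypothetical
mode-1 writer following that convention might look like the sketch below;
the field names come from struct shmTime in refclock_shm.c and <time.h> is
assumed, while shm_put_sample and its arguments are invented for the
example.  A production writer would also want the same fence treatment as
the reader.

    /* Hypothetical writer sketch: count is incremented before and after the
     * timestamps are stored and valid is raised last, so a reader whose copy
     * straddles an update sees count change and reports CLASH.
     * (mode, leap, precision, etc. omitted for brevity.)
     */
    static void shm_put_sample(volatile struct shmTime *seg,
                               const struct timespec *clock_ts,
                               const struct timespec *recv_ts)
    {
        seg->valid = 0;
        seg->count++;                        /* update in progress */
        seg->clockTimeStampSec    = clock_ts->tv_sec;
        seg->clockTimeStampUSec   = (int)(clock_ts->tv_nsec / 1000);
        seg->clockTimeStampNSec   = (unsigned)clock_ts->tv_nsec;
        seg->receiveTimeStampSec  = recv_ts->tv_sec;
        seg->receiveTimeStampUSec = (int)(recv_ts->tv_nsec / 1000);
        seg->receiveTimeStampNSec = (unsigned)recv_ts->tv_nsec;
        seg->count++;                        /* update complete */
        seg->valid = 1;
    }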
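For context on the "tested as part of ntpmon in GPSD" remark: shm_query()
is meant to be callable from a simple polling loop outside ntpd.  A
minimal, hypothetical consumer (assuming <stdio.h> and visibility of the
struct shmTime, shm_stat_t and shm_query definitions added below) could
look roughly like this; poll_segment is invented for the example.

    /* Sketch of a one-shot poll; NOT_READY just means no fresh sample yet. */
    static void poll_segment(volatile struct shmTime *seg)
    {
        struct shm_stat_t shm_stat;

        switch (shm_query(seg, &shm_stat)) {
        case OK:
            printf("clock %ld.%09ld recv %ld.%09ld leap %d prec %d\n",
                   (long)shm_stat.tvt.tv_sec, shm_stat.tvt.tv_nsec,
                   (long)shm_stat.tvr.tv_sec, shm_stat.tvr.tv_nsec,
                   shm_stat.leap, shm_stat.precision);
            break;
        case NOT_READY:
            break;
        default:
            fprintf(stderr, "SHM segment not usable\n");
            break;
        }
    }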
 ntpd/refclock_shm.c              | 300 +++++++++++++++++++++++++----------------
 ntpdc/nl.pl                      |   2 +-
 sntp/libevent/evconfig-private.h |   2 +-
 3 files changed, 194 insertions(+), 110 deletions(-)

diff --git a/ntpd/refclock_shm.c b/ntpd/refclock_shm.c
index 7174abd..8eccf48 100644
--- a/ntpd/refclock_shm.c
+++ b/ntpd/refclock_shm.c
@@ -348,10 +348,153 @@ shm_poll(
 	shm_clockstats(unit, peer);
 }
 
+
+enum segstat_t {
+    OK, NO_SEGMENT, NOT_READY, BAD_MODE, CLASH
+};
+
+struct shm_stat_t {
+    int status;
+    int mode;
+    struct timespec tvc, tvr, tvt;
+    int precision;
+    int leap;
+};
+
+static inline void memory_barrier(void)
+{
+#ifdef STD_ATOMIC_H
+    atomic_thread_fence(memory_order_seq_cst);
+#endif /* STD_ATOMIC_H */
+}
+
+static enum segstat_t shm_query(volatile struct shmTime *shm_in, struct shm_stat_t *shm_stat)
+/* try to grab a sample from the specified SHM segment */
+{
+    volatile struct shmTime shmcopy, *shm = shm_in;
+    volatile int cnt;
+
+    unsigned int cns_new, rns_new;
+
+    /*
+     * This is the main routine. It snatches the time from the shm
+     * board and tacks on a local timestamp.
+     */
+    if (shm == NULL) {
+        shm_stat->status = NO_SEGMENT;
+        return NO_SEGMENT;
+    }
+
+    /*@-type@*//* splint is confused about struct timespec */
+    shm_stat->tvc.tv_sec = shm_stat->tvc.tv_nsec = 0;
+    clock_gettime(CLOCK_REALTIME, &shm_stat->tvc);
+
+    /* relying on word access to be atomic here */
+    if (shm->valid == 0) {
+        shm_stat->status = NOT_READY;
+        return NOT_READY;
+    }
+
+    cnt = shm->count;
+
+    /*
+     * This is proof against concurrency issues if either
+     * (a) the memory_barrier() call works on this host, or
+     * (b) memcpy compiles to an uninterruptible single-instruction bitblt.
+     */
+    memory_barrier();
+    memcpy((void *)&shmcopy, (void *)shm, sizeof(struct shmTime));
+    shm->valid = 0;
+    memory_barrier();
+
+    /*
+     * Clash detection in case neither (a) nor (b) was true.
+     * Not supported in mode 0, and word access to the count field
+     * must be atomic for this to work.
+     */
+    if (shmcopy.mode > 0 && cnt != shm->count) {
+        shm_stat->status = CLASH;
+        return shm_stat->status;
+    }
+
+    shm_stat->status = OK;
+    shm_stat->mode = shmcopy.mode;
+
+    switch (shmcopy.mode) {
+    case 0:
+        shm_stat->tvr.tv_sec = shmcopy.receiveTimeStampSec;
+        shm_stat->tvr.tv_nsec = shmcopy.receiveTimeStampUSec * 1000;
+        rns_new = shmcopy.receiveTimeStampNSec;
+        shm_stat->tvt.tv_sec = shmcopy.clockTimeStampSec;
+        shm_stat->tvt.tv_nsec = shmcopy.clockTimeStampUSec * 1000;
+        cns_new = shmcopy.clockTimeStampNSec;
+
+        /* Since the following comparisons are between unsigned
+        ** variables they are always well defined, and any
+        ** (signed) underflow will turn into very large unsigned
+        ** values, well above the 1000 cutoff.
+        **
+        ** Note: The usecs *must* be a *truncated*
+        ** representation of the nsecs. This code will fail for
+        ** *rounded* usecs, and the logic to deal with
+        ** wrap-arounds in the presence of rounded values is
+        ** much more convoluted.
+        */
+        if ( ((cns_new - (unsigned)shm_stat->tvt.tv_nsec) < 1000)
+             && ((rns_new - (unsigned)shm_stat->tvr.tv_nsec) < 1000)) {
+            shm_stat->tvt.tv_nsec = cns_new;
+            shm_stat->tvr.tv_nsec = rns_new;
+        }
+        /* At this point shm_stat->tvr and shm_stat->tvt contain valid ns-level
+        ** timestamps, possibly generated by extending the old
+        ** us-level timestamps
+        */
+        break;
+
+    case 1:
+
+        shm_stat->tvr.tv_sec = shmcopy.receiveTimeStampSec;
+        shm_stat->tvr.tv_nsec = shmcopy.receiveTimeStampUSec * 1000;
+        rns_new = shmcopy.receiveTimeStampNSec;
+        shm_stat->tvt.tv_sec = shmcopy.clockTimeStampSec;
+        shm_stat->tvt.tv_nsec = shmcopy.clockTimeStampUSec * 1000;
+        cns_new = shmcopy.clockTimeStampNSec;
+
+        /* See the case above for an explanation of the
+        ** following test.
+        */
+        if ( ((cns_new - (unsigned)shm_stat->tvt.tv_nsec) < 1000)
+             && ((rns_new - (unsigned)shm_stat->tvr.tv_nsec) < 1000)) {
+            shm_stat->tvt.tv_nsec = cns_new;
+            shm_stat->tvr.tv_nsec = rns_new;
+        }
+        /* At this point shm_stat->tvr and shm_stat->tvt contain valid ns-level
+        ** timestamps, possibly generated by extending the old
+        ** us-level timestamps
+        */
+        break;
+
+    default:
+        shm_stat->status = BAD_MODE;
+        break;
+    }
+    /*@+type@*/
+
+    /*
+     * leap field is not a leap offset but a leap notification code.
+     * The values are magic numbers used by NTP and set by GPSD, if at all, in
+     * the subframe code.
+     */
+    shm_stat->leap = shmcopy.leap;
+    shm_stat->precision = shmcopy.precision;
+
+    return shm_stat->status;
+}
+
 /*
- * shm_timer - called onece every second.
+ * shm_timer - called once every second.
  *
- * This tries to grab a sample from the SHM segment
+ * This tries to grab a sample from the SHM segment, filtering bad ones
  */
 static void
 shm_timer(
@@ -362,33 +505,20 @@ shm_timer(
     struct refclockproc * const pp = peer->procptr;
     struct shmunit * const up = pp->unitptr;
 
-    /* access order is important for lock-free SHM access; we
-    ** enforce order by treating the whole structure volatile.
-    **
-    ** IMPORTANT NOTE: This does not protect from reordering on CPU
-    ** level, and it does nothing for cache consistency and
-    ** visibility of changes by other cores. We need atomic and/or
-    ** fence instructions for that.
-    */
     volatile struct shmTime *shm;
 
-    struct timespec tvr;
-    struct timespec tvt;
     l_fp tsrcv;
     l_fp tsref;
-    unsigned int c;
-    unsigned int cns_new, rns_new;
-    int cnt;
+    int c;
 
     /* for formatting 'a_lastcode': */
     struct calendar cd;
-    time_t tt, now;
+    time_t tt;
     vint64 ts;
 
-    /*
-     * This is the main routine. It snatches the time from the shm
-     * board and tacks on a local timestamp.
-     */
+    enum segstat_t status;
+    struct shm_stat_t shm_stat;
+
     up->ticks++;
     if ((shm = up->shm) == NULL) {
         /* try to map again - this may succeed if meanwhile some-
@@ -400,88 +530,43 @@ shm_timer(
             return;
         }
     }
-    if ( ! shm->valid) {
-        DPRINTF(1, ("%s: SHM not ready\n",
-                    refnumtoa(&peer->srcadr)));
-        up->notready++;
-        return;
-    }
-
-    switch (shm->mode) {
-    case 0:
-        tvr.tv_sec = shm->receiveTimeStampSec;
-        tvr.tv_nsec = shm->receiveTimeStampUSec * 1000;
-        rns_new = shm->receiveTimeStampNSec;
-        tvt.tv_sec = shm->clockTimeStampSec;
-        tvt.tv_nsec = shm->clockTimeStampUSec * 1000;
-        cns_new = shm->clockTimeStampNSec;
-
-        /* Since the following comparisons are between unsigned
-        ** variables they are always well defined, and any
-        ** (signed) underflow will turn into very large unsigned
-        ** values, well above the 1000 cutoff.
-        **
-        ** Note: The usecs *must* be a *truncated*
-        ** representation of the nsecs. This code will fail for
-        ** *rounded* usecs, and the logic to deal with
-        ** wrap-arounds in the presence of rounded values is
-        ** much more convoluted.
-        */
-        if ( ((cns_new - (unsigned)tvt.tv_nsec) < 1000)
-             && ((rns_new - (unsigned)tvr.tv_nsec) < 1000)) {
-            tvt.tv_nsec = cns_new;
-            tvr.tv_nsec = rns_new;
-        }
-        /* At this point tvr and tvt contains valid ns-level
-        ** timestamps, possibly generated by extending the old
-        ** us-level timestamps
-        */
-        DPRINTF(2, ("%s: SHM type 0 sample\n",
-                    refnumtoa(&peer->srcadr)));
-        break;
-
-    case 1:
-        cnt = shm->count;
-
-        tvr.tv_sec = shm->receiveTimeStampSec;
-        tvr.tv_nsec = shm->receiveTimeStampUSec * 1000;
-        rns_new = shm->receiveTimeStampNSec;
-        tvt.tv_sec = shm->clockTimeStampSec;
-        tvt.tv_nsec = shm->clockTimeStampUSec * 1000;
-        cns_new = shm->clockTimeStampNSec;
-        if (cnt != shm->count) {
-            DPRINTF(1, ("%s: type 1 access clash\n",
-                        refnumtoa(&peer->srcadr)));
-            msyslog (LOG_NOTICE, "SHM: access clash in shared memory");
-            up->clash++;
-            return;
-        }
-
-        /* See the case above for an explanation of the
-        ** following test.
-        */
-        if ( ((cns_new - (unsigned)tvt.tv_nsec) < 1000)
-             && ((rns_new - (unsigned)tvr.tv_nsec) < 1000)) {
-            tvt.tv_nsec = cns_new;
-            tvr.tv_nsec = rns_new;
-        }
-        /* At this point tvr and tvt contains valid ns-level
-        ** timestamps, possibly generated by extending the old
-        ** us-level timestamps
-        */
-        DPRINTF(2, ("%s: SHM type 1 sample\n",
-                    refnumtoa(&peer->srcadr)));
-        break;
+    /* query the segment, atomically */
+    status = shm_query(shm, &shm_stat);
+
+    switch (status) {
+    case OK:
+        DPRINTF(2, ("%s: SHM type %d sample\n",
+                    refnumtoa(&peer->srcadr), shm_stat.mode));
+        break;
+    case NO_SEGMENT:
+        /* should never happen, but is harmless */
+        return;
+    case NOT_READY:
+        DPRINTF(1, ("%s: SHM not ready\n",refnumtoa(&peer->srcadr)));
+        up->notready++;
+        return;
+    case BAD_MODE:
+        DPRINTF(1, ("%s: SHM type blooper, mode=%d\n",
+                    refnumtoa(&peer->srcadr), shm->mode));
+        up->bad++;
+        msyslog (LOG_ERR, "SHM: bad mode found in shared memory: %d",
+                 shm->mode);
+        return;
+    case CLASH:
+        DPRINTF(1, ("%s: type 1 access clash\n",
+                    refnumtoa(&peer->srcadr)));
+        msyslog (LOG_NOTICE, "SHM: access clash in shared memory");
+        up->clash++;
+        return;
     default:
-        DPRINTF(1, ("%s: SHM type blooper, mode=%d\n",
-                    refnumtoa(&peer->srcadr), shm->mode));
-        up->bad++;
-        msyslog (LOG_ERR, "SHM: bad mode found in shared memory: %d",
-                 shm->mode);
-        return;
+        DPRINTF(1, ("%s: internal error, unknown SHM fetch status\n",
+                    refnumtoa(&peer->srcadr)));
+        msyslog (LOG_NOTICE, "internal error, unknown SHM fetch status");
+        up->bad++;
+        return;
     }
-    shm->valid = 0;
+
 
     /* format the last time code in human-readable form into
      * 'pp->a_lastcode'. Someone claimed: "NetBSD has incompatible
@@ -489,7 +574,7 @@ shm_timer(
      * around that potential problem. BTW, simply casting a pointer
      * is a receipe for disaster on some architectures.
      */
-    tt = (time_t)tvt.tv_sec;
+    tt = (time_t)shm_stat.tvt.tv_sec;
     ts = time_to_vint64(&tt);
     ntpcal_time_to_date(&cd, &ts);
 
@@ -498,12 +583,11 @@
              "%04u-%02u-%02uT%02u:%02u:%02u.%09ldZ",
              cd.year, cd.month, cd.monthday,
              cd.hour, cd.minute, cd.second,
-             (long)tvt.tv_nsec);
+             (long)shm_stat.tvt.tv_nsec);
     pp->lencode = (c < sizeof(pp->a_lastcode)) ? c : 0;
 
     /* check 1: age control of local time stamp */
-    time(&now);
-    tt = now - tvr.tv_sec;
+    tt = shm_stat.tvc.tv_sec - shm_stat.tvr.tv_sec;
     if (tt < 0 || tt > up->max_delay) {
         DPRINTF(1, ("%s:SHM stale/bad receive time, delay=%llds\n",
                     refnumtoa(&peer->srcadr), (long long)tt));
@@ -514,7 +598,7 @@
     }
 
     /* check 2: delta check */
-    tt = tvr.tv_sec - tvt.tv_sec - (tvr.tv_nsec < tvt.tv_nsec);
+    tt = shm_stat.tvr.tv_sec - shm_stat.tvt.tv_sec - (shm_stat.tvr.tv_nsec < shm_stat.tvt.tv_nsec);
     if (tt < 0)
         tt = -tt;
     if (up->max_delta > 0 && tt > up->max_delta) {
@@ -529,10 +613,10 @@
     /* if we really made it to this point... we're winners! */
     DPRINTF(2, ("%s: SHM feeding data\n",
                 refnumtoa(&peer->srcadr)));
-    tsrcv = tspec_stamp_to_lfp(tvr);
-    tsref = tspec_stamp_to_lfp(tvt);
-    pp->leap = shm->leap;
-    peer->precision = shm->precision;
+    tsrcv = tspec_stamp_to_lfp(shm_stat.tvr);
+    tsref = tspec_stamp_to_lfp(shm_stat.tvt);
+    pp->leap = shm_stat.leap;
+    peer->precision = shm_stat.precision;
     refclock_process_offset(pp, tsref, tsrcv, pp->fudgetime1);
     up->good++;
 }
diff --git a/ntpdc/nl.pl b/ntpdc/nl.pl
index 4a418fb..546f9ba 100755
--- a/ntpdc/nl.pl
+++ b/ntpdc/nl.pl
@@ -1,4 +1,4 @@
-#! /usr/local/perl-5.8.8/bin/perl -w
+#! /usr/bin/perl -w
 
 $found = 0;
 $last = 0;
diff --git a/sntp/libevent/evconfig-private.h b/sntp/libevent/evconfig-private.h
index ea4940d..f6b08e3 100644
--- a/sntp/libevent/evconfig-private.h
+++ b/sntp/libevent/evconfig-private.h
@@ -26,7 +26,7 @@
 #endif
 
 /* Number of bits in a file offset, on hosts where this is settable. */
-#define _FILE_OFFSET_BITS 64
+/* #undef _FILE_OFFSET_BITS */
 
 /* Define for large files, on AIX-style hosts. */
 /* #undef _LARGE_FILES */
-- 
2.1.0