Use H/W Monotonic clock and updates to AE (#7644)

Update adds a general source for retrieving a monotonic time.
In addition, AE has been updated to utilize the new monotonic
clock for timer processing.

This performance improvement is **not** enabled in a default build due to various H/W compatibility
concerns, see README.md for details. It does however change the default use of gettimeofday with
clock_gettime and somewhat improves performance.

This update provides the following
1. An interface for retrieving a monotonic clock. getMonotonicUs returns a uint64_t (aka monotime)
   with the number of micro-seconds from an arbitrary point. No more messing with tv_sec/tv_usec.
   Simple routines are provided for measuring elapsed milli-seconds or elapsed micro-seconds (the
   most common use case for a monotonic timer). No worries about time moving backwards.
2. High-speed assembler implementation for x86 and ARM. The standard method for retrieving the
   monotonic clock is POSIX.1b (1993): clock_gettime(CLOCK_MONOTONIC, timespec*). However, most
   modern processors provide a constant speed instruction clock which can be retrieved in a fraction
   of the time that it takes to call clock_gettime. For x86, this is provided by the RDTSC
   instruction. For ARM, this is provided by the CNTVCT_EL0 instruction. As a compile-time option,
   these high-speed timers can be chosen. (Default is POSIX clock_gettime.)
3. Refactor of event loop timers. The timer processing in ae.c has been refactored to use the new
   monotonic clock interface. This results in simpler/cleaner logic and improved performance.
This commit is contained in:
Jim Brunner 2020-08-28 01:54:10 -07:00 committed by GitHub
parent 9fcd9e191e
commit c01e94a431
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 279 additions and 90 deletions

View File

@ -106,6 +106,18 @@ To compile against jemalloc on Mac OS X systems, use:
% make MALLOC=jemalloc
Monotonic clock
---------------
By default, Redis will build using the POSIX clock_gettime function as the
monotonic clock source. On most modern systems, the internal processor clock
can be used to improve performance. Cautions can be found here:
http://oliveryang.net/2015/09/pitfalls-of-TSC-usage/
To build with support for the processor's internal instruction clock, use:
% make CFLAGS="-DUSE_PROCESSOR_CLOCK"
Verbose build
-------------

View File

@ -228,11 +228,11 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o setcpuaffinity.o
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crcspeed.o crc64.o siphash.o crc16.o
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o
REDIS_BENCHMARK_NAME=redis-benchmark
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o monotonic.o
REDIS_CHECK_RDB_NAME=redis-check-rdb
REDIS_CHECK_AOF_NAME=redis-check-aof

114
src/ae.c
View File

@ -30,6 +30,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "ae.h"
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
@ -40,7 +42,6 @@
#include <time.h>
#include <errno.h>
#include "ae.h"
#include "zmalloc.h"
#include "config.h"
@ -60,16 +61,18 @@
#endif
#endif
aeEventLoop *aeCreateEventLoop(int setsize) {
aeEventLoop *eventLoop;
int i;
monotonicInit(); /* just in case the calling app didn't initialize */
if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
eventLoop->setsize = setsize;
eventLoop->lastTime = time(NULL);
eventLoop->timeEventHead = NULL;
eventLoop->timeEventNextId = 0;
eventLoop->stop = 0;
@ -199,29 +202,6 @@ int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
return fe->mask;
}
static void aeGetTime(long *seconds, long *milliseconds)
{
struct timeval tv;
gettimeofday(&tv, NULL);
*seconds = tv.tv_sec;
*milliseconds = tv.tv_usec/1000;
}
static void aeAddMillisecondsToNow(long long milliseconds, long *sec, long *ms) {
long cur_sec, cur_ms, when_sec, when_ms;
aeGetTime(&cur_sec, &cur_ms);
when_sec = cur_sec + milliseconds/1000;
when_ms = cur_ms + milliseconds%1000;
if (when_ms >= 1000) {
when_sec ++;
when_ms -= 1000;
}
*sec = when_sec;
*ms = when_ms;
}
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
aeTimeProc *proc, void *clientData,
aeEventFinalizerProc *finalizerProc)
@ -232,7 +212,7 @@ long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
te = zmalloc(sizeof(*te));
if (te == NULL) return AE_ERR;
te->id = id;
aeAddMillisecondsToNow(milliseconds,&te->when_sec,&te->when_ms);
te->when = getMonotonicUs() + milliseconds * 1000;
te->timeProc = proc;
te->finalizerProc = finalizerProc;
te->clientData = clientData;
@ -258,10 +238,8 @@ int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
return AE_ERR; /* NO event with the specified ID found */
}
/* Search the first timer to fire.
* This operation is useful to know how many time the select can be
* put in sleep without to delay any event.
* If there are no timers NULL is returned.
/* How many milliseconds until the first timer should fire.
* If there are no timers, -1 is returned.
*
* Note that's O(N) since time events are unsorted.
* Possible optimizations (not needed by Redis so far, but...):
@ -269,19 +247,20 @@ int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
* Much better but still insertion or deletion of timers is O(N).
* 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)).
*/
static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
{
static long msUntilEarliestTimer(aeEventLoop *eventLoop) {
aeTimeEvent *te = eventLoop->timeEventHead;
aeTimeEvent *nearest = NULL;
if (te == NULL) return -1;
while(te) {
if (!nearest || te->when_sec < nearest->when_sec ||
(te->when_sec == nearest->when_sec &&
te->when_ms < nearest->when_ms))
nearest = te;
aeTimeEvent *earliest = NULL;
while (te) {
if (!earliest || te->when < earliest->when)
earliest = te;
te = te->next;
}
return nearest;
monotime now = getMonotonicUs();
return (now >= earliest->when)
? 0 : (long)((earliest->when - now) / 1000);
}
/* Process time events */
@ -289,29 +268,11 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
int processed = 0;
aeTimeEvent *te;
long long maxId;
time_t now = time(NULL);
/* If the system clock is moved to the future, and then set back to the
* right value, time events may be delayed in a random way. Often this
* means that scheduled operations will not be performed soon enough.
*
* Here we try to detect system clock skews, and force all the time
* events to be processed ASAP when this happens: the idea is that
* processing events earlier is less dangerous than delaying them
* indefinitely, and practice suggests it is. */
if (now < eventLoop->lastTime) {
te = eventLoop->timeEventHead;
while(te) {
te->when_sec = 0;
te = te->next;
}
}
eventLoop->lastTime = now;
te = eventLoop->timeEventHead;
maxId = eventLoop->timeEventNextId-1;
monotime now = getMonotonicUs();
while(te) {
long now_sec, now_ms;
long long id;
/* Remove events scheduled for deletion. */
@ -330,8 +291,10 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
eventLoop->timeEventHead = te->next;
if (te->next)
te->next->prev = te->prev;
if (te->finalizerProc)
if (te->finalizerProc) {
te->finalizerProc(eventLoop, te->clientData);
now = getMonotonicUs();
}
zfree(te);
te = next;
continue;
@ -346,10 +309,8 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
te = te->next;
continue;
}
aeGetTime(&now_sec, &now_ms);
if (now_sec > te->when_sec ||
(now_sec == te->when_sec && now_ms >= te->when_ms))
{
if (te->when <= now) {
int retval;
id = te->id;
@ -357,8 +318,9 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
retval = te->timeProc(eventLoop, id, te->clientData);
te->refcount--;
processed++;
now = getMonotonicUs();
if (retval != AE_NOMORE) {
aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
te->when = now + retval * 1000;
} else {
te->id = AE_DELETED_EVENT_ID;
}
@ -397,30 +359,16 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
if (eventLoop->maxfd != -1 ||
((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
int j;
aeTimeEvent *shortest = NULL;
struct timeval tv, *tvp;
long msUntilTimer = -1;
if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
shortest = aeSearchNearestTimer(eventLoop);
if (shortest) {
long now_sec, now_ms;
msUntilTimer = msUntilEarliestTimer(eventLoop);
aeGetTime(&now_sec, &now_ms);
if (msUntilTimer >= 0) {
tv.tv_sec = msUntilTimer / 1000;
tv.tv_usec = (msUntilTimer % 1000) * 1000;
tvp = &tv;
/* How many milliseconds we need to wait for the next
* time event to fire? */
long long ms =
(shortest->when_sec - now_sec)*1000 +
shortest->when_ms - now_ms;
if (ms > 0) {
tvp->tv_sec = ms/1000;
tvp->tv_usec = (ms % 1000)*1000;
} else {
tvp->tv_sec = 0;
tvp->tv_usec = 0;
}
} else {
/* If we have to check for events but need to return
* ASAP because of AE_DONT_WAIT we need to set the timeout

View File

@ -33,7 +33,7 @@
#ifndef __AE_H__
#define __AE_H__
#include <time.h>
#include "monotonic.h"
#define AE_OK 0
#define AE_ERR -1
@ -79,8 +79,7 @@ typedef struct aeFileEvent {
/* Time event structure */
typedef struct aeTimeEvent {
long long id; /* time event identifier. */
long when_sec; /* seconds */
long when_ms; /* milliseconds */
monotime when;
aeTimeProc *timeProc;
aeEventFinalizerProc *finalizerProc;
void *clientData;
@ -101,7 +100,6 @@ typedef struct aeEventLoop {
int maxfd; /* highest file descriptor currently registered */
int setsize; /* max number of file descriptors tracked */
long long timeEventNextId;
time_t lastTime; /* Used to detect system clock skew */
aeFileEvent *events; /* Registered events */
aeFiredEvent *fired; /* Fired events */
aeTimeEvent *timeEventHead;

View File

@ -58,4 +58,10 @@
#define _LARGEFILE_SOURCE
#define _FILE_OFFSET_BITS 64
#ifdef __linux__
/* features.h uses the defines above to set feature specific defines. */
#include <linux/version.h>
#include <features.h>
#endif
#endif

170
src/monotonic.c Normal file
View File

@ -0,0 +1,170 @@
#include "monotonic.h"
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#undef NDEBUG
#include <assert.h>
/* The function pointer for clock retrieval. */
monotime (*getMonotonicUs)(void) = NULL;
static char monotonic_info_string[32];
/* Using the processor clock (aka TSC on x86) can provide improved performance
* throughout Redis wherever the monotonic clock is used. The processor clock
* is significantly faster than calling 'clock_getting' (POSIX). While this is
* generally safe on modern systems, this link provides additional information
* about use of the x86 TSC: http://oliveryang.net/2015/09/pitfalls-of-TSC-usage
*
* To use the processor clock, either uncomment this line, or build with
* CFLAGS="-DUSE_PROCESSOR_CLOCK"
#define USE_PROCESSOR_CLOCK
*/
#if defined(USE_PROCESSOR_CLOCK) && defined(__x86_64__) && defined(__linux__)
#include <regex.h>
#include <x86intrin.h>
static long mono_ticksPerMicrosecond = 0;
static monotime getMonotonicUs_x86() {
return __rdtsc() / mono_ticksPerMicrosecond;
}
static void monotonicInit_x86linux() {
const int bufflen = 256;
char buf[bufflen];
regex_t cpuGhzRegex, constTscRegex;
const size_t nmatch = 2;
regmatch_t pmatch[nmatch];
int constantTsc = 0;
int rc;
/* Determine the number of TSC ticks in a micro-second. This is
* a constant value matching the standard speed of the processor.
* On modern processors, this speed remains constant even though
* the actual clock speed varies dynamically for each core. */
rc = regcomp(&cpuGhzRegex, "^model name\\s+:.*@ ([0-9.]+)GHz", REG_EXTENDED);
assert(rc == 0);
/* Also check that the constant_tsc flag is present. (It should be
* unless this is a really old CPU. */
rc = regcomp(&constTscRegex, "^flags\\s+:.* constant_tsc", REG_EXTENDED);
assert(rc == 0);
FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
if (cpuinfo != NULL) {
while (fgets(buf, bufflen, cpuinfo) != NULL) {
if (regexec(&cpuGhzRegex, buf, nmatch, pmatch, 0) == 0) {
buf[pmatch[1].rm_eo] = '\0';
double ghz = atof(&buf[pmatch[1].rm_so]);
mono_ticksPerMicrosecond = (long)(ghz * 1000);
break;
}
}
while (fgets(buf, bufflen, cpuinfo) != NULL) {
if (regexec(&constTscRegex, buf, nmatch, pmatch, 0) == 0) {
constantTsc = 1;
break;
}
}
fclose(cpuinfo);
}
regfree(&cpuGhzRegex);
regfree(&constTscRegex);
if (mono_ticksPerMicrosecond == 0) {
fprintf(stderr, "monotonic: x86 linux, unable to determine clock rate");
return;
}
if (!constantTsc) {
fprintf(stderr, "monotonic: x86 linux, 'constant_tsc' flag not present");
return;
}
snprintf(monotonic_info_string, sizeof(monotonic_info_string),
"X86 TSC @ %ld ticks/us", mono_ticksPerMicrosecond);
getMonotonicUs = getMonotonicUs_x86;
}
#endif
#if defined(USE_PROCESSOR_CLOCK) && defined(__aarch64__)
static long mono_ticksPerMicrosecond = 0;
/* Read the clock value. */
static inline uint64_t __cntvct() {
uint64_t virtual_timer_value;
__asm__ volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value;
}
/* Read the Count-timer Frequency. */
static inline uint32_t cntfrq_hz() {
uint64_t virtual_freq_value;
__asm__ volatile("mrs %0, cntfrq_el0" : "=r"(virtual_freq_value));
return (uint32_t)virtual_freq_value; /* top 32 bits are reserved */
}
static monotime getMonotonicUs_aarch64() {
return __cntvct() / mono_ticksPerMicrosecond;
}
static void monotonicInit_aarch64() {
mono_ticksPerMicrosecond = (long)cntfrq_hz() / 1000L / 1000L;
if (mono_ticksPerMicrosecond == 0) {
fprintf(stderr, "monotonic: aarch64, unable to determine clock rate");
return;
}
snprintf(monotonic_info_string, sizeof(monotonic_info_string),
"ARM CNTVCT @ %ld ticks/us", mono_ticksPerMicrosecond);
getMonotonicUs = getMonotonicUs_aarch64;
}
#endif
static monotime getMonotonicUs_posix() {
/* clock_gettime() is specified in POSIX.1b (1993). Even so, some systems
* did not support this until much later. CLOCK_MONOTONIC is technically
* optional and may not be supported - but it appears to be universal.
* If this is not supported, provide a system-specific alternate version. */
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ((uint64_t)ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
}
static void monotonicInit_posix() {
/* Ensure that CLOCK_MONOTONIC is supported. This should be supported
* on any reasonably current OS. If the assertion below fails, provide
* an appropriate alternate implementation. */
struct timespec ts;
int rc = clock_gettime(CLOCK_MONOTONIC, &ts);
assert(rc == 0);
snprintf(monotonic_info_string, sizeof(monotonic_info_string),
"POSIX clock_gettime");
getMonotonicUs = getMonotonicUs_posix;
}
const char * monotonicInit() {
#if defined(USE_PROCESSOR_CLOCK) && defined(__x86_64__) && defined(__linux__)
if (getMonotonicUs == NULL) monotonicInit_x86linux();
#endif
#if defined(USE_PROCESSOR_CLOCK) && defined(__aarch64__)
if (getMonotonicUs == NULL) monotonicInit_aarch64();
#endif
if (getMonotonicUs == NULL) monotonicInit_posix();
return monotonic_info_string;
}

52
src/monotonic.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef __MONOTONIC_H
#define __MONOTONIC_H
/* The monotonic clock is an always increasing clock source. It is unrelated to
* the actual time of day and should only be used for relative timings. The
* monotonic clock is also not guaranteed to be chronologically precise; there
* may be slight skew/shift from a precise clock.
*
* Depending on system architecture, the monotonic time may be able to be
* retrieved much faster than a normal clock source by using an instruction
* counter on the CPU. On x86 architectures (for example), the RDTSC
* instruction is a very fast clock source for this purpose.
*/
#include "fmacros.h"
#include <stdint.h>
#include <unistd.h>
/* A counter in micro-seconds. The 'monotime' type is provided for variables
* holding a monotonic time. This will help distinguish & document that the
* variable is associated with the monotonic clock and should not be confused
* with other types of time.*/
typedef uint64_t monotime;
/* Retrieve counter of micro-seconds relative to an arbitrary point in time. */
extern monotime (*getMonotonicUs)(void);
/* Call once at startup to initialize the monotonic clock. Though this only
* needs to be called once, it may be called additional times without impact.
* Returns a printable string indicating the type of clock initialized.
* (The returned string is static and doesn't need to be freed.) */
const char * monotonicInit();
/* Functions to measure elapsed time. Example:
* monotime myTimer;
* elapsedStart(&myTimer);
* while (elapsedMs(myTimer) < 10) {} // loops for 10ms
*/
static inline void elapsedStart(monotime *start_time) {
*start_time = getMonotonicUs();
}
static inline uint64_t elapsedUs(monotime start_time) {
return getMonotonicUs() - start_time;
}
static inline uint64_t elapsedMs(monotime start_time) {
return elapsedUs(start_time) / 1000;
}
#endif

View File

@ -28,6 +28,7 @@
*/
#include "server.h"
#include "monotonic.h"
#include "cluster.h"
#include "slowlog.h"
#include "bio.h"
@ -2874,6 +2875,8 @@ void initServer(void) {
createSharedObjects();
adjustOpenFilesLimit();
const char *clk_msg = monotonicInit();
serverLog(LL_NOTICE, "monotonic clock: %s", clk_msg);
server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
if (server.el == NULL) {
serverLog(LL_WARNING,