
The code used to decide on the next time to wake on a timer with microsecond accuracy, but when deciding to go to sleep it used milliseconds accuracy (with truncation), this means that it would wake up too early, see that there's no timer to process, and go to sleep again for 0ms again and again until the right microsecond arrived. i.e. a timer for 100ms, would sleep for 99ms, but then do a busy loop through the kernel in the last millisecond, triggering many calls to beforeSleep. The fix is to change all the logic in ae.c to work with microseconds, which is good since most of the ae backends support micro (or even nano) seconds. however the epoll backend, doesn't support micro, so to avoid this problem it needs to round upwards, rather than truncate. Issue created by the monotonic timer PR #7644 (redis 6.2) Before that, all the timers in ae.c were in milliseconds (using mstime), so when it requested the backend to sleep till the next timer event, it would have worked ok.
137 lines
4.8 KiB
C
137 lines
4.8 KiB
C
/* Linux epoll(2) based ae.c module
|
|
*
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
|
|
#include <sys/epoll.h>
|
|
|
|
typedef struct aeApiState {
|
|
int epfd;
|
|
struct epoll_event *events;
|
|
} aeApiState;
|
|
|
|
static int aeApiCreate(aeEventLoop *eventLoop) {
|
|
aeApiState *state = zmalloc(sizeof(aeApiState));
|
|
|
|
if (!state) return -1;
|
|
state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
|
|
if (!state->events) {
|
|
zfree(state);
|
|
return -1;
|
|
}
|
|
state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
|
|
if (state->epfd == -1) {
|
|
zfree(state->events);
|
|
zfree(state);
|
|
return -1;
|
|
}
|
|
anetCloexec(state->epfd);
|
|
eventLoop->apidata = state;
|
|
return 0;
|
|
}
|
|
|
|
static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
|
|
aeApiState *state = eventLoop->apidata;
|
|
|
|
state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize);
|
|
return 0;
|
|
}
|
|
|
|
static void aeApiFree(aeEventLoop *eventLoop) {
|
|
aeApiState *state = eventLoop->apidata;
|
|
|
|
close(state->epfd);
|
|
zfree(state->events);
|
|
zfree(state);
|
|
}
|
|
|
|
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
|
|
aeApiState *state = eventLoop->apidata;
|
|
struct epoll_event ee = {0}; /* avoid valgrind warning */
|
|
/* If the fd was already monitored for some event, we need a MOD
|
|
* operation. Otherwise we need an ADD operation. */
|
|
int op = eventLoop->events[fd].mask == AE_NONE ?
|
|
EPOLL_CTL_ADD : EPOLL_CTL_MOD;
|
|
|
|
ee.events = 0;
|
|
mask |= eventLoop->events[fd].mask; /* Merge old events */
|
|
if (mask & AE_READABLE) ee.events |= EPOLLIN;
|
|
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
|
|
ee.data.fd = fd;
|
|
if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
|
|
return 0;
|
|
}
|
|
|
|
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
|
|
aeApiState *state = eventLoop->apidata;
|
|
struct epoll_event ee = {0}; /* avoid valgrind warning */
|
|
int mask = eventLoop->events[fd].mask & (~delmask);
|
|
|
|
ee.events = 0;
|
|
if (mask & AE_READABLE) ee.events |= EPOLLIN;
|
|
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
|
|
ee.data.fd = fd;
|
|
if (mask != AE_NONE) {
|
|
epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
|
|
} else {
|
|
/* Note, Kernel < 2.6.9 requires a non null event pointer even for
|
|
* EPOLL_CTL_DEL. */
|
|
epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee);
|
|
}
|
|
}
|
|
|
|
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
|
|
aeApiState *state = eventLoop->apidata;
|
|
int retval, numevents = 0;
|
|
|
|
retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
|
|
tvp ? (tvp->tv_sec*1000 + (tvp->tv_usec + 999)/1000) : -1);
|
|
if (retval > 0) {
|
|
int j;
|
|
|
|
numevents = retval;
|
|
for (j = 0; j < numevents; j++) {
|
|
int mask = 0;
|
|
struct epoll_event *e = state->events+j;
|
|
|
|
if (e->events & EPOLLIN) mask |= AE_READABLE;
|
|
if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
|
|
if (e->events & EPOLLERR) mask |= AE_WRITABLE|AE_READABLE;
|
|
if (e->events & EPOLLHUP) mask |= AE_WRITABLE|AE_READABLE;
|
|
eventLoop->fired[j].fd = e->data.fd;
|
|
eventLoop->fired[j].mask = mask;
|
|
}
|
|
}
|
|
return numevents;
|
|
}
|
|
|
|
static char *aeApiName(void) {
|
|
return "epoll";
|
|
}
|