diff --git a/src/debug.c b/src/debug.c index 26cb98f1e..b924d9ed3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1029,7 +1029,7 @@ NULL /* =========================== Crash handling ============================== */ -__attribute__ ((noinline)) +__attribute__ ((noinline)) void _serverAssert(const char *estr, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); @@ -1119,7 +1119,7 @@ void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, con _serverAssert(estr,file,line); } -__attribute__ ((noinline)) +__attribute__ ((noinline)) void _serverPanic(const char *file, int line, const char *msg, ...) { va_list ap; va_start(ap,msg); @@ -1796,18 +1796,31 @@ void closeDirectLogFiledes(int fd) { if (!log_to_stdout) close(fd); } +#if defined(HAVE_BACKTRACE) && defined(__linux__) +static int stacktrace_pipe[2] = {0}; +static void setupStacktracePipe(void) { + if (-1 == anetPipe(stacktrace_pipe, O_CLOEXEC | O_NONBLOCK, O_CLOEXEC | O_NONBLOCK)) { + serverLog(LL_WARNING, "setupStacktracePipe failed: %s", strerror(errno)); + } +} +#else +static void setupStacktracePipe(void) {/* we don't need a pipe to write the stacktraces */} +#endif #ifdef HAVE_BACKTRACE #define BACKTRACE_MAX_SIZE 100 #ifdef __linux__ +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif #include +#include #include #include #define TIDS_MAX_SIZE 50 -static size_t get_ready_to_signal_threads_tids(pid_t pid, int sig_num, pid_t tids[TIDS_MAX_SIZE]); +static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_SIZE]); -#define MAX_BUFF_LENGTH 256 typedef struct { char thread_name[16]; int trace_size; @@ -1815,64 +1828,47 @@ typedef struct { void *trace[BACKTRACE_MAX_SIZE]; } stacktrace_data; -static stacktrace_data *stacktraces_mempool = NULL; -static redisAtomic size_t g_thread_ids = 0; - -static stacktrace_data *get_stack_trace_pool(void) { - size_t thread_id; - atomicGetIncr(g_thread_ids, thread_id, 1); - return stacktraces_mempool + thread_id; -} - __attribute__ ((noinline)) static void collect_stacktrace_data(void) { - /* allocate stacktrace_data struct */ - stacktrace_data *trace_data = get_stack_trace_pool(); + stacktrace_data trace_data = {{0}}; /* Get the stack trace first! */ - trace_data->trace_size = backtrace(trace_data->trace, BACKTRACE_MAX_SIZE); + trace_data.trace_size = backtrace(trace_data.trace, BACKTRACE_MAX_SIZE); /* get the thread name */ - prctl(PR_GET_NAME, trace_data->thread_name); + prctl(PR_GET_NAME, trace_data.thread_name); /* get the thread id */ - trace_data->tid = syscall(SYS_gettid); + trace_data.tid = syscall(SYS_gettid); + + /* Send the output to the main process*/ + if (write(stacktrace_pipe[1], &trace_data, sizeof(trace_data)) == -1) {/* Avoid warning. */}; } __attribute__ ((noinline)) static void writeStacktraces(int fd, int uplevel) { /* get the list of all the process's threads that don't block or ignore the THREADS_SIGNAL */ - pid_t pid = getpid(); pid_t tids[TIDS_MAX_SIZE]; - size_t len_tids = get_ready_to_signal_threads_tids(pid, THREADS_SIGNAL, tids); + size_t len_tids = get_ready_to_signal_threads_tids(THREADS_SIGNAL, tids); if (!len_tids) { - serverLogFromHandler(LL_WARNING, "writeStacktraces(): Failed to get the process's threads."); + serverLogRawFromHandler(LL_WARNING, "writeStacktraces(): Failed to get the process's threads."); } - stacktrace_data stacktraces[len_tids]; - stacktraces_mempool = stacktraces; - memset(stacktraces, 0, sizeof(stacktrace_data) * len_tids); - - /* restart mempool iterator*/ - atomicSet(g_thread_ids, 0); + char buff[PIPE_BUF]; + /* Clear the stacktraces pipe */ + while (read(stacktrace_pipe[0], &buff, sizeof(buff)) > 0) {} /* ThreadsManager_runOnThreads returns 0 if it is already running */ if (!ThreadsManager_runOnThreads(tids, len_tids, collect_stacktrace_data)) return; - size_t skipped = 0; + size_t collected = 0; - char buff[MAX_BUFF_LENGTH]; pid_t calling_tid = syscall(SYS_gettid); - /* for backtrace_data in backtraces_data: */ - for (size_t i = 0; i < len_tids; i++) { - stacktrace_data curr_stacktrace_data = stacktraces[i]; - /*ThreadsManager_runOnThreads might fail to collect the thread's data */ - if (0 == curr_stacktrace_data.trace_size) { - skipped++; - continue; - } + /* Read the stacktrace_pipe until it's empty */ + stacktrace_data curr_stacktrace_data = {{0}}; + while (read(stacktrace_pipe[0], &curr_stacktrace_data, sizeof(curr_stacktrace_data)) > 0) { /* stacktrace header includes the tid and the thread's name */ - snprintf(buff, MAX_BUFF_LENGTH, "\n%d %s", curr_stacktrace_data.tid, curr_stacktrace_data.thread_name); + snprintf_async_signal_safe(buff, sizeof(buff), "\n%d %s", curr_stacktrace_data.tid, curr_stacktrace_data.thread_name); if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; /* skip kernel call to the signal handler, the signal handler and the callback addresses */ @@ -1882,19 +1878,19 @@ static void writeStacktraces(int fd, int uplevel) { /* skip signal syscall and ThreadsManager_runOnThreads */ curr_uplevel += uplevel + 2; /* Add an indication to header of the thread that is handling the log file */ - snprintf(buff, MAX_BUFF_LENGTH, " *\n"); + if (write(fd," *\n",strlen(" *\n")) == -1) {/* Avoid warning. */}; } else { /* just add a new line */ - snprintf(buff, MAX_BUFF_LENGTH, "\n"); + if (write(fd,"\n",strlen("\n")) == -1) {/* Avoid warning. */}; } - if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; - /* add the stacktrace */ backtrace_symbols_fd(curr_stacktrace_data.trace+curr_uplevel, curr_stacktrace_data.trace_size-curr_uplevel, fd); + + ++collected; } - snprintf(buff, MAX_BUFF_LENGTH, "\n%zu/%zu expected stacktraces.\n", len_tids - skipped, len_tids); + snprintf_async_signal_safe(buff, sizeof(buff), "\n%lu/%lu expected stacktraces.\n", (long unsigned)(collected), (long unsigned)len_tids); if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; } @@ -1919,7 +1915,7 @@ static void writeStacktraces(int fd, int uplevel) { * Functions that are taken in consideration in "uplevel" should be declared with * __attribute__ ((noinline)) to make sure the compiler won't inline them. */ -__attribute__ ((noinline)) +__attribute__ ((noinline)) void logStackTrace(void *eip, int uplevel) { int fd = openDirectLogFiledes(); char *msg; @@ -1940,6 +1936,9 @@ void logStackTrace(void *eip, int uplevel) { /* Write symbols to log file */ ++uplevel; writeStacktraces(fd, uplevel); + msg = "\n------ STACK TRACE DONE ------\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + /* Cleanup */ closeDirectLogFiledes(fd); @@ -2215,7 +2214,7 @@ void invalidFunctionWasCalled(void) {} typedef void (*invalidFunctionWasCalledType)(void); -__attribute__ ((noinline)) +__attribute__ ((noinline)) static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { UNUSED(secret); UNUSED(info); @@ -2223,7 +2222,7 @@ static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { if(pthread_mutex_lock(&signal_handler_lock) == EDEADLK) { /* If this thread already owns the lock (meaning we crashed during handling a signal) * log that the crash report can't be generated. */ - serverLog(LL_WARNING, + serverLogRawFromHandler(LL_WARNING, "Crashed running signal handler. Can't continue to generate the crash report"); /* gracefully exit */ bugReportEnd(1, sig); @@ -2282,6 +2281,8 @@ static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { } void setupDebugSigHandlers(void) { + setupStacktracePipe(); + setupSigSegvHandler(); struct sigaction act; @@ -2293,7 +2294,7 @@ void setupDebugSigHandlers(void) { } void setupSigSegvHandler(void) { - /* Initialize the signal handler lock. + /* Initialize the signal handler lock. Attempting to initialize an already initialized mutex or mutexattr results in undefined behavior. */ if (!signal_handler_lock_initialized) { /* Set signal handler with error checking attribute. re-lock within the same thread will error. */ @@ -2355,7 +2356,7 @@ void printCrashReport(void) { void bugReportEnd(int killViaSignal, int sig) { struct sigaction act; - serverLogRaw(LL_WARNING|LL_RAW, + serverLogRawFromHandler(LL_WARNING|LL_RAW, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" " Please report the crash by opening an issue on github:\n\n" " http://github.com/redis/redis/issues\n\n" @@ -2424,16 +2425,16 @@ void sigalrmSignalHandler(int sig, siginfo_t *info, void *secret) { /* SIGALRM can be sent explicitly to the process calling kill() to get the stacktraces, or every watchdog_period interval. In the last case, si_pid is not set */ if(info->si_pid == 0) { - serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); + serverLogRawFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); } else { - serverLogFromHandler(LL_WARNING, "\nReceived SIGALRM"); + serverLogRawFromHandler(LL_WARNING, "\nReceived SIGALRM"); } #ifdef HAVE_BACKTRACE logStackTrace(getAndSetMcontextEip(uc, NULL), 1); #else - serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); + serverLogRawFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); #endif - serverLogFromHandler(LL_WARNING,"--------\n"); + serverLogRawFromHandler(LL_WARNING,"--------\n"); } /* Schedule a SIGALRM delivery after the specified period in milliseconds. @@ -2478,30 +2479,41 @@ void debugDelay(int usec) { /* =========================== Stacktrace Utils ============================ */ + + /** If it doesn't block and doesn't ignore, return 1 (the thread will handle the signal) * If thread tid blocks or ignores sig_num returns 0 (thread is not ready to catch the signal). * also returns 0 if something is wrong and prints a warning message to the log file **/ -static int is_thread_ready_to_signal(pid_t pid, pid_t tid, int sig_num) { - /* open the threads status file */ - char buff[MAX_BUFF_LENGTH]; - snprintf(buff, MAX_BUFF_LENGTH, "/proc/%d/task/%d/status", pid, tid); - FILE *thread_status_file = fopen(buff, "r"); - if (thread_status_file == NULL) { - serverLog(LL_WARNING, - "tid:%d: failed to open /proc/%d/task/%d/status file", tid, pid, tid); +static int is_thread_ready_to_signal(const char *proc_pid_task_path, const char *tid, int sig_num) { + /* Open the threads status file path /proc/>/task//status */ + char path_buff[PATH_MAX]; + snprintf_async_signal_safe(path_buff, PATH_MAX, "%s/%s/status", proc_pid_task_path, tid); + + int thread_status_file = open(path_buff, O_RDONLY); + char buff[PATH_MAX]; + if (thread_status_file == -1) { + serverLogFromHandler(LL_WARNING, "tid:%s: failed to open %s file", tid, path_buff); return 0; } int ret = 1; - size_t field_name_len = strlen("SigBlk:"); /* SigIgn has the same length */ + size_t field_name_len = strlen("SigBlk:\t"); /* SigIgn has the same length */ char *line = NULL; size_t fields_count = 2; - while ((line = fgets(buff, MAX_BUFF_LENGTH, thread_status_file)) && fields_count) { + while ((line = fgets_async_signal_safe(buff, PATH_MAX, thread_status_file)) && fields_count) { /* iterate the file until we reach SigBlk or SigIgn field line */ - if (!strncmp(buff, "SigBlk:", field_name_len) || !strncmp(buff, "SigIgn:", field_name_len)) { - /* check if the signal exist in the mask */ - unsigned long sig_mask = strtoul(buff + field_name_len, NULL, 16); - if(sig_mask & sig_num) { /* if the signal is blocked/ignored return 0 */ + if (!strncmp(buff, "SigBlk:\t", field_name_len) || !strncmp(buff, "SigIgn:\t", field_name_len)) { + line = buff + field_name_len; + unsigned long sig_mask; + if (-1 == string2ul_base16_async_signal_safe(line, sizeof(buff), &sig_mask)) { + serverLogRawFromHandler(LL_WARNING, "Can't convert signal mask to an unsigned long due to an overflow"); + ret = 0; + break; + } + + /* The bit position in a signal mask aligns with the signal number. Since signal numbers start from 1 + we need to adjust the signal number by subtracting 1 to align it correctly with the zero-based indexing used */ + if (sig_mask & (1L << (sig_num - 1))) { /* if the signal is blocked/ignored return 0 */ ret = 0; break; } @@ -2509,58 +2521,84 @@ static int is_thread_ready_to_signal(pid_t pid, pid_t tid, int sig_num) { } } - fclose(thread_status_file); + close(thread_status_file); /* if we reached EOF, it means we haven't found SigBlk or/and SigIgn, something is wrong */ if (line == NULL) { ret = 0; - serverLog(LL_WARNING, - "tid:%d: failed to find SigBlk or/and SigIgn field(s) in /proc/%d/task/%d/status file", tid, pid, tid); + serverLogFromHandler(LL_WARNING, "tid:%s: failed to find SigBlk or/and SigIgn field(s) in %s/%s/status file", tid, proc_pid_task_path, tid); } return ret; } +/** We are using syscall(SYS_getdents64) to read directories, which unlike opendir(), is considered + * async-signal-safe. This function wrapper getdents64() in glibc is supported as of glibc 2.30. + * To support earlier versions of glibc, we use syscall(SYS_getdents64), which requires defining + * linux_dirent64 ourselves. This structure is very old and stable: It will not change unless the kernel + * chooses to break compatibility with all existing binaries. Highly Unlikely. +*/ +struct linux_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; /* Length of this linux_dirent */ + unsigned char d_type; + char d_name[256]; /* Filename (null-terminated) */ +}; + /** Returns the number of the process's threads that can receive signal sig_num. * Writes into tids the tids of these threads. * If it fails, returns 0. */ -static size_t get_ready_to_signal_threads_tids(pid_t pid, int sig_num, pid_t tids[TIDS_MAX_SIZE]) { - /* Initialize the path the process threads' directory. */ - char path_buff[MAX_BUFF_LENGTH]; - snprintf(path_buff, MAX_BUFF_LENGTH, "/proc/%d/task", pid); +static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_SIZE]) { + /* Open /proc//task file. */ + char path_buff[PATH_MAX]; + snprintf_async_signal_safe(path_buff, PATH_MAX, "/proc/%d/task", getpid()); - /* Get the directory handler. */ - DIR *dir; - if (!(dir = opendir(path_buff))) return 0; + int dir; + if (-1 == (dir = open(path_buff, O_RDONLY | O_DIRECTORY))) return 0; size_t tids_count = 0; - struct dirent *entry; pid_t calling_tid = syscall(SYS_gettid); int current_thread_index = -1; + long nread; + char buff[PATH_MAX]; - /* Each thread is represented by a directory */ - while ((entry = readdir(dir)) != NULL) { - if (entry->d_type == DT_DIR) { + /* readdir() is not async-signal-safe (AS-safe). + Hence, we read the file using SYS_getdents64, which is considered AS-sync*/ + while ((nread = syscall(SYS_getdents64, dir, buff, PATH_MAX))) { + if (nread == -1) { + close(dir); + serverLogRawFromHandler(LL_WARNING, "get_ready_to_signal_threads_tids(): Failed to read the process's task directory"); + return 0; + } + /* Each thread is represented by a directory */ + for (long pos = 0; pos < nread;) { + struct linux_dirent64 *entry = (struct linux_dirent64 *)(buff + pos); + pos += entry->d_reclen; /* Skip irrelevant directories. */ - if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) { - /* the thread's directory name is equivalent to its tid. */ - pid_t tid = atoi(entry->d_name); + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; - if(!is_thread_ready_to_signal(pid, tid, sig_num)) continue; + /* the thread's directory name is equivalent to its tid. */ + long tid; + string2l(entry->d_name, strlen(entry->d_name), &tid); - if(tid == calling_tid) { - current_thread_index = tids_count; - } + if(!is_thread_ready_to_signal(path_buff, entry->d_name, sig_num)) continue; - /* save the thread id */ - tids[tids_count++] = tid; + if(tid == calling_tid) { + current_thread_index = tids_count; + } + + /* save the thread id */ + tids[tids_count++] = tid; + + /* Stop if we reached the maximum threads number. */ + if(tids_count == TIDS_MAX_SIZE) { + serverLogRawFromHandler(LL_WARNING, "get_ready_to_signal_threads_tids(): Reached the limit of the tids buffer."); + break; } } - /* Stop if we reached the maximum threads number. */ - if(tids_count == TIDS_MAX_SIZE) { - serverLogFromHandler(LL_WARNING,"get_ready_to_signal_threads_tids(): Reached the limit of the tids buffer."); - break; - } + + if(tids_count == TIDS_MAX_SIZE) break; } /* Swap the last tid with the the current thread id */ @@ -2571,7 +2609,7 @@ static size_t get_ready_to_signal_threads_tids(pid_t pid, int sig_num, pid_t tid tids[current_thread_index] = last_tid; } - closedir(dir); + close(dir); return tids_count; } diff --git a/src/server.c b/src/server.c index 454bd969d..19be60c4a 100644 --- a/src/server.c +++ b/src/server.c @@ -167,13 +167,9 @@ void _serverLog(int level, const char *fmt, ...) { serverLogRaw(level,msg); } -/* Log a fixed message without printf-alike capabilities, in a way that is - * safe to call from a signal handler. - * - * We actually use this only for signals that are not fatal from the point - * of view of Redis. Signals that are going to kill the server anyway and - * where we need printf-alike features are served by serverLog(). */ -void serverLogFromHandler(int level, const char *msg) { +/* Low level logging from signal handler. Should be used with pre-formatted strings. + See serverLogFromHandler. */ +void serverLogRawFromHandler(int level, const char *msg) { int fd; int log_to_stdout = server.logfile[0] == '\0'; char buf[64]; @@ -183,18 +179,41 @@ void serverLogFromHandler(int level, const char *msg) { fd = log_to_stdout ? STDOUT_FILENO : open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644); if (fd == -1) return; - ll2string(buf,sizeof(buf),getpid()); - if (write(fd,buf,strlen(buf)) == -1) goto err; - if (write(fd,":signal-handler (",17) == -1) goto err; - ll2string(buf,sizeof(buf),time(NULL)); - if (write(fd,buf,strlen(buf)) == -1) goto err; - if (write(fd,") ",2) == -1) goto err; - if (write(fd,msg,strlen(msg)) == -1) goto err; - if (write(fd,"\n",1) == -1) goto err; + if (level & LL_RAW) { + if (write(fd,msg,strlen(msg)) == -1) goto err; + } + else { + ll2string(buf,sizeof(buf),getpid()); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,":signal-handler (",17) == -1) goto err; + ll2string(buf,sizeof(buf),time(NULL)); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,") ",2) == -1) goto err; + if (write(fd,msg,strlen(msg)) == -1) goto err; + if (write(fd,"\n",1) == -1) goto err; + } err: if (!log_to_stdout) close(fd); } +/* An async-signal-safe version of serverLog. if LL_RAW is not included in level flags, + * The message format is: :signal-handler (