diff --git a/src/debug.c b/src/debug.c index 540768018..e63ab5471 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1055,6 +1055,7 @@ NULL /* =========================== Crash handling ============================== */ +__attribute__ ((noinline)) void _serverAssert(const char *estr, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); @@ -1144,6 +1145,7 @@ void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, con _serverAssert(estr,file,line); } +__attribute__ ((noinline)) void _serverPanic(const char *file, int line, const char *msg, ...) { va_list ap; va_start(ap,msg); @@ -1837,7 +1839,7 @@ typedef struct { void *trace[BACKTRACE_MAX_SIZE]; } stacktrace_data; -static void *collect_stacktrace_data(void) { +__attribute__ ((noinline)) static void *collect_stacktrace_data(void) { /* allocate stacktrace_data struct */ stacktrace_data *trace_data = zmalloc(sizeof(stacktrace_data)); @@ -1854,6 +1856,7 @@ static void *collect_stacktrace_data(void) { return trace_data; } +__attribute__ ((noinline)) static void writeStacktraces(int fd, int uplevel) { /* get the list of all the process's threads that don't block or ignore the THREADS_SIGNAL */ pid_t pid = getpid(); @@ -1869,6 +1872,7 @@ static void writeStacktraces(int fd, int uplevel) { /* ThreadsManager_runOnThreads returns NULL if it is already running */ if (!stacktraces_data) return; + size_t skipped = 0; char buff[MAX_BUFF_LENGTH]; pid_t calling_tid = syscall(SYS_gettid); @@ -1876,8 +1880,11 @@ static void writeStacktraces(int fd, int uplevel) { for (size_t i = 0; i < len_tids; i++) { stacktrace_data *curr_stacktrace_data = stacktraces_data[i]; /*ThreadsManager_runOnThreads might fail to collect the thread's data */ - if (!curr_stacktrace_data) continue; - + if (!curr_stacktrace_data) { + skipped++; + continue; + } + /* stacktrace header includes the tid and the thread's name */ snprintf(buff, MAX_BUFF_LENGTH, "\n%d %s", curr_stacktrace_data->tid, curr_stacktrace_data->thread_name); if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; @@ -1890,7 +1897,7 @@ static void writeStacktraces(int fd, int uplevel) { curr_uplevel += uplevel + 2; /* Add an indication to header of the thread that is handling the log file */ snprintf(buff, MAX_BUFF_LENGTH, " *\n"); - } else { + } else { /* just add a new line */ snprintf(buff, MAX_BUFF_LENGTH, "\n"); } @@ -1903,10 +1910,14 @@ static void writeStacktraces(int fd, int uplevel) { zfree(curr_stacktrace_data); } zfree(stacktraces_data); + + snprintf(buff, MAX_BUFF_LENGTH, "\n%zu/%zu expected stacktraces.\n", len_tids - skipped, len_tids); + if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; + } #else /* __linux__*/ - +__attribute__ ((noinline)) static void writeStacktraces(int fd, int uplevel) { void *trace[BACKTRACE_MAX_SIZE]; @@ -1922,7 +1933,10 @@ static void writeStacktraces(int fd, int uplevel) { * to be called from signal handlers safely. * The eip argument is optional (can take NULL). * The uplevel argument indicates how many of the calling functions to skip. + * Functions that are taken in consideration in "uplevel" should be declared with + * __attribute__ ((noinline)) to make sure the compiler won't inline them. */ +__attribute__ ((noinline)) void logStackTrace(void *eip, int uplevel) { int fd = openDirectLogFiledes(); char *msg; @@ -2212,6 +2226,7 @@ void invalidFunctionWasCalled(void) {} typedef void (*invalidFunctionWasCalledType)(void); +__attribute__ ((noinline)) static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { UNUSED(secret); UNUSED(info); diff --git a/src/threads_mngr.c b/src/threads_mngr.c index cb93dcf33..b6bb97a8a 100644 --- a/src/threads_mngr.c +++ b/src/threads_mngr.c @@ -58,13 +58,14 @@ static sem_t wait_for_threads_sem; /* This flag is set while ThreadsManager_runOnThreads is running */ static redisAtomic int g_in_progress = 0; +static pthread_rwlock_t globals_rw_lock = PTHREAD_RWLOCK_INITIALIZER; /*============================ Internal prototypes ========================== */ static void invoke_callback(int sig); /* returns 0 if it is safe to start, IN_PROGRESS otherwise. */ static int test_and_start(void); static void wait_threads(void); -/* Clean up global variable. +/* Clean up global variable. Assuming we are under the g_in_progress protection, this is not a thread-safe function */ static void ThreadsManager_cleanups(void); @@ -82,6 +83,7 @@ void ThreadsManager_init(void) { sigaction(SIGUSR2, &act, NULL); } +__attribute__ ((noinline)) void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback) { /* Check if it is safe to start running. If not - return */ if(test_and_start() == IN_PROGRESS) { @@ -95,7 +97,7 @@ void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_c g_tids_len = tids_len; /* Allocate the output buffer */ - g_output_array = zmalloc(sizeof(void*) * tids_len); + g_output_array = zcalloc(sizeof(void*) * tids_len); /* Initialize a semaphore that we will be waiting on for the threads use pshared = 0 to indicate the semaphore is shared between the process's threads (and not between processes), @@ -131,19 +133,30 @@ static int test_and_start(void) { return prev_state; } +__attribute__ ((noinline)) static void invoke_callback(int sig) { UNUSED(sig); - size_t thread_id; - atomicGetIncr(g_thread_ids, thread_id, 1); - g_output_array[thread_id] = g_callback(); - size_t curr_done_count; - atomicIncrGet(g_num_threads_done, curr_done_count, 1); - - /* last thread shuts down the light */ - if (curr_done_count == g_tids_len) { - sem_post(&wait_for_threads_sem); + /* If the lock is already locked for write, we are running cleanups, no reason to proceed. */ + if(0 != pthread_rwlock_tryrdlock(&globals_rw_lock)) { + serverLog(LL_WARNING, "threads_mngr: ThreadsManager_cleanups() is in progress, can't invoke signal handler."); + return; } + + if (g_output_array) { + size_t thread_id; + atomicGetIncr(g_thread_ids, thread_id, 1); + g_output_array[thread_id] = g_callback(); + size_t curr_done_count; + atomicIncrGet(g_num_threads_done, curr_done_count, 1); + + /* last thread shuts down the light */ + if (curr_done_count == g_tids_len) { + sem_post(&wait_for_threads_sem); + } + } + + pthread_rwlock_unlock(&globals_rw_lock); } static void wait_threads(void) { @@ -161,15 +174,11 @@ static void wait_threads(void) { serverLog(LL_WARNING, "threads_mngr: waiting for threads' output was interrupted by signal. Continue waiting."); continue; } - - if (status == -1) { - if (errno == ETIMEDOUT) { - serverLog(LL_WARNING, "threads_mngr: waiting for threads' output timed out"); - } - } } static void ThreadsManager_cleanups(void) { + pthread_rwlock_wrlock(&globals_rw_lock); + g_callback = NULL; g_tids_len = 0; g_output_array = NULL; @@ -179,6 +188,8 @@ static void ThreadsManager_cleanups(void) { /* Lastly, turn off g_in_progress */ atomicSet(g_in_progress, 0); + pthread_rwlock_unlock(&globals_rw_lock); + } #else diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl index 7bd2eb83c..3aec1fbed 100644 --- a/tests/integration/logging.tcl +++ b/tests/integration/logging.tcl @@ -19,6 +19,25 @@ if {$system_name eq {darwin}} { } } +proc check_log_backtrace_for_debug {log_pattern} { + set res [wait_for_log_messages 0 \"$log_pattern\" 0 100 100] + if {$::verbose} { puts $res} + + # If the stacktrace is printed more than once, it means redis crashed during crash report generation + assert_equal [count_log_message 0 "STACK TRACE"] 1 + + set pattern "*debugCommand*" + set res [wait_for_log_messages 0 \"$pattern\" 0 100 100] + if {$::verbose} { puts $res} +} + +# used when backtrace_supported == 0 +proc check_crash_log {log_pattern} { + set res [wait_for_log_messages 0 \"$log_pattern\" 0 50 100] + if {$::verbose} { puts $res } +} + +# test the watchdog and the stack trace report from multiple threads if {$backtrace_supported} { set server_path [tmpdir server.log] start_server [list overrides [list dir $server_path]] { @@ -32,13 +51,9 @@ if {$backtrace_supported} { assert_equal [count_log_message 0 "threads_mngr: waiting for threads' output timed out"] 0 assert_equal [count_log_message 0 "bioProcessBackgroundJobs"] 3 } - - set pattern "*redis-server *main*" - set res [wait_for_log_messages 0 \"$pattern\" 0 100 100] - if {$::verbose} { puts $res } - set pattern "*debugCommand*" - set res [wait_for_log_messages 0 \"$pattern\" 0 100 100] - if {$::verbose} { puts $res } + check_log_backtrace_for_debug "*WATCHDOG TIMER EXPIRED*" + # make sure redis is still alive + assert_equal "PONG" [r ping] } } } @@ -46,27 +61,61 @@ if {$backtrace_supported} { # Valgrind will complain that the process terminated by a signal, skip it. if {!$::valgrind} { if {$backtrace_supported} { - set crash_pattern "*STACK TRACE*" - } else { - set crash_pattern "*crashed by signal*" + set check_cb check_log_backtrace_for_debug + } else { + set check_cb check_crash_log } + # test being killed by a SIGABRT from outside set server_path [tmpdir server1.log] start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] { test "Crash report generated on SIGABRT" { set pid [s process_id] + r deferred 1 + r debug sleep 10 ;# so that we see the function in the stack trace + r flush + after 100 ;# wait for redis to get into the sleep exec kill -SIGABRT $pid - set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100] - if {$::verbose} { puts $res } + $check_cb "*crashed by signal*" } } + # test DEBUG SEGFAULT set server_path [tmpdir server2.log] start_server [list overrides [list dir $server_path crash-memcheck-enabled no]] { test "Crash report generated on DEBUG SEGFAULT" { catch {r debug segfault} - set res [wait_for_log_messages 0 \"$crash_pattern\" 0 50 100] - if {$::verbose} { puts $res } + $check_cb "*crashed by signal*" + } + } + + # test DEBUG SIGALRM being non-fatal + set server_path [tmpdir server3.log] + start_server [list overrides [list dir $server_path]] { + test "Stacktraces generated on SIGALRM" { + set pid [s process_id] + r deferred 1 + r debug sleep 10 ;# so that we see the function in the stack trace + r flush + after 100 ;# wait for redis to get into the sleep + exec kill -SIGALRM $pid + $check_cb "*Received SIGALRM*" + r read + r deferred 0 + # make sure redis is still alive + assert_equal "PONG" [r ping] + } + } +} + +# test DEBUG ASSERT +if {$backtrace_supported} { + set server_path [tmpdir server4.log] + # Use exit() instead of abort() upon assertion so Valgrind tests won't fail. + start_server [list overrides [list dir $server_path use-exit-on-panic yes crash-memcheck-enabled no]] { + test "Generate stacktrace on assertion" { + catch {r debug assert} + check_log_backtrace_for_debug "*ASSERTION FAILED*" } } }