Fix and cleanup Sentinel leaked fds test. (#8469)

* For consistency, use tclsh for the script as well
* Ignore leaked fds that originate from grandparent process, since we
  only care about fds redis-sentinel itself is responsible for
* Check every test iteration to catch problems early
* Some cleanups, e.g. parameterization of file name, etc.
This commit is contained in:
Yossi Gottlieb 2021-02-08 17:02:46 +02:00 committed by GitHub
parent b2351ea0dc
commit dbcc0a85d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 36 deletions

View File

@ -29,6 +29,7 @@ set ::sentinel_base_port 20000
set ::redis_base_port 30000
set ::redis_port_count 1024
set ::host "127.0.0.1"
set ::leaked_fds_file [file normalize "tmp/leaked_fds.txt"]
set ::pids {} ; # We kill everything at exit
set ::dirs {} ; # We remove all the temp dirs at exit
set ::run_matching {} ; # If non empty, only tests matching pattern are run.
@ -410,13 +411,13 @@ proc check_leaks instance_types {
# Execute all the units inside the 'tests' directory.
proc run_tests {} {
set sentinel_fd_leaks_file "sentinel_fd_leaks"
if { [file exists $sentinel_fd_leaks_file] } {
file delete $sentinel_fd_leaks_file
}
set tests [lsort [glob ../tests/*]]
foreach test $tests {
# Remove leaked_fds file before starting
if {$::leaked_fds_file != "" && [file exists $::leaked_fds_file]} {
file delete $::leaked_fds_file
}
if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
continue
}
@ -424,19 +425,19 @@ proc run_tests {} {
puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
source $test
check_leaks {redis sentinel}
# Check if a leaked fds file was created and abort the test.
if {$::leaked_fds_file != "" && [file exists $::leaked_fds_file]} {
puts [colorstr red "ERROR: Sentinel has leaked fds to scripts:"]
puts [exec cat $::leaked_fds_file]
puts "----"
incr ::failed
}
}
}
# Print a message and exists with 0 / 1 according to zero or more failures.
proc end_tests {} {
set sentinel_fd_leaks_file "sentinel_fd_leaks"
if { [file exists $sentinel_fd_leaks_file] } {
# temporarily disabling this error from failing the tests until leaks are fixed.
#puts [colorstr red "WARNING: sentinel test(s) failed, there are leaked fds in sentinel:"]
#puts [exec cat $sentinel_fd_leaks_file]
#exit 1
}
if {$::failed == 0 } {
puts "GOOD! No errors."
exit 0

View File

@ -10,6 +10,9 @@ set ::tlsdir "../../tls"
proc main {} {
parse_options
if {$::leaked_fds_file != ""} {
set ::env(LEAKED_FDS_FILE) $::leaked_fds_file
}
spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"] "../tests/includes/sentinel.conf"
spawn_instance redis $::redis_base_port $::instances_count
run_tests

View File

@ -0,0 +1,70 @@
#!/usr/bin/env tclsh
#
# This script detects file descriptors that have leaked from a parent process.
#
# Our goal is to detect file descriptors that were opened by the parent and
# not cleaned up prior to exec(), but not file descriptors that were inherited
# from the grandparent which the parent knows nothing about. To do that, we
# look up every potential leak and try to match it against open files by the
# grandparent process.
# Get PID of parent process
proc get_parent_pid {_pid} {
set fd [open "/proc/$_pid/status" "r"]
set content [read $fd]
close $fd
if {[regexp {\nPPid:\s+(\d+)} $content _ ppid]} {
return $ppid
}
error "failed to get parent pid"
}
# Linux only
set os [exec uname]
if {$os != "Linux"} {
puts "Only Linux is supported."
exit 0
}
if {![info exists env(LEAKED_FDS_FILE)]} {
puts "Missing LEAKED_FDS_FILE environment variable."
exit 0
}
set outfile $::env(LEAKED_FDS_FILE)
set parent_pid [get_parent_pid [pid]]
set grandparent_pid [get_parent_pid $parent_pid]
set leaked_fds {}
# Look for fds that were directly inherited from our parent but not from
# our grandparent (tcl)
foreach fd [glob -tails -directory "/proc/self/fd" *] {
# Ignore stdin/stdout/stderr
if {$fd == 0 || $fd == 1 || $fd == 2} {
continue
}
# Read symlink to get info about the file descriptor. This can be a real
# file name or an arbitrary string that identifies the fd.
if { [catch {set fdlink [file readlink "/proc/self/fd/$fd"]} err] } {
continue
}
# See if grandparent process has the same fd and info and skip if it does.
if { ! [catch {set gp_fdlink [file readlink "/proc/$grandparent_pid/fd/$fd"]} err] } {
if {$gp_fdlink == $fdlink} {
continue
}
}
lappend leaked_fds [list $fd $fdlink]
}
# Produce report only if we found leaks
if {[llength $leaked_fds] > 0} {
set fd [open $outfile "w"]
puts $fd [join $leaked_fds "\n"]
close $fd
}

View File

@ -41,8 +41,10 @@ test "(init) Sentinels can start monitoring a master" {
S $id SENTINEL SET mymaster down-after-milliseconds 2000
S $id SENTINEL SET mymaster failover-timeout 20000
S $id SENTINEL SET mymaster parallel-syncs 10
S $id SENTINEL SET mymaster notification-script ../../tests/includes/notify.sh
S $id SENTINEL SET mymaster client-reconfig-script ../../tests/includes/notify.sh
if {$::leaked_fds_file != ""} {
S $id SENTINEL SET mymaster notification-script ../../tests/helpers/check_leaked_fds.tcl
S $id SENTINEL SET mymaster client-reconfig-script ../../tests/helpers/check_leaked_fds.tcl
}
}
}

View File

@ -1,21 +0,0 @@
#!/usr/bin/env bash
OS=`uname -s`
if [ ${OS} != "Linux" ]
then
exit 0
fi
# fd 3 is meant to catch the actual access to /proc/pid/fd,
# in case there's an fd leak by the sentinel,
# it can take 3, but then the access to /proc will take another fd, and we'll catch that.
leaked_fd_count=`ls /proc/self/fd | grep -vE '^[0|1|2|3]$' | wc -l`
if [ $leaked_fd_count -gt 0 ]
then
sentinel_fd_leaks_file="../sentinel_fd_leaks"
if [ ! -f $sentinel_fd_leaks_file ]
then
ls -l /proc/self/fd | cat >> $sentinel_fd_leaks_file
lsof -p $$ | cat >> $sentinel_fd_leaks_file
fi
fi