From 20c34a91da474c05d4e7aca50f7571bc6482d0c5 Mon Sep 17 00:00:00 2001
From: Vivek Saini
Date: Thu, 28 Apr 2022 21:30:40 +0000
Subject: [PATCH] Converted some existing PSYNC tests for multimaster

---
 tests/integration/psync2-reg-multimaster.tcl  | 111 +++++++++++++
 .../replication-psync-multimaster.tcl         | 172 ++++++++++++++++++++
 2 files changed, 283 insertions(+)
 create mode 100644 tests/integration/psync2-reg-multimaster.tcl
 create mode 100644 tests/integration/replication-psync-multimaster.tcl

diff --git a/tests/integration/psync2-reg-multimaster.tcl b/tests/integration/psync2-reg-multimaster.tcl
new file mode 100644
index 000000000..1f15d75bb
--- /dev/null
+++ b/tests/integration/psync2-reg-multimaster.tcl
@@ -0,0 +1,111 @@
+# Issue 3899 regression test, converted for multi-master.
+# We create three active-replica, multi-master instances, each one
+# replicating from the other two (a full mesh instead of the original
+# master -> slave -> slave2 chain), and continuously break replication
+# links while traffic is generated by keydb-benchmark. At the end we
+# check that the data is the same everywhere.
+
+start_server {tags {"psync2"} overrides {active-replica {yes} multi-master {yes} client-output-buffer-limit {replica 200mb 10mb 999999} } } {
+start_server {overrides {active-replica {yes} multi-master {yes} client-output-buffer-limit {replica 200mb 10mb 999999} } } {
+start_server {overrides {active-replica {yes} multi-master {yes} client-output-buffer-limit {replica 200mb 10mb 999999} } } {
+    # Config
+    set debug_msg 0 ; # Enable additional debug messages
+
+    set duration 20 ; # Total test seconds
+
+    # R(j) is the client for server level -j; host, port and unix socket
+    # are kept in parallel arrays.
+    for {set j 0} {$j < 3} {incr j} {
+        set R($j) [srv [expr 0-$j] client]
+        set R_host($j) [srv [expr 0-$j] host]
+        set R_port($j) [srv [expr 0-$j] port]
+        set R_unixsocket($j) [srv [expr 0-$j] unixsocket]
+        if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+    }
+
+    # Setup the replication and backlog parameters
+    test "PSYNC2 #3899 regression: setup" {
+        # Point every instance at the other two.
+        $R(0) slaveof $R_host(1) $R_port(1)
+        $R(0) slaveof $R_host(2) $R_port(2)
+        $R(1) slaveof $R_host(0) $R_port(0)
+        $R(1) slaveof $R_host(2) $R_port(2)
+        $R(2) slaveof $R_host(0) $R_port(0)
+        $R(2) slaveof $R_host(1) $R_port(1)
+
+        $R(0) set foo bar
+        # Every node is a replica here, so the link of R(0) is checked as well.
+        wait_for_condition 50 1000 {
+            [status $R(0) master_link_status] == "up" &&
+            [status $R(1) master_link_status] == "up" &&
+            [status $R(2) master_link_status] == "up" &&
+            [$R(1) dbsize] == 1 &&
+            [$R(2) dbsize] == 1
+        } else {
+            fail "Replicas not replicating from master"
+        }
+
+        $R(0) config set repl-backlog-size 200mb
+        $R(1) config set repl-backlog-size 200mb
+        $R(2) config set repl-backlog-size 200mb
+
+    }
+
+    # Generate write load against R(0) in the background and, for $duration
+    # seconds, randomly drop the master connections of R(1) and R(2).
+    set cycle_start_time [clock milliseconds]
+    set bench_pid [exec src/keydb-benchmark -s $R_unixsocket(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
+    while 1 {
+        set elapsed [expr {[clock milliseconds]-$cycle_start_time}]
+        if {$elapsed > $duration*1000} break
+        if {rand() < .05} {
+            test "PSYNC2 #3899 regression: kill first replica" {
+                $R(1) client kill type master
+            }
+        }
+        if {rand() < .05} {
+            test "PSYNC2 #3899 regression: kill chained replica" {
+                $R(2) client kill type master
+            }
+        }
+        after 100
+    }
+    # The benchmark may already have exited on its own; a failed kill must
+    # not abort the run before the consistency check.
+    catch {exec kill -9 $bench_pid}
+
+    if {$debug_msg} {
+        for {set j 0} {$j < 100} {incr j} {
+            if {
+                [$R(0) debug digest] == [$R(1) debug digest] &&
+                [$R(1) debug digest] == [$R(2) debug digest]
+            } break
+            puts [$R(0) debug digest]
+            puts [$R(1) debug digest]
+            puts [$R(2) debug digest]
+            after 1000
+        }
+    }
+
+    test "PSYNC2 #3899 regression: verify consistency" {
+        wait_for_condition 50 1000 {
+            ([$R(0) debug digest] eq [$R(1) debug digest]) &&
+            ([$R(1) debug digest] eq [$R(2) debug digest])
+        } else {
+            # Dump each dataset to /tmp so the mismatch can be diffed.
+            set csv3 [csvdump {r -2}]
+            set csv2 [csvdump {r -1}]
+            set csv1 [csvdump r]
+            set fd [open /tmp/repldump1.txt w]
+            puts -nonewline $fd $csv1
+            close $fd
+            set fd [open /tmp/repldump2.txt w]
+            puts -nonewline $fd $csv2
+            close $fd
+            set fd [open /tmp/repldump3.txt w]
+            puts -nonewline $fd $csv3
+            close $fd
+            fail [format "The three instances have different data sets:\n\tnode 1: %s\n\tnode 2: %s\n\tnode 3: %s\nRun diff -u against /tmp/repldump*.txt for more info" [$R(0) debug digest] [$R(1) debug digest] [$R(2) debug digest] ]
+        }
+    }
+}}}
diff --git a/tests/integration/replication-psync-multimaster.tcl b/tests/integration/replication-psync-multimaster.tcl
new file mode 100644
index 000000000..2665adf9e
--- /dev/null
+++ b/tests/integration/replication-psync-multimaster.tcl
@@ -0,0 +1,172 @@
+# Creates a pair of active replicas (the "master" and the "replica", each
+# replicating from the other) and breaks the link continuously to force
+# partial resync attempts, all this while flooding the master with
+# write queries.
+#
+# You can specify backlog size, ttl, delay before reconnection, test duration
+# in seconds, and an additional condition to verify at the end.
+#
+# If reconnect is > 0, the test actually tries to break the connection and
+# reconnect with the master, otherwise just the initial synchronization is
+# checked for consistency.
+#
+# Arguments:
+#   descr         label included in the test name
+#   duration      approximate seconds spent breaking the link (reconnect only)
+#   backlog_size  repl-backlog-size set on the master; also used for the
+#                 replica client-output-buffer-limit of both servers
+#   backlog_ttl   repl-backlog-ttl set on the master
+#   delay         seconds the replica is blocked with DEBUG SLEEP after each
+#                 kill; 0 to skip the sleep
+#   cond          script evaluated at the end for extra assertions
+#   mdl           repl-diskless-sync value set on the master
+#   sdl           repl-diskless-load value set on the replica
+#   reconnect     non-zero to break the link while the load is running
+proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} {
+    start_server [list tags [list "repl"] overrides [list active-replica yes client-output-buffer-limit [list replica $backlog_size $backlog_size 9999999] ] ] {
+    start_server [list overrides [list client-output-buffer-limit [list replica $backlog_size $backlog_size 9999999] active-replica yes ] ] {
+
+        set master [srv -1 client]
+        set master_host [srv -1 host]
+        set master_port [srv -1 port]
+        set replica [srv 0 client]
+        set replica_host [srv 0 host]
+        set replica_port [srv 0 port]
+
+        $master config set repl-backlog-size $backlog_size
+        $master config set repl-backlog-ttl $backlog_ttl
+        $master config set repl-diskless-sync $mdl
+        $master config set repl-diskless-sync-delay 1
+        $replica config set repl-diskless-load $sdl
+
+        test {Replica should be able to synchronize with the master} {
+            $replica replicaof $master_host $master_port
+        }
+
+        after 1000
+
+        test {Master should be able to synchronize with the replica} {
+            $master replicaof $replica_host $replica_port
+        }
+
+        # Start three background load generators against the master.
+        set load_handle0 [start_climbing_load $master_host $master_port 9 100000]
+        set load_handle1 [start_climbing_load $master_host $master_port 11 100000]
+        set load_handle2 [start_climbing_load $master_host $master_port 12 100000]
+
+        # Check that the background clients are actually writing.
+        test {Detect write load to master} {
+            wait_for_condition 50 1000 {
+                [$master dbsize] > 100
+            } else {
+                fail "Can't detect write load from background clients."
+            }
+        }
+
+        test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" {
+            # Now while the clients are writing data, break the master-replica
+            # link multiple times.
+            if ($reconnect) {
+                for {set j 0} {$j < $duration*10} {incr j} {
+                    after 100
+                    # catch {puts "MASTER [$master dbsize] keys, REPLICA [$replica dbsize] keys"}
+
+                    # Every 20th iteration (roughly every 2 seconds) drop the link.
+                    if {($j % 20) == 0} {
+                        catch {
+                            if {$delay} {
+                                # Kill the link and block the replica in one MULTI/EXEC.
+                                $replica multi
+                                $replica client kill $master_host:$master_port
+                                $replica debug sleep $delay
+                                $replica exec
+                            } else {
+                                $replica client kill $master_host:$master_port
+                            }
+                        }
+                    }
+                }
+            }
+            # NOTE(review): the handles come from start_climbing_load but are
+            # stopped with stop_bg_complex_data -- confirm the helpers match.
+            stop_bg_complex_data $load_handle0
+            stop_bg_complex_data $load_handle1
+            stop_bg_complex_data $load_handle2
+
+            # Wait for the replica to reach the "online"
+            # state from the POV of the master.
+            set retry 5000
+            while {$retry} {
+                set info [$master info]
+                if {[string match {*slave0:*state=online*} $info]} {
+                    break
+                } else {
+                    incr retry -1
+                    after 100
+                }
+            }
+            if {$retry == 0} {
+                error "assertion:replica not correctly synchronized"
+            }
+
+            # Wait for the replica to acknowledge it is online so
+            # we are sure that DBSIZE and DEBUG DIGEST will not
+            # fail because of timing issues. (-LOADING error)
+            wait_for_condition 5000 100 {
+                [lindex [$replica role] 3] eq {connected}
+            } else {
+                fail "replica still not connected after some time"
+            }
+
+            wait_for_condition 100 100 {
+                [$master debug digest] == [$replica debug digest]
+            } else {
+                set csv1 [csvdump r]
+                set csv2 [csvdump {r -1}]
+                set fd [open /tmp/repldump1.txt w]
+                puts -nonewline $fd $csv1
+                close $fd
+                set fd [open /tmp/repldump2.txt w]
+                puts -nonewline $fd $csv2
+                close $fd
+                fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
+            }
+            assert {[$master dbsize] > 0}
+            eval $cond
+        }
+    }
+    }
+}
+
+
+foreach mdl {no yes} {
+    foreach sdl {disabled swapdb} {
+        test_psync {no reconnection, just sync} 6 1000000 3600 0 {
+        } $mdl $sdl 0
+
+        test_psync {ok psync} 6 100000000 3600 0 {
+            assert {[s -1 sync_partial_ok] > 0}
+        } $mdl $sdl 1
+
+        test_psync {no backlog} 6 100 3600 0.5 {
+            assert {[s -1 sync_partial_err] > 0}
+        } $mdl $sdl 1
+
+        test_psync {ok after delay} 3 100000000 3600 3 {
+            assert {[s -1 sync_partial_ok] > 0}
+        } $mdl $sdl 1
+
+        test_psync {backlog expired} 3 100000000 1 3 {
+            assert {[s -1 sync_partial_err] > 0}
+        } $mdl $sdl 1
+    }
+}
+
+# NOTE(review): this repeats the "backlog expired" case for mdl=no, sdl=swapdb,
+# which the loop above already runs -- confirm the extra pass is intentional.
+foreach mdl {no} {
+    foreach sdl {swapdb} {
+        test_psync {backlog expired} 3 100000000 1 3 {
+            assert {[s -1 sync_partial_err] > 0}
+        } $mdl $sdl 1
+    }
+}
+