Fix Protocol desync regression test (#1590)

The desync regression test was created as a regression test for the following bug: in case we embed NULL termination inside inline/multi-bulk message we will not be able to perform strchr in order to identify the newline(\n)/carriage-return(\r) in the client query buffer. this can influence (for example) replica reading primary stream and keep filling it's query buffer endlessly consuming more and more memory. In order to handle the above risk, a check was added to verify the inline bulk and multi-bulk size are not exceeding the 64K bytes in the query-buffer. A test was placed in order to verify this. This PR introduce the following fixes to the desync regression test: 1. fix the sent payload to flush 1024 bytes block of 'A's instead of 'payload' which was sent by mistake. 2. Make sure that the connection is correctly terminated on protocol error by the server after exceeding the 64K and not over 64K. 3. add another test intrinsic which will also verify the nested bulk with embedded null termination (was not verified before) fixes https://github.com/valkey-io/valkey/issues/1583 NOTE: Although it is possible to change the use of strchr to a more "safe" utility (eg memchr) which will not pause scan at first occurrence of '\0', we still like to protect against over excessive usage of the query buffer and also preserve the current behavior(?). We will look into improving this though in a followup issue. --------- Signed-off-by: Ran Shidlansik <ranshid@amazon.com> Signed-off-by: ranshid <88133677+ranshid@users.noreply.github.com>
2025-01-20 20:28:45 +02:00 · 2025-01-20 20:28:45 +02:00 · dd92d079dc
commit dd92d079dc
parent 3032ccd48a
1 changed files with 18 additions and 16 deletions
--- a/tests/unit/protocol.tcl
+++ b/tests/unit/protocol.tcl
@ -69,7 +69,7 @@ start_server {tags {"protocol network"}} {
    }

    set c 0
-    foreach seq [list "\x00" "*\x00" "$\x00"] {
+    foreach seq [list "\x00" "*\x00" "$\x00" "*1\r\n$\x00"] {
        incr c
        test "Protocol desync regression test #$c" {
            if {$::tls} {
@ -77,29 +77,31 @@ start_server {tags {"protocol network"}} {
            } else {
                set s [socket [srv 0 host] [srv 0 port]]
            }
+            fconfigure $s -blocking 0
            puts -nonewline $s $seq
+            # PROTO_INLINE_MAX_SIZE is hardcoded in Valkey code to 64K. doing the same here 
+            # since we would like to validate it is enforced. 
+            set PROTO_INLINE_MAX_SIZE [expr 1024 * 64]
            set payload [string repeat A 1024]"\n"
-            set test_start [clock seconds]
-            set test_time_limit 30
-            while 1 {
+            set payload_size 0
+            while {$payload_size <= $PROTO_INLINE_MAX_SIZE} {
                if {[catch {
-                    puts -nonewline $s payload
-                    flush $s
                    incr payload_size [string length $payload]
+                    puts -nonewline $s $payload
+                    flush $s
                }]} {
-                    set retval [gets $s]
-                    close $s
+                    assert_morethan $payload_size $PROTO_INLINE_MAX_SIZE
                    break
-                } else {
-                    set elapsed [expr {[clock seconds]-$test_start}]
-                    if {$elapsed > $test_time_limit} {
-                        close $s
-                        error "assertion:Valkey did not closed connection after protocol desync"
-                    }
                }
            }
-            set retval
-        } {*Protocol error*}
+           
+            wait_for_condition 50 100 {
+                [string match {*Protocol error*} [gets $s]]
+            } else {
+                fail "expected connection to be closed on protocol error after sending $payload_size bytes"
+            }
+            close $s
+        }
    }
    unset c