Fix occasional hangs on replication reconnection. (#7830)
This happens only on diskless replicas when attempting to reconnect after failing to load an RDB file, and is more likely to occur with larger datasets. After reconnection is initiated, replicationEmptyDbCallback() may get called and try to write to a socket that is not yet connected. This triggers a second issue: the failed write puts the connection into an error state, so the connect handler never gets called. The problem is a regression introduced by commit c17e597. (cherry picked from commit 1980f639b161f46da2944d60f1c2facaf547dc1a)
This commit is contained in:
parent
6a4da4958e
commit
9d0388a043
@ -168,7 +168,12 @@ static int connSocketWrite(connection *conn, const void *data, size_t data_len)
|
||||
int ret = write(conn->fd, data, data_len);
|
||||
if (ret < 0 && errno != EAGAIN) {
|
||||
conn->last_errno = errno;
|
||||
conn->state = CONN_STATE_ERROR;
|
||||
|
||||
/* Don't overwrite the state of a connection that is not already
|
||||
* connected, not to mess with handler callbacks.
|
||||
*/
|
||||
if (conn->state == CONN_STATE_CONNECTED)
|
||||
conn->state = CONN_STATE_ERROR;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -180,7 +185,12 @@ static int connSocketRead(connection *conn, void *buf, size_t buf_len) {
|
||||
conn->state = CONN_STATE_CLOSED;
|
||||
} else if (ret < 0 && errno != EAGAIN) {
|
||||
conn->last_errno = errno;
|
||||
conn->state = CONN_STATE_ERROR;
|
||||
|
||||
/* Don't overwrite the state of a connection that is not already
|
||||
* connected, not to mess with handler callbacks.
|
||||
*/
|
||||
if (conn->state == CONN_STATE_CONNECTED)
|
||||
conn->state = CONN_STATE_ERROR;
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -1374,7 +1374,8 @@ void replicationSendNewlineToMaster(void) {
|
||||
* the new dataset received by the master. */
|
||||
void replicationEmptyDbCallback(void *privdata) {
|
||||
UNUSED(privdata);
|
||||
replicationSendNewlineToMaster();
|
||||
if (server.repl_state == REPL_STATE_TRANSFER)
|
||||
replicationSendNewlineToMaster();
|
||||
}
|
||||
|
||||
/* Once we have a link with the master and the synchronization was
|
||||
|
Loading…
x
Reference in New Issue
Block a user