Make cluster config file saving atomic and fsync acl (#10924)

As an outstanding part mentioned in #10737, we could just make the cluster config file and
ACL file saving done with a more safe and atomic pattern (write to temp file, fsync, rename, fsync dir).

The cluster config file uses an in-place overwrite and truncation (which was also used by the
main config file before #7824).
The ACL file is using the temp file and rename approach, but was missing an fsync.

Co-authored-by: 朱天 <zhutian03@meituan.com>
This commit is contained in:
Tian 2022-07-20 14:11:01 +08:00 committed by GitHub
parent 56828bab59
commit cc2848132f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 61 additions and 28 deletions

View File

@ -2264,7 +2264,7 @@ int ACLSaveToFile(const char *filename) {
/* Create a temp file with the new content. */
tmpfilename = sdsnew(filename);
tmpfilename = sdscatfmt(tmpfilename,".tmp-%i-%I",
(int)getpid(),(int)mstime());
(int) getpid(),mstime());
if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) {
serverLog(LL_WARNING,"Opening temp ACL file for ACL SAVE: %s",
strerror(errno));
@ -2272,8 +2272,19 @@ int ACLSaveToFile(const char *filename) {
}
/* Write it. */
if (write(fd,acl,sdslen(acl)) != (ssize_t)sdslen(acl)) {
serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s",
size_t offset = 0;
while (offset < sdslen(acl)) {
ssize_t written_bytes = write(fd,acl + offset,sdslen(acl) - offset);
if (written_bytes <= 0) {
if (errno == EINTR) continue;
serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s",
strerror(errno));
goto cleanup;
}
offset += written_bytes;
}
if (redis_fsync(fd) == -1) {
serverLog(LL_WARNING,"Syncing ACL file for ACL SAVE: %s",
strerror(errno));
goto cleanup;
}
@ -2285,6 +2296,11 @@ int ACLSaveToFile(const char *filename) {
strerror(errno));
goto cleanup;
}
if (fsyncFileDir(filename) == -1) {
serverLog(LL_WARNING,"Syncing ACL directory for ACL SAVE: %s",
strerror(errno));
goto cleanup;
}
sdsfree(tmpfilename); tmpfilename = NULL;
retval = C_OK; /* If we reached this point, everything is fine. */

View File

@ -406,10 +406,11 @@ fmterr:
* bigger we pad our payload with newlines that are anyway ignored and truncate
* the file afterward. */
int clusterSaveConfig(int do_fsync) {
sds ci;
size_t content_size;
struct stat sb;
int fd;
sds ci,tmpfilename;
size_t content_size,offset = 0;
ssize_t written_bytes;
int fd = -1;
int retval = C_ERR;
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG;
@ -421,36 +422,52 @@ int clusterSaveConfig(int do_fsync) {
(unsigned long long) server.cluster->lastVoteEpoch);
content_size = sdslen(ci);
if ((fd = open(server.cluster_configfile,O_WRONLY|O_CREAT,0644))
== -1) goto err;
if (redis_fstat(fd,&sb) == -1) goto err;
/* Pad the new payload if the existing file length is greater. */
if (sb.st_size > (off_t)content_size) {
ci = sdsgrowzero(ci,sb.st_size);
memset(ci+content_size,'\n',sb.st_size-content_size);
/* Create a temp file with the new content. */
tmpfilename = sdscatfmt(sdsempty(),"%s.tmp-%i-%I",
server.cluster_configfile,(int) getpid(),mstime());
if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) {
serverLog(LL_WARNING,"Could not open temp cluster config file: %s",strerror(errno));
goto cleanup;
}
if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err;
while (offset < content_size) {
written_bytes = write(fd,ci + offset,content_size - offset);
if (written_bytes <= 0) {
if (errno == EINTR) continue;
serverLog(LL_WARNING,"Failed after writing (%zd) bytes to tmp cluster config file: %s",
offset,strerror(errno));
goto cleanup;
}
offset += written_bytes;
}
if (do_fsync) {
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG;
if (fsync(fd) == -1) goto err;
if (redis_fsync(fd) == -1) {
serverLog(LL_WARNING,"Could not sync tmp cluster config file: %s",strerror(errno));
goto cleanup;
}
}
/* Truncate the file if needed to remove the final \n padding that
* is just garbage. */
if (content_size != sdslen(ci) && ftruncate(fd,content_size) == -1) {
/* ftruncate() failing is not a critical error. */
if (rename(tmpfilename, server.cluster_configfile) == -1) {
serverLog(LL_WARNING,"Could not rename tmp cluster config file: %s",strerror(errno));
goto cleanup;
}
close(fd);
sdsfree(ci);
return 0;
err:
if (do_fsync) {
if (fsyncFileDir(server.cluster_configfile) == -1) {
serverLog(LL_WARNING,"Could not sync cluster config file dir: %s",strerror(errno));
goto cleanup;
}
}
retval = C_OK; /* If we reached this point, everything is fine. */
cleanup:
if (fd != -1) close(fd);
if (retval) unlink(tmpfilename);
sdsfree(tmpfilename);
sdsfree(ci);
return -1;
return retval;
}
void clusterSaveConfigOrDie(int do_fsync) {