From 1010c1b43e9af03dbfbf7895f83ab519b75cfe91 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 16:10:18 +0200 Subject: [PATCH 01/13] LCS: initial functionality implemented. --- src/db.c | 26 +++++++++++ src/server.c | 6 ++- src/server.h | 2 + src/t_string.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 211bb978d..bfa39564e 100644 --- a/src/db.c +++ b/src/db.c @@ -1554,6 +1554,32 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk return keys; } +/* LCS ... [STOREIDX ] ... */ +int *lcsGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) +{ + int i; + int *keys; + UNUSED(cmd); + + /* We need to parse the options of the command in order to check for the + * "STOREIDX" argument before the STRINGS argument. */ + for (i = 1; i < argc; i++) { + char *arg = argv[i]->ptr; + int moreargs = (argc-1) - i; + + if (!strcasecmp(arg, "strings")) { + break; + } else if (!strcasecmp(arg, "storeidx") && moreargs) { + keys = getKeysTempBuffer; + keys[0] = i+1; + *numkeys = 1; + return keys; + } + } + *numkeys = 0; + return NULL; +} + /* Helper function to extract keys from memory command. * MEMORY USAGE */ int *memoryGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { diff --git a/src/server.c b/src/server.c index c89e9c075..d06a9e29a 100644 --- a/src/server.c +++ b/src/server.c @@ -1004,7 +1004,11 @@ struct redisCommand redisCommandTable[] = { {"acl",aclCommand,-2, "admin no-script no-slowlog ok-loading ok-stale", - 0,NULL,0,0,0,0,0,0} + 0,NULL,0,0,0,0,0,0}, + + {"lcs",lcsCommand,-4, + "write use-memory @string", + 0,lcsGetKeys,0,0,0,0,0,0} }; /*============================ Utility functions ============================ */ diff --git a/src/server.h b/src/server.h index c4db4278e..1472bcee7 100644 --- a/src/server.h +++ b/src/server.h @@ -2101,6 +2101,7 @@ int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkey int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); int *memoryGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); +int *lcsGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys); /* Cluster */ void clusterInit(void); @@ -2372,6 +2373,7 @@ void xdelCommand(client *c); void xtrimCommand(client *c); void lolwutCommand(client *c); void aclCommand(client *c); +void lcsCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/src/t_string.c b/src/t_string.c index 8ccd69eb9..e19647845 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -479,3 +479,126 @@ void strlenCommand(client *c) { checkType(c,o,OBJ_STRING)) return; addReplyLongLong(c,stringObjectLen(o)); } + +/* LCS -- Longest common subsequence. + * + * LCS [LEN] [IDX] [STOREIDX ] STRINGS */ +void lcsCommand(client *c) { + uint32_t i, j; + sds a = NULL, b = NULL; + int getlen = 0, getidx = 0; + robj *idxkey = NULL; /* STOREIDX will set this and getidx to 1. */ + + for (j = 1; j < (uint32_t)c->argc; j++) { + char *opt = c->argv[j]->ptr; + int moreargs = (c->argc-1) - j; + + if (!strcasecmp(opt,"IDX")) { + getidx = 1; + } else if (!strcasecmp(opt,"STOREIDX") && moreargs) { + getidx = 1; + idxkey = c->argv[j+1]; + j++; + } else if (!strcasecmp(opt,"LEN")) { + getlen++; + } else if (!strcasecmp(opt,"STRINGS")) { + if (moreargs != 2) { + addReplyError(c,"LCS requires exactly two strings"); + return; + } + a = c->argv[j+1]->ptr; + b = c->argv[j+2]->ptr; + j += 2; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Complain if the user didn't pass the STRING option. */ + if (a == NULL) { + addReplyError(c,"STRINGS is mandatory"); + return; + } + + /* Compute the LCS using the vanilla dynamic programming technique of + * building a table of LCS(x,y) substrings. */ + uint32_t alen = sdslen(a); + uint32_t blen = sdslen(b); + + /* Setup an uint32_t array to store at LCS[i,j] the length of the + * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so + * we index it as LCS[i+alen*j] */ + uint32_t *lcs = zmalloc((alen+1)*(blen+1)*sizeof(uint32_t)); + #define LCS(A,B) lcs[(A)+((B)*(alen+1))] + + /* Start building the LCS table. */ + for (uint32_t i = 0; i <= alen; i++) { + for (uint32_t j = 0; j <= blen; j++) { + if (i == 0 || j == 0) { + /* If one substring has length of zero, the + * LCS length is zero. */ + LCS(i,j) = 0; + } else if (a[i-1] == b[j-1]) { + /* The len LCS (and the LCS itself) of two + * sequences with the same final character, is the + * LCS of the two sequences without the last char + * plus that last char. */ + LCS(i,j) = LCS(i-1,j-1)+1; + } else { + /* If the last character is different, take the longest + * between the LCS of the first string and the second + * minus the last char, and the reverse. */ + uint32_t lcs1 = LCS(i-1,j); + uint32_t lcs2 = LCS(i,j-1); + LCS(i,j) = lcs1 > lcs2 ? lcs1 : lcs2; + } + } + } + + /* Store the actual LCS string in "result" if needed. We create + * it backward, but the length is already known, we store it into idx. */ + uint32_t idx = LCS(alen,blen); + sds result = NULL; + + /* Do we need to compute the actual LCS string? Allocate it in that case. */ + int computelcs = getidx || !getlen; + if (computelcs) result = sdsnewlen(SDS_NOINIT,idx); + + i = alen, j = blen; + while (computelcs && i > 0 && j > 0) { + if (a[i-1] == b[j-1]) { + /* If there is a match, store the character and reduce + * the indexes to look for a new match. */ + result[idx-1] = a[i-1]; + idx--; + i--; + j--; + } else { + /* Otherwise reduce i and j depending on the largest + * LCS between, to understand what direction we need to go. */ + uint32_t lcs1 = LCS(i-1,j); + uint32_t lcs2 = LCS(i,j-1); + if (lcs1 > lcs2) + i--; + else + j--; + } + } + + /* Signal modified key, increment dirty, ... */ + + /* Reply depending on the given options. */ + if (getlen) { + addReplyLongLong(c,LCS(alen,blen)); + } else { + addReplyBulkSds(c,result); + result = NULL; + } + + /* Cleanup. */ + sdsfree(result); + zfree(lcs); + return; +} + From b3400559be53ff77f7196c99d791d62d298875e9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 17:11:31 +0200 Subject: [PATCH 02/13] LCS: implement range indexes option. --- src/t_string.c | 68 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index e19647845..6c096a539 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -482,7 +482,7 @@ void strlenCommand(client *c) { /* LCS -- Longest common subsequence. * - * LCS [LEN] [IDX] [STOREIDX ] STRINGS */ + * LCS [IDX] [STOREIDX ] STRINGS */ void lcsCommand(client *c) { uint32_t i, j; sds a = NULL, b = NULL; @@ -495,12 +495,12 @@ void lcsCommand(client *c) { if (!strcasecmp(opt,"IDX")) { getidx = 1; + } else if (!strcasecmp(opt,"LEN")) { + getlen = 1; } else if (!strcasecmp(opt,"STOREIDX") && moreargs) { getidx = 1; idxkey = c->argv[j+1]; j++; - } else if (!strcasecmp(opt,"LEN")) { - getlen++; } else if (!strcasecmp(opt,"STRINGS")) { if (moreargs != 2) { addReplyError(c,"LCS requires exactly two strings"); @@ -515,10 +515,17 @@ void lcsCommand(client *c) { } } - /* Complain if the user didn't pass the STRING option. */ + /* Complain if the user passed ambiguous parameters. */ if (a == NULL) { addReplyError(c,"STRINGS is mandatory"); return; + } else if (getlen && (getidx && idxkey == NULL)) { + addReplyError(c, + "If you want both the LEN and indexes, please " + "store the indexes into a key with STOREIDX. However note " + "that the IDX output also includes both the LCS string and " + "its length"); + return; } /* Compute the LCS using the vanilla dynamic programming technique of @@ -559,21 +566,62 @@ void lcsCommand(client *c) { /* Store the actual LCS string in "result" if needed. We create * it backward, but the length is already known, we store it into idx. */ uint32_t idx = LCS(alen,blen); - sds result = NULL; + sds result = NULL; /* Resulting LCS string. */ + void *arraylenptr = NULL; /* Deffered length of the array for IDX. */ + uint32_t arange_start = alen, /* alen signals that values are not set. */ + arange_end = 0, + brange_start = 0, + brange_end = 0; /* Do we need to compute the actual LCS string? Allocate it in that case. */ int computelcs = getidx || !getlen; if (computelcs) result = sdsnewlen(SDS_NOINIT,idx); + /* Start with a deferred array if we have to emit the ranges. */ + uint32_t arraylen = 0; /* Number of ranges emitted in the array. */ + if (getidx && idxkey == NULL) + arraylenptr = addReplyDeferredLen(c); + i = alen, j = blen; while (computelcs && i > 0 && j > 0) { if (a[i-1] == b[j-1]) { /* If there is a match, store the character and reduce * the indexes to look for a new match. */ result[idx-1] = a[i-1]; - idx--; - i--; - j--; + /* Track the current range. */ + int emit_range = 0; + if (arange_start == alen) { + arange_start = i-1; + arange_end = i-1; + brange_start = j-1; + brange_end = j-1; + if (i == 0 || j == 0) emit_range = 1; + } else { + /* Let's see if we can extend the range backward since + * it is contiguous. */ + if (arange_start == i && brange_start == j) { + arange_start--; + brange_start--; + } else { + emit_range = 1; + } + } + + /* Emit the current range if needed. */ + if (emit_range) { + if (arraylenptr) { + addReplyArrayLen(c,2); + addReplyArrayLen(c,2); + addReplyLongLong(c,arange_start); + addReplyLongLong(c,arange_end); + addReplyArrayLen(c,2); + addReplyLongLong(c,brange_start); + addReplyLongLong(c,brange_end); + } + arange_start = alen; /* Restart at the next match. */ + arraylen++; + } + idx--; i--; j--; } else { /* Otherwise reduce i and j depending on the largest * LCS between, to understand what direction we need to go. */ @@ -589,7 +637,9 @@ void lcsCommand(client *c) { /* Signal modified key, increment dirty, ... */ /* Reply depending on the given options. */ - if (getlen) { + if (arraylenptr) { + setDeferredArrayLen(c,arraylenptr,arraylen); + } else if (getlen) { addReplyLongLong(c,LCS(alen,blen)); } else { addReplyBulkSds(c,result); From c9c03c3ee60b6a7a4918d4b2bbf40cfd21fcd284 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 17:14:36 +0200 Subject: [PATCH 03/13] LCS: fix emission of last range starting at index 0. --- src/t_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_string.c b/src/t_string.c index 6c096a539..caccd11f1 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -595,7 +595,6 @@ void lcsCommand(client *c) { arange_end = i-1; brange_start = j-1; brange_end = j-1; - if (i == 0 || j == 0) emit_range = 1; } else { /* Let's see if we can extend the range backward since * it is contiguous. */ @@ -606,6 +605,7 @@ void lcsCommand(client *c) { emit_range = 1; } } + if (arange_start == 0 || brange_start == 0) emit_range = 1; /* Emit the current range if needed. */ if (emit_range) { From 4cbf3f5dddc7c765269d8ce9eceb406ccde036d6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 17:36:32 +0200 Subject: [PATCH 04/13] LCS: other fixes to range emission. --- src/t_string.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index caccd11f1..30999ee70 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -584,12 +584,13 @@ void lcsCommand(client *c) { i = alen, j = blen; while (computelcs && i > 0 && j > 0) { + int emit_range = 0; if (a[i-1] == b[j-1]) { /* If there is a match, store the character and reduce * the indexes to look for a new match. */ result[idx-1] = a[i-1]; + /* Track the current range. */ - int emit_range = 0; if (arange_start == alen) { arange_start = i-1; arange_end = i-1; @@ -605,22 +606,9 @@ void lcsCommand(client *c) { emit_range = 1; } } + /* Emit the range if we matched with the first byte of + * one of the two strings. We'll exit the loop ASAP. */ if (arange_start == 0 || brange_start == 0) emit_range = 1; - - /* Emit the current range if needed. */ - if (emit_range) { - if (arraylenptr) { - addReplyArrayLen(c,2); - addReplyArrayLen(c,2); - addReplyLongLong(c,arange_start); - addReplyLongLong(c,arange_end); - addReplyArrayLen(c,2); - addReplyLongLong(c,brange_start); - addReplyLongLong(c,brange_end); - } - arange_start = alen; /* Restart at the next match. */ - arraylen++; - } idx--; i--; j--; } else { /* Otherwise reduce i and j depending on the largest @@ -631,6 +619,22 @@ void lcsCommand(client *c) { i--; else j--; + if (arange_start != alen) emit_range = 1; + } + + /* Emit the current range if needed. */ + if (emit_range) { + if (arraylenptr) { + addReplyArrayLen(c,2); + addReplyArrayLen(c,2); + addReplyLongLong(c,arange_start); + addReplyLongLong(c,arange_end); + addReplyArrayLen(c,2); + addReplyLongLong(c,brange_start); + addReplyLongLong(c,brange_end); + } + arange_start = alen; /* Restart at the next match. */ + arraylen++; } } From 8cdc15c3093a14f4a9af45cfae5679c67eda3fa0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 22:11:59 +0200 Subject: [PATCH 05/13] LCS: implement KEYS option. --- src/t_string.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index 30999ee70..0b159e6b3 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -482,12 +482,13 @@ void strlenCommand(client *c) { /* LCS -- Longest common subsequence. * - * LCS [IDX] [STOREIDX ] STRINGS */ + * LCS [IDX] [STOREIDX ] STRINGS | KEYS */ void lcsCommand(client *c) { uint32_t i, j; sds a = NULL, b = NULL; int getlen = 0, getidx = 0; robj *idxkey = NULL; /* STOREIDX will set this and getidx to 1. */ + robj *obja = NULL, *objb = NULL; for (j = 1; j < (uint32_t)c->argc; j++) { char *opt = c->argv[j]->ptr; @@ -509,6 +510,18 @@ void lcsCommand(client *c) { a = c->argv[j+1]->ptr; b = c->argv[j+2]->ptr; j += 2; + } else if (!strcasecmp(opt,"KEYS")) { + if (moreargs != 2) { + addReplyError(c,"LCS requires exactly two keys"); + return; + } + obja = lookupKeyRead(c->db,c->argv[j+1]); + objb = lookupKeyRead(c->db,c->argv[j+2]); + obja = obja ? getDecodedObject(obja) : createStringObject("",0); + objb = objb ? getDecodedObject(objb) : createStringObject("",0); + a = obja->ptr; + b = objb->ptr; + j += 2; } else { addReply(c,shared.syntaxerr); return; @@ -517,7 +530,8 @@ void lcsCommand(client *c) { /* Complain if the user passed ambiguous parameters. */ if (a == NULL) { - addReplyError(c,"STRINGS is mandatory"); + addReplyError(c,"Please specify two strings: " + "STRINGS or KEYS options are mandatory"); return; } else if (getlen && (getidx && idxkey == NULL)) { addReplyError(c, @@ -651,6 +665,8 @@ void lcsCommand(client *c) { } /* Cleanup. */ + if (obja) decrRefCount(obja); + if (objb) decrRefCount(objb); sdsfree(result); zfree(lcs); return; From 88e66ecf946f720db76b06dfb1a27834d16b7d61 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 1 Apr 2020 23:45:07 +0200 Subject: [PATCH 06/13] LCS: 7x speedup by accessing the array with better locality. --- src/t_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_string.c b/src/t_string.c index 0b159e6b3..b715e144a 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -551,7 +551,7 @@ void lcsCommand(client *c) { * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so * we index it as LCS[i+alen*j] */ uint32_t *lcs = zmalloc((alen+1)*(blen+1)*sizeof(uint32_t)); - #define LCS(A,B) lcs[(A)+((B)*(alen+1))] + #define LCS(A,B) lcs[(B)+((A)*(blen+1))] /* Start building the LCS table. */ for (uint32_t i = 0; i <= alen; i++) { From 3f96e1623d5270714dc94c75ca8d5daab336912c Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 2 Apr 2020 13:37:35 +0200 Subject: [PATCH 07/13] LCS: MINMATCHLEN and WITHMATCHLEN options. --- src/t_string.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index b715e144a..6b30c9751 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -482,11 +482,13 @@ void strlenCommand(client *c) { /* LCS -- Longest common subsequence. * - * LCS [IDX] [STOREIDX ] STRINGS | KEYS */ + * LCS [IDX] [STOREIDX ] [MINMATCHLEN ] + * STRINGS | KEYS */ void lcsCommand(client *c) { uint32_t i, j; + long long minmatchlen = 0; sds a = NULL, b = NULL; - int getlen = 0, getidx = 0; + int getlen = 0, getidx = 0, withmatchlen = 0; robj *idxkey = NULL; /* STOREIDX will set this and getidx to 1. */ robj *obja = NULL, *objb = NULL; @@ -498,10 +500,17 @@ void lcsCommand(client *c) { getidx = 1; } else if (!strcasecmp(opt,"LEN")) { getlen = 1; + } else if (!strcasecmp(opt,"WITHMATCHLEN")) { + withmatchlen = 1; } else if (!strcasecmp(opt,"STOREIDX") && moreargs) { getidx = 1; idxkey = c->argv[j+1]; j++; + } else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) { + if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL) + != C_OK) return; + if (minmatchlen < 0) minmatchlen = 0; + j++; } else if (!strcasecmp(opt,"STRINGS")) { if (moreargs != 2) { addReplyError(c,"LCS requires exactly two strings"); @@ -637,18 +646,22 @@ void lcsCommand(client *c) { } /* Emit the current range if needed. */ + uint32_t match_len = arange_end - arange_start + 1; if (emit_range) { - if (arraylenptr) { - addReplyArrayLen(c,2); - addReplyArrayLen(c,2); - addReplyLongLong(c,arange_start); - addReplyLongLong(c,arange_end); - addReplyArrayLen(c,2); - addReplyLongLong(c,brange_start); - addReplyLongLong(c,brange_end); + if (minmatchlen == 0 || match_len >= minmatchlen) { + if (arraylenptr) { + addReplyArrayLen(c,2+withmatchlen); + addReplyArrayLen(c,2); + addReplyLongLong(c,arange_start); + addReplyLongLong(c,arange_end); + addReplyArrayLen(c,2); + addReplyLongLong(c,brange_start); + addReplyLongLong(c,brange_end); + if (withmatchlen) addReplyLongLong(c,match_len); + arraylen++; + } } arange_start = alen; /* Restart at the next match. */ - arraylen++; } } From 5a3d85745ad5abe8b7e9ae11c062a0a1b4220387 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 2 Apr 2020 16:15:17 +0200 Subject: [PATCH 08/13] LCS: output LCS len as well in IDX mode. --- src/t_string.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/t_string.c b/src/t_string.c index 6b30c9751..fb53f7c54 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -602,8 +602,11 @@ void lcsCommand(client *c) { /* Start with a deferred array if we have to emit the ranges. */ uint32_t arraylen = 0; /* Number of ranges emitted in the array. */ - if (getidx && idxkey == NULL) + if (getidx && idxkey == NULL) { + addReplyMapLen(c,2); + addReplyBulkCString(c,"matches"); arraylenptr = addReplyDeferredLen(c); + } i = alen, j = blen; while (computelcs && i > 0 && j > 0) { @@ -669,6 +672,8 @@ void lcsCommand(client *c) { /* Reply depending on the given options. */ if (arraylenptr) { + addReplyBulkCString(c,"len"); + addReplyLongLong(c,LCS(alen,blen)); setDeferredArrayLen(c,arraylenptr,arraylen); } else if (getlen) { addReplyLongLong(c,LCS(alen,blen)); From ef610802c7262f60c3418ba7e19524636a5c0c8e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 2 Apr 2020 21:17:31 +0200 Subject: [PATCH 09/13] LCS: fix stale comment. --- src/t_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_string.c b/src/t_string.c index fb53f7c54..80cdbb5e1 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -558,7 +558,7 @@ void lcsCommand(client *c) { /* Setup an uint32_t array to store at LCS[i,j] the length of the * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so - * we index it as LCS[i+alen*j] */ + * we index it as LCS[j+(blen+1)*j] */ uint32_t *lcs = zmalloc((alen+1)*(blen+1)*sizeof(uint32_t)); #define LCS(A,B) lcs[(B)+((A)*(blen+1))] From 7261a5550fc760bf2cfe7365ecb793e728bb6950 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Apr 2020 13:23:07 +0200 Subject: [PATCH 10/13] LCS: get rid of STOREIDX option. Fix get keys helper. --- src/db.c | 14 +++++++------- src/t_string.c | 27 +++++++++++++-------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/db.c b/src/db.c index bfa39564e..e9e10aeeb 100644 --- a/src/db.c +++ b/src/db.c @@ -1554,30 +1554,30 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk return keys; } -/* LCS ... [STOREIDX ] ... */ +/* LCS ... [KEYS ] ... */ int *lcsGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { int i; - int *keys; + int *keys = getKeysTempBuffer; UNUSED(cmd); /* We need to parse the options of the command in order to check for the - * "STOREIDX" argument before the STRINGS argument. */ + * "KEYS" argument before the "STRINGS" argument. */ for (i = 1; i < argc; i++) { char *arg = argv[i]->ptr; int moreargs = (argc-1) - i; if (!strcasecmp(arg, "strings")) { break; - } else if (!strcasecmp(arg, "storeidx") && moreargs) { - keys = getKeysTempBuffer; + } else if (!strcasecmp(arg, "keys") && moreargs >= 2) { keys[0] = i+1; - *numkeys = 1; + keys[1] = i+2; + *numkeys = 2; return keys; } } *numkeys = 0; - return NULL; + return keys; } /* Helper function to extract keys from memory command. diff --git a/src/t_string.c b/src/t_string.c index 80cdbb5e1..deba22253 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -482,14 +482,13 @@ void strlenCommand(client *c) { /* LCS -- Longest common subsequence. * - * LCS [IDX] [STOREIDX ] [MINMATCHLEN ] + * LCS [IDX] [MINMATCHLEN ] * STRINGS | KEYS */ void lcsCommand(client *c) { uint32_t i, j; long long minmatchlen = 0; sds a = NULL, b = NULL; int getlen = 0, getidx = 0, withmatchlen = 0; - robj *idxkey = NULL; /* STOREIDX will set this and getidx to 1. */ robj *obja = NULL, *objb = NULL; for (j = 1; j < (uint32_t)c->argc; j++) { @@ -502,17 +501,16 @@ void lcsCommand(client *c) { getlen = 1; } else if (!strcasecmp(opt,"WITHMATCHLEN")) { withmatchlen = 1; - } else if (!strcasecmp(opt,"STOREIDX") && moreargs) { - getidx = 1; - idxkey = c->argv[j+1]; - j++; } else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) { if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL) != C_OK) return; if (minmatchlen < 0) minmatchlen = 0; j++; } else if (!strcasecmp(opt,"STRINGS")) { - if (moreargs != 2) { + if (a != NULL) { + addReplyError(c,"Either use STRINGS or KEYS"); + return; + } else if (moreargs != 2) { addReplyError(c,"LCS requires exactly two strings"); return; } @@ -520,7 +518,10 @@ void lcsCommand(client *c) { b = c->argv[j+2]->ptr; j += 2; } else if (!strcasecmp(opt,"KEYS")) { - if (moreargs != 2) { + if (a != NULL) { + addReplyError(c,"Either use STRINGS or KEYS"); + return; + } else if (moreargs != 2) { addReplyError(c,"LCS requires exactly two keys"); return; } @@ -542,12 +543,10 @@ void lcsCommand(client *c) { addReplyError(c,"Please specify two strings: " "STRINGS or KEYS options are mandatory"); return; - } else if (getlen && (getidx && idxkey == NULL)) { + } else if (getlen && getidx) { addReplyError(c, - "If you want both the LEN and indexes, please " - "store the indexes into a key with STOREIDX. However note " - "that the IDX output also includes both the LCS string and " - "its length"); + "If you want both the length and indexes, please " + "just use IDX."); return; } @@ -602,7 +601,7 @@ void lcsCommand(client *c) { /* Start with a deferred array if we have to emit the ranges. */ uint32_t arraylen = 0; /* Number of ranges emitted in the array. */ - if (getidx && idxkey == NULL) { + if (getidx) { addReplyMapLen(c,2); addReplyBulkCString(c,"matches"); arraylenptr = addReplyDeferredLen(c); From 8dc28b6c7525190f3205969427614d51d47ac451 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Apr 2020 13:45:37 +0200 Subject: [PATCH 11/13] LCS tests. --- tests/unit/type/string.tcl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index 7122fd987..131a80ad0 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -419,4 +419,26 @@ start_server {tags {"string"}} { r set foo bar r getrange foo 0 4294967297 } {bar} + + set rna1 {CACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTG} + set rna2 {ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT} + set rnalcs {ACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT} + + test {LCS string output with STRINGS option} { + r LCS STRINGS $rna1 $rna2 + } $rnalcs + + test {LCS len} { + r LCS LEN STRINGS $rna1 $rna2 + } [string length $rnalcs] + + test {LCS with KEYS option} { + r set virus1 $rna1 + r set virus2 $rna2 + r LCS KEYS virus1 virus2 + } $rnalcs + + test {LCS indexes} { + dict get [r LCS IDX KEYS virus1 virus2] matches + } {{{238 238} {239 239}} {{236 236} {238 238}} {{229 230} {236 237}} {{224 224} {235 235}} {{1 222} {13 234}}} } From ca8d6f1072e15fcb12827c1ca84b388c41e394b3 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Apr 2020 13:48:31 +0200 Subject: [PATCH 12/13] LCS: allow KEYS / STRINGS to be anywhere. Initially they needed to be at the end so that we could extend to N strings in the future, but after further consideration I no longer believe it's worth it. --- src/t_string.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index deba22253..4e693cefa 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -510,9 +510,6 @@ void lcsCommand(client *c) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; - } else if (moreargs != 2) { - addReplyError(c,"LCS requires exactly two strings"); - return; } a = c->argv[j+1]->ptr; b = c->argv[j+2]->ptr; @@ -521,9 +518,6 @@ void lcsCommand(client *c) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; - } else if (moreargs != 2) { - addReplyError(c,"LCS requires exactly two keys"); - return; } obja = lookupKeyRead(c->db,c->argv[j+1]); objb = lookupKeyRead(c->db,c->argv[j+2]); From af3c722feccb2d002e57f02ee5ef623f86e7ea2f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 6 Apr 2020 13:51:49 +0200 Subject: [PATCH 13/13] LCS: more tests. --- tests/unit/type/string.tcl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index 131a80ad0..b9ef9de7a 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -441,4 +441,12 @@ start_server {tags {"string"}} { test {LCS indexes} { dict get [r LCS IDX KEYS virus1 virus2] matches } {{{238 238} {239 239}} {{236 236} {238 238}} {{229 230} {236 237}} {{224 224} {235 235}} {{1 222} {13 234}}} + + test {LCS indexes with match len} { + dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN] matches + } {{{238 238} {239 239} 1} {{236 236} {238 238} 1} {{229 230} {236 237} 2} {{224 224} {235 235} 1} {{1 222} {13 234} 222}} + + test {LCS indexes with match len and minimum match len} { + dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN MINMATCHLEN 5] matches + } {{{1 222} {13 234} 222}} }