diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h
index 6c1047d..8efca0a 100644
--- a/include/rapidjson/internal/regex.h
+++ b/include/rapidjson/internal/regex.h
@@ -256,13 +256,13 @@ private:
             case '{':
                 {
                     unsigned n, m;
-                    if (!ParseUnsigned(ds, &n) || n == 0)
+                    if (!ParseUnsigned(ds, &n))
                         return;
 
                     if (ds.Peek() == ',') {
                         ds.Take();
                         if (ds.Peek() == '}')
-                            m = 0;
+                            m = kInfinityQuantifier; // {n,}: no upper bound
                         else if (!ParseUnsigned(ds, &m) || m < n)
                             return;
                     }
@@ -424,15 +424,29 @@ private:
     }
 
     bool EvalQuantifier(Stack& operandStack, unsigned n, unsigned m) {
-        RAPIDJSON_ASSERT(n > 0);
-        RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
+        RAPIDJSON_ASSERT(n <= m); // also holds for {n,}: kInfinityQuantifier is the largest unsigned value
         if (operandStack.GetSize() < sizeof(Frag))
             return false;
 
+        if (n == 0) { // zero minimum: rewrite in terms of * or a chain of optional copies
+            if (m == 0) // a{0} is not supported
+                return false;
+            else if (m == kInfinityQuantifier)
+                Eval(operandStack, kZeroOrMore); // a{0,} -> a*
+            else {
+                Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
+            }
+            return true;
+        }
+
         for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
             CloneTopOperand(operandStack);
 
-        if (m == 0)
+        if (m == kInfinityQuantifier)
             Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
         else if (m > n) {
             CloneTopOperand(operandStack); // a{3,5} -> a a a a
@@ -469,6 +483,8 @@ private:
     template
     bool ParseUnsigned(DecodedStream& ds, unsigned* u) {
         unsigned r = 0;
+        if (ds.Peek() < '0' || ds.Peek() > '9') // require at least one digit
+            return false;
         while (ds.Peek() >= '0' && ds.Peek() <= '9') {
             if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
                 return false; // overflow
@@ -658,6 +674,8 @@ private:
     SizeType stateCount_;
     SizeType rangeCount_;
 
+    static const unsigned kInfinityQuantifier = ~0u; // sentinel maximum for open-ended quantifiers such as a{3,}
+
     // For SearchWithAnchoring()
     uint32_t* stateSet_; // allocated by states_.GetAllocator()
     mutable Stack state0_;
diff --git a/test/unittest/regextest.cpp b/test/unittest/regextest.cpp
index 65105fa..e3371d1 100644
--- a/test/unittest/regextest.cpp
+++ b/test/unittest/regextest.cpp
@@ -325,6 +325,43 @@ TEST(Regex, QuantifierMinMax3) {
     EXPECT_FALSE(re.Match("abbbbbbd"));
 }
 
+// Issue538: bounded quantifier with a zero minimum
+TEST(Regex, QuantifierMinMax4) {
+    Regex re("a(b|c){0,3}d");
+    ASSERT_TRUE(re.IsValid());
+    EXPECT_TRUE(re.Match("ad"));
+    EXPECT_TRUE(re.Match("abd"));
+    EXPECT_TRUE(re.Match("acd"));
+    EXPECT_TRUE(re.Match("abbd"));
+    EXPECT_TRUE(re.Match("accd"));
+    EXPECT_TRUE(re.Match("abcd"));
+    EXPECT_TRUE(re.Match("abbbd"));
+    EXPECT_TRUE(re.Match("acccd"));
+    EXPECT_FALSE(re.Match("abbbbd"));
+    EXPECT_FALSE(re.Match("add"));
+    EXPECT_FALSE(re.Match("accccd"));
+    EXPECT_FALSE(re.Match("abcbcd"));
+}
+
+// Issue538: open-ended quantifier with a zero minimum
+TEST(Regex, QuantifierMinMax5) {
+    Regex re("a(b|c){0,}d");
+    ASSERT_TRUE(re.IsValid());
+    EXPECT_TRUE(re.Match("ad"));
+    EXPECT_TRUE(re.Match("abd"));
+    EXPECT_TRUE(re.Match("acd"));
+    EXPECT_TRUE(re.Match("abbd"));
+    EXPECT_TRUE(re.Match("accd"));
+    EXPECT_TRUE(re.Match("abcd"));
+    EXPECT_TRUE(re.Match("abbbd"));
+    EXPECT_TRUE(re.Match("acccd"));
+    EXPECT_TRUE(re.Match("abbbbd"));
+    EXPECT_TRUE(re.Match("accccd"));
+    EXPECT_TRUE(re.Match("abcbcd"));
+    EXPECT_FALSE(re.Match("add"));
+    EXPECT_FALSE(re.Match("aad"));
+}
+
 #define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
 
 TEST(Regex, Unicode) {
@@ -501,6 +538,7 @@ TEST(Regex, Invalid) {
         EXPECT_FALSE(re.IsValid());\
     }
 
+    TEST_INVALID("");
     TEST_INVALID("a|");
     TEST_INVALID("()");
     TEST_INVALID(")");
@@ -517,7 +555,7 @@ TEST(Regex, Invalid) {
     TEST_INVALID("a{0}");
     TEST_INVALID("a{-1}");
     TEST_INVALID("a{}");
-    TEST_INVALID("a{0,}");
+    // TEST_INVALID("a{0,}"); // a{0,} is supported now (treated as a*)
     TEST_INVALID("a{,0}");
     TEST_INVALID("a{1,0}");
     TEST_INVALID("a{-1,0}");
@@ -530,4 +568,9 @@ TEST(Regex, Invalid) {
 #undef TEST_INVALID
 }
 
+TEST(Regex, Issue538) { // a {0,2} quantifier on a group must compile
+    Regex re("^[0-9]+(\\\\.[0-9]+){0,2}");
+    EXPECT_TRUE(re.IsValid());
+}
+
 #undef EURO