Add numbered quantifier
This commit is contained in:
parent
51bb7631f4
commit
fa7dc1c439
@ -39,6 +39,9 @@ static const SizeType kRegexInvalidRange = ~SizeType(0);
|
||||
- \c a? Zero or one
|
||||
- \c a* Zero or more
|
||||
- \c a+ One or more
|
||||
- \c a{3} Exactly 3 times
|
||||
- \c a{3,} At least 3 times
|
||||
- \c a{3,5} 3 to 5 times
|
||||
- \c (ab)* Grouping
|
||||
- \c . Any character
|
||||
- \c [abc] Character classes
|
||||
@ -266,6 +269,28 @@ private:
|
||||
return;
|
||||
break;
|
||||
|
||||
case '{':
|
||||
{
|
||||
unsigned n, m;
|
||||
if (!ParseUnsigned(ds, &n) || n == 0)
|
||||
return;
|
||||
|
||||
if (ds.Peek() == ',') {
|
||||
ds.Take();
|
||||
if (ds.Peek() == '}')
|
||||
m = 0;
|
||||
else if (!ParseUnsigned(ds, &m) || m < n)
|
||||
return;
|
||||
}
|
||||
else
|
||||
m = n;
|
||||
|
||||
if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
|
||||
return;
|
||||
ds.Take();
|
||||
}
|
||||
break;
|
||||
|
||||
case '.':
|
||||
PushOperand(operandStack, kAnyCharacterClass);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
@ -406,6 +431,71 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Expands a counted quantifier ({n}, {n,} or {n,m}) applied to the operand
// currently on top of operandStack by cloning that operand and concatenating
// the copies.
//   n  minimum repetition count; asserted to be positive.
//   m  maximum repetition count; 0 stands for "no upper bound", otherwise it
//      is asserted to be >= n.
// Returns false (before touching the stack) when the stack does not hold at
// least one Frag; returns true otherwise.
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
    RAPIDJSON_ASSERT(n > 0);
    RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
    if (operandStack.GetSize() < sizeof(Frag))
        return false;

    // Copies needed on top of the original operand to reach n mandatory ones.
    const unsigned mandatoryCopies = n - 1;

    // a{3} -> a a a
    for (unsigned made = 0; made < mandatoryCopies; made++)
        CloneTopOperand(operandStack);

    if (m == 0) {
        // a{3,} -> a a a+
        Eval(operandStack, kOneOrMore);
    }
    else if (m > n) {
        // Number of optional trailing repetitions (at least one here).
        const unsigned optionalCopies = m - n;

        CloneTopOperand(operandStack);      // a{3,5} -> a a a a
        Eval(operandStack, kZeroOrOne);     // a{3,5} -> a a a a?

        // The first optional operand already exists; clone the remaining ones.
        for (unsigned made = 1; made < optionalCopies; made++)
            CloneTopOperand(operandStack);  // a{3,5} -> a a a a? a?

        // Fold every optional operand into the last mandatory one.
        for (unsigned joined = 0; joined < optionalCopies; joined++)
            Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
    }

    // Join the mandatory operands:
    // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?
    for (unsigned joined = 0; joined < mandatoryCopies; joined++)
        Eval(operandStack, kConcatenation);

    return true;
}
|
||||
|
||||
// Returns the smaller of the two values (either one when they are equal).
static SizeType Min(SizeType a, SizeType b) {
    if (b < a)
        return b;
    return a;
}
|
||||
|
||||
// Returns the lowest state index that can be reached from `index` by
// repeatedly following `out` / `out1` links that lead to a lower-numbered
// state. Links to higher-numbered states are not followed, so the recursion
// always moves towards smaller indices.
SizeType GetMinStateIndex(SizeType index) {
    State& state = GetState(index);
    SizeType lowest = index;

    if (state.out != kRegexInvalidState && state.out < lowest) {
        const SizeType viaOut = GetMinStateIndex(state.out);
        if (viaOut < lowest)
            lowest = viaOut;
    }

    // Compared against the minimum found so far, not the original index.
    if (state.out1 != kRegexInvalidState && state.out1 < lowest) {
        const SizeType viaOut1 = GetMinStateIndex(state.out1);
        if (viaOut1 < lowest)
            lowest = viaOut1;
    }

    return lowest;
}
|
||||
|
||||
// Duplicates the operand on top of operandStack: the states it spans are
// appended to states_ with their transitions shifted to the new location,
// and a Frag describing the copy is pushed onto operandStack.
void CloneTopOperand(Stack<Allocator>& operandStack) {
    // Take the top fragment BY VALUE. A pointer into operandStack must not be
    // dereferenced in the same expression that pushes onto that stack: before
    // C++17 the push may be evaluated first, and if it grows the buffer the
    // pointer would dangle.
    // NOTE(review): assumes Stack::Push can relocate its storage - confirm.
    const Frag src = *operandStack.template Top<Frag>();

    const SizeType minIndex = GetMinStateIndex(src.start);
    const SizeType count = stateCount_ - minIndex; // Assumes top operand contains states in [min, stateCount_)

    // Reserve room first, then look up the source: the lookup must see the
    // buffer as it is after the push.
    State* s = states_.template Push<State>(count);
    memcpy(s, &GetState(minIndex), count * sizeof(State));

    // Every valid transition is shifted by `count` so that it targets the
    // corresponding state of the copy.
    for (SizeType j = 0; j < count; j++) {
        if (s[j].out != kRegexInvalidState)
            s[j].out += count;
        if (s[j].out1 != kRegexInvalidState)
            s[j].out1 += count;
    }

    *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count);
    stateCount_ += count;
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
|
||||
unsigned r = 0;
|
||||
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
|
||||
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
|
||||
return false; // overflow
|
||||
r = r * 10 + (ds.Take() - '0');
|
||||
}
|
||||
*u = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseRange(DecodedStream<InputStream>& ds, SizeType* range) {
|
||||
bool isBegin = true;
|
||||
@ -495,6 +585,8 @@ private:
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '\\':
|
||||
*escapedCodepoint = codepoint; return true;
|
||||
case 'f': *escapedCodepoint = 0x000C; return true;
|
||||
|
@ -220,6 +220,111 @@ TEST(Regex, OneOrMore4) {
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
// {n} applied to a single character: only exactly three 'b's are accepted.
TEST(Regex, QuantifierExact1) {
    Regex re("ab{3}c");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abbbc" };
    const char* const rejected[] = { "ac", "abc", "abbc", "abbbbc" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n} applied to a group: only exactly three "bc" repetitions are accepted.
TEST(Regex, QuantifierExact2) {
    Regex re("a(bc){3}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abcbcbcd" };
    const char* const rejected[] = { "ad", "abcd", "abcbcd", "abcbcbcbcd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n} applied to an alternation: any mix of three 'b'/'c' characters is
// accepted, other lengths are not.
TEST(Regex, QuantifierExact3) {
    Regex re("a(b|c){3}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abbbd", "acccd", "abcbd" };
    const char* const rejected[] = { "ad", "abbd", "accccd", "abbbbd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,} applied to a single character: three or more 'b's are accepted.
TEST(Regex, QuantifierMin1) {
    Regex re("ab{3,}c");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abbbc", "abbbbc", "abbbbbc" };
    const char* const rejected[] = { "ac", "abc", "abbc" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,} applied to a group: three or more "bc" repetitions are accepted.
TEST(Regex, QuantifierMin2) {
    Regex re("a(bc){3,}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abcbcbcd", "abcbcbcbcd" };
    const char* const rejected[] = { "ad", "abcd", "abcbcd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,} applied to an alternation: three or more 'b'/'c' characters in any
// mix are accepted.
TEST(Regex, QuantifierMin3) {
    Regex re("a(b|c){3,}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abbbd", "acccd", "abcbd", "accccd", "abbbbd" };
    const char* const rejected[] = { "ad", "abbd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,m} applied to a single character: three to five 'b's are accepted.
TEST(Regex, QuantifierMinMax1) {
    Regex re("ab{3,5}c");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abbbc", "abbbbc", "abbbbbc" };
    const char* const rejected[] = { "ac", "abc", "abbc", "abbbbbbc" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,m} applied to a group: three to five "bc" repetitions are accepted.
TEST(Regex, QuantifierMinMax2) {
    Regex re("a(bc){3,5}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = { "abcbcbcd", "abcbcbcbcd", "abcbcbcbcbcd" };
    const char* const rejected[] = { "ad", "abcd", "abcbcd", "abcbcbcbcbcbcd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
// {n,m} applied to an alternation: three to five 'b'/'c' characters in any
// mix are accepted.
TEST(Regex, QuantifierMinMax3) {
    Regex re("a(b|c){3,5}d");
    ASSERT_TRUE(re.IsValid());

    const char* const accepted[] = {
        "abbbd", "acccd", "abcbd", "accccd", "abbbbd", "acccccd", "abbbbbd"
    };
    const char* const rejected[] = { "ad", "abbd", "accccccd", "abbbbbbd" };

    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++)
        EXPECT_TRUE(re.Match(accepted[i])) << accepted[i];
    for (unsigned i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++)
        EXPECT_FALSE(re.Match(rejected[i])) << rejected[i];
}
|
||||
|
||||
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
|
||||
|
||||
TEST(Regex, Unicode) {
|
||||
@ -328,10 +433,10 @@ TEST(Regex, CharacterRange8) {
|
||||
}
|
||||
|
||||
// Each backslash-escaped metacharacter (including \{ and \}) and each control
// escape must match the corresponding literal character, and the pattern text
// itself must not match.
TEST(Regex, Escape) {
    // A single declaration of the pattern: a second `const char* s` in the
    // same scope would be a redefinition.
    const char* s = "\\|\\(\\)\\?\\*\\+\\.\\[\\]\\{\\}\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]";
    Regex re(s);
    ASSERT_TRUE(re.IsValid());
    // The subject must contain "{}" because the pattern requires it.
    EXPECT_TRUE(re.Match("|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]"));
    EXPECT_FALSE(re.Match(s)); // Not escaping
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user