Add ?*+ to regex
This commit is contained in:
parent
05c79891d1
commit
a386934288
@ -54,11 +54,12 @@ public:
|
|||||||
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
|
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
|
||||||
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
|
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
|
||||||
std::memset(stateSet, 0, stateSetSize);
|
std::memset(stateSet, 0, stateSetSize);
|
||||||
|
|
||||||
AddState(stateSet, *current, root_);
|
AddState(stateSet, *current, root_);
|
||||||
|
|
||||||
unsigned codepoint;
|
unsigned codepoint;
|
||||||
while (!current->Empty() && Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
while (!current->Empty() && Encoding::Decode(is, &codepoint) && codepoint != 0) {
|
||||||
|
std::memset(stateSet, 0, stateSetSize);
|
||||||
|
next->Clear();
|
||||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||||
const State& sr = GetState(*s);
|
const State& sr = GetState(*s);
|
||||||
// if (sr.out != kRegexInvalidState)
|
// if (sr.out != kRegexInvalidState)
|
||||||
@ -70,8 +71,6 @@ public:
|
|||||||
Stack<Allocator>* temp = current;
|
Stack<Allocator>* temp = current;
|
||||||
current = next;
|
current = next;
|
||||||
next = temp;
|
next = temp;
|
||||||
std::memset(stateSet, 0, stateSetSize);
|
|
||||||
next->Clear();
|
|
||||||
// printf("\n");
|
// printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,9 +90,12 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
enum Operator {
|
enum Operator {
|
||||||
|
kZeroOrOne,
|
||||||
|
kZeroOrMore,
|
||||||
|
kOneOrMore,
|
||||||
kConcatenation,
|
kConcatenation,
|
||||||
kAlternation,
|
kAlternation,
|
||||||
kLeftParenthesis,
|
kLeftParenthesis
|
||||||
};
|
};
|
||||||
|
|
||||||
struct State {
|
struct State {
|
||||||
@ -193,6 +195,24 @@ private:
|
|||||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case '?':
|
||||||
|
*operatorStack.template Push<Operator>() = kZeroOrOne;
|
||||||
|
if (!Eval(operandStack, operatorStack))
|
||||||
|
return;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '*':
|
||||||
|
*operatorStack.template Push<Operator>() = kZeroOrMore;
|
||||||
|
if (!Eval(operandStack, operatorStack))
|
||||||
|
return;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '+':
|
||||||
|
*operatorStack.template Push<Operator>() = kOneOrMore;
|
||||||
|
if (!Eval(operandStack, operatorStack))
|
||||||
|
return;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
|
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
|
||||||
*operandStack.template Push<Frag>() = Frag(s, s);
|
*operandStack.template Push<Frag>() = Frag(s, s);
|
||||||
@ -209,16 +229,19 @@ private:
|
|||||||
Frag* e = operandStack.template Pop<Frag>(1);
|
Frag* e = operandStack.template Pop<Frag>(1);
|
||||||
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
||||||
root_ = e->start;
|
root_ = e->start;
|
||||||
// printf("root: %d\n", root_);
|
#if 0
|
||||||
// for (SizeType i = 0; i < stateCount_ ; i++) {
|
printf("root: %d\n", root_);
|
||||||
// State& s = GetState(i);
|
for (SizeType i = 0; i < stateCount_ ; i++) {
|
||||||
// printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
State& s = GetState(i);
|
||||||
// }
|
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
||||||
// printf("\n");
|
}
|
||||||
|
printf("\n");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
|
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
|
||||||
|
// printf("Eval %c\n", "?*+.|("[*operatorStack.template Top<Operator>()]);
|
||||||
switch (*operatorStack.template Pop<Operator>(1)) {
|
switch (*operatorStack.template Pop<Operator>(1)) {
|
||||||
case kConcatenation:
|
case kConcatenation:
|
||||||
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
|
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
|
||||||
@ -240,6 +263,35 @@ private:
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
case kZeroOrOne:
|
||||||
|
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||||
|
Frag e = *operandStack.template Pop<Frag>(1);
|
||||||
|
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||||
|
*operandStack.template Push<Frag>() = Frag(s, Append(e.out, s));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
|
||||||
|
case kZeroOrMore:
|
||||||
|
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||||
|
Frag e = *operandStack.template Pop<Frag>(1);
|
||||||
|
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||||
|
Patch(e.out, s);
|
||||||
|
*operandStack.template Push<Frag>() = Frag(s, s);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
|
||||||
|
case kOneOrMore:
|
||||||
|
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||||
|
Frag e = *operandStack.template Pop<Frag>(1);
|
||||||
|
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||||
|
Patch(e.out, s);
|
||||||
|
*operandStack.template Push<Frag>() = Frag(e.start, s);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
using namespace rapidjson::internal;
|
using namespace rapidjson::internal;
|
||||||
|
|
||||||
TEST(Regex, concatenation) {
|
TEST(Regex, Concatenation) {
|
||||||
Regex re("abc");
|
Regex re("abc");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("abc"));
|
EXPECT_TRUE(re.Match("abc"));
|
||||||
@ -28,7 +28,7 @@ TEST(Regex, concatenation) {
|
|||||||
EXPECT_FALSE(re.Match("abcd"));
|
EXPECT_FALSE(re.Match("abcd"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Regex, split1) {
|
TEST(Regex, Alternation1) {
|
||||||
Regex re("abab|abbb");
|
Regex re("abab|abbb");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("abab"));
|
EXPECT_TRUE(re.Match("abab"));
|
||||||
@ -40,7 +40,7 @@ TEST(Regex, split1) {
|
|||||||
EXPECT_FALSE(re.Match("abbbb"));
|
EXPECT_FALSE(re.Match("abbbb"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Regex, split2) {
|
TEST(Regex, Alternation2) {
|
||||||
Regex re("a|b|c");
|
Regex re("a|b|c");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("a"));
|
EXPECT_TRUE(re.Match("a"));
|
||||||
@ -51,7 +51,7 @@ TEST(Regex, split2) {
|
|||||||
EXPECT_FALSE(re.Match("ab"));
|
EXPECT_FALSE(re.Match("ab"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Regex, parenthesis1) {
|
TEST(Regex, Parenthesis1) {
|
||||||
Regex re("(ab)c");
|
Regex re("(ab)c");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("abc"));
|
EXPECT_TRUE(re.Match("abc"));
|
||||||
@ -62,7 +62,7 @@ TEST(Regex, parenthesis1) {
|
|||||||
EXPECT_FALSE(re.Match("abcd"));
|
EXPECT_FALSE(re.Match("abcd"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Regex, parenthesis2) {
|
TEST(Regex, Parenthesis2) {
|
||||||
Regex re("a(bc)");
|
Regex re("a(bc)");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("abc"));
|
EXPECT_TRUE(re.Match("abc"));
|
||||||
@ -73,7 +73,7 @@ TEST(Regex, parenthesis2) {
|
|||||||
EXPECT_FALSE(re.Match("abcd"));
|
EXPECT_FALSE(re.Match("abcd"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Regex, parenthesis3) {
|
TEST(Regex, Parenthesis3) {
|
||||||
Regex re("(a|b)(c|d)");
|
Regex re("(a|b)(c|d)");
|
||||||
ASSERT_TRUE(re.IsValid());
|
ASSERT_TRUE(re.IsValid());
|
||||||
EXPECT_TRUE(re.Match("ac"));
|
EXPECT_TRUE(re.Match("ac"));
|
||||||
@ -84,3 +84,138 @@ TEST(Regex, parenthesis3) {
|
|||||||
EXPECT_FALSE(re.Match("ab"));
|
EXPECT_FALSE(re.Match("ab"));
|
||||||
EXPECT_FALSE(re.Match("cd"));
|
EXPECT_FALSE(re.Match("cd"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrOne1) {
|
||||||
|
Regex re("a?");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match(""));
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_FALSE(re.Match("aa"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrOne2) {
|
||||||
|
Regex re("a?b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("b"));
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_FALSE(re.Match("a"));
|
||||||
|
EXPECT_FALSE(re.Match("aa"));
|
||||||
|
EXPECT_FALSE(re.Match("bb"));
|
||||||
|
EXPECT_FALSE(re.Match("ba"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrOne3) {
|
||||||
|
Regex re("ab?");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_FALSE(re.Match("b"));
|
||||||
|
EXPECT_FALSE(re.Match("aa"));
|
||||||
|
EXPECT_FALSE(re.Match("bb"));
|
||||||
|
EXPECT_FALSE(re.Match("ba"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrOne4) {
|
||||||
|
Regex re("a?b?");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match(""));
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_TRUE(re.Match("b"));
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_FALSE(re.Match("aa"));
|
||||||
|
EXPECT_FALSE(re.Match("bb"));
|
||||||
|
EXPECT_FALSE(re.Match("ba"));
|
||||||
|
EXPECT_FALSE(re.Match("abc"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrOne5) {
|
||||||
|
Regex re("a(ab)?b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aabb"));
|
||||||
|
EXPECT_FALSE(re.Match("aab"));
|
||||||
|
EXPECT_FALSE(re.Match("abb"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrMore1) {
|
||||||
|
Regex re("a*");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match(""));
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_TRUE(re.Match("aa"));
|
||||||
|
EXPECT_FALSE(re.Match("b"));
|
||||||
|
EXPECT_FALSE(re.Match("ab"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrMore2) {
|
||||||
|
Regex re("a*b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("b"));
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aab"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("bb"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrMore3) {
|
||||||
|
Regex re("a*b*");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match(""));
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_TRUE(re.Match("aa"));
|
||||||
|
EXPECT_TRUE(re.Match("b"));
|
||||||
|
EXPECT_TRUE(re.Match("bb"));
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aabb"));
|
||||||
|
EXPECT_FALSE(re.Match("ba"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, ZeroOrMore4) {
|
||||||
|
Regex re("a(ab)*b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aabb"));
|
||||||
|
EXPECT_TRUE(re.Match("aababb"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("aa"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, OneOrMore1) {
|
||||||
|
Regex re("a+");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("a"));
|
||||||
|
EXPECT_TRUE(re.Match("aa"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("b"));
|
||||||
|
EXPECT_FALSE(re.Match("ab"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, OneOrMore2) {
|
||||||
|
Regex re("a+b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aab"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("b"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, OneOrMore3) {
|
||||||
|
Regex re("a+b+");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("ab"));
|
||||||
|
EXPECT_TRUE(re.Match("aab"));
|
||||||
|
EXPECT_TRUE(re.Match("abb"));
|
||||||
|
EXPECT_TRUE(re.Match("aabb"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("b"));
|
||||||
|
EXPECT_FALSE(re.Match("ba"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Regex, OneOrMore4) {
|
||||||
|
Regex re("a(ab)+b");
|
||||||
|
ASSERT_TRUE(re.IsValid());
|
||||||
|
EXPECT_TRUE(re.Match("aabb"));
|
||||||
|
EXPECT_TRUE(re.Match("aababb"));
|
||||||
|
EXPECT_FALSE(re.Match(""));
|
||||||
|
EXPECT_FALSE(re.Match("ab"));
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user