Add any character (.) to regex

This commit is contained in:
miloyip 2015-05-25 22:51:03 +08:00
parent 3c9ceb32a5
commit 06853b89b0
2 changed files with 28 additions and 5 deletions

View File

@ -76,7 +76,7 @@ public:
next->Clear();
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
const State& sr = GetState(*s);
if (sr.codepoint == codepoint)
if (sr.codepoint == kAnyCharacterClass || sr.codepoint == codepoint)
AddState(stateSet, *next, sr.out);
}
Stack<Allocator>* temp = current;
@ -108,6 +108,8 @@ private:
kLeftParenthesis
};
static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< Sentinel stored in State::codepoint for '.'; a state carrying it is accepted for every input codepoint during matching.
struct State {
SizeType out; //!< Equals to kInvalid for matching state
SizeType out1; //!< Equals to non-kInvalid for split
@ -168,6 +170,11 @@ private:
}
}
//! Creates one new state for \c codepoint and pushes it onto the operand stack as a fragment.
/*!
    \param operandStack Stack of Frag operands that receives the new fragment.
    \param codepoint Value stored in the new state; callers pass either a literal codepoint or kAnyCharacterClass.
*/
void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
    // The first two NewState() arguments are passed as kRegexInvalidState.
    const SizeType state = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);

    // Reserve a Frag slot on the stack, then fill it using the new state index for both Frag arguments.
    Frag* const slot = operandStack.template Push<Frag>();
    *slot = Frag(state, state);
}
template <typename InputStream>
void Parse(InputStream& is) {
Allocator allocator;
@ -219,9 +226,13 @@ private:
return;
break;
case '.':
PushOperand(operandStack, kAnyCharacterClass);
ImplicitConcatenation(atomCountStack, operatorStack);
break;
default:
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
*operandStack.template Push<Frag>() = Frag(s, s);
PushOperand(operandStack, codepoint);
ImplicitConcatenation(atomCountStack, operatorStack);
}
}

View File

@ -220,13 +220,25 @@ TEST(Regex, OneOrMore4) {
EXPECT_FALSE(re.Match("ab"));
}
TEST(Regex, Unicode) {
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
TEST(Regex, Unicode) {
Regex re("a" EURO "+b");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("a" EURO "b"));
EXPECT_TRUE(re.Match("a" EURO EURO "b"));
EXPECT_FALSE(re.Match("a?b"));
EXPECT_FALSE(re.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match
#undef EURO
}
// Checks the "." pattern against single-character, empty and two-character inputs.
TEST(Regex, AnyCharacter) {
    Regex anyChar(".");
    ASSERT_TRUE(anyChar.IsValid());

    // Accepted: a single ASCII character, or the multi-byte UTF-8 sequence EURO.
    EXPECT_TRUE(anyChar.Match("a"));
    EXPECT_TRUE(anyChar.Match("b"));
    EXPECT_TRUE(anyChar.Match(EURO));

    // Rejected: the empty string and a two-character string.
    EXPECT_FALSE(anyChar.Match(""));
    EXPECT_FALSE(anyChar.Match("aa"));
}
#undef EURO