Add any character (.) to regex
This commit is contained in:
parent
3c9ceb32a5
commit
06853b89b0
@ -76,7 +76,7 @@ public:
|
||||
next->Clear();
|
||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||
const State& sr = GetState(*s);
|
||||
if (sr.codepoint == codepoint)
|
||||
if (sr.codepoint == kAnyCharacterClass || sr.codepoint == codepoint)
|
||||
AddState(stateSet, *next, sr.out);
|
||||
}
|
||||
Stack<Allocator>* temp = current;
|
||||
@ -108,6 +108,8 @@ private:
|
||||
kLeftParenthesis
|
||||
};
|
||||
|
||||
static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< Sentinel codepoint pushed for '.'; a state holding it is followed for any input codepoint. The value lies above U+10FFFF, the largest Unicode codepoint.
|
||||
|
||||
struct State {
|
||||
SizeType out; //!< Equals to kInvalid for matching state
|
||||
SizeType out1; //!< Equals to non-kInvalid for split
|
||||
@ -168,6 +170,11 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
//! Allocates a state for one codepoint and pushes it onto the operand stack as a fragment.
/*!
    Used by the pattern parser for ordinary characters and for '.'.

    \param operandStack Stack of Frag operands that receives the new fragment.
    \param codepoint    Value stored in the new state (a literal codepoint or kAnyCharacterClass).
*/
void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
    // Both leading arguments are kRegexInvalidState: presumably the two out-edges,
    // left unconnected until the fragment is patched -- confirm against NewState().
    const SizeType state = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);

    // The fragment refers to the same state in both of its fields.
    const Frag operand(state, state);
    *operandStack.template Push<Frag>() = operand;
}
|
||||
|
||||
template <typename InputStream>
|
||||
void Parse(InputStream& is) {
|
||||
Allocator allocator;
|
||||
@ -219,9 +226,13 @@ private:
|
||||
return;
|
||||
break;
|
||||
|
||||
case '.':
|
||||
PushOperand(operandStack, kAnyCharacterClass);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
default:
|
||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
|
||||
*operandStack.template Push<Frag>() = Frag(s, s);
|
||||
PushOperand(operandStack, codepoint);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
}
|
||||
}
|
||||
|
@ -220,13 +220,25 @@ TEST(Regex, OneOrMore4) {
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
// File-scope helper shared by the tests below; it is #undef'd after the last test that uses it.
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC

// Checks that '+' applies to a whole multi-byte UTF-8 character, not to its last byte.
TEST(Regex, Unicode) {
    Regex re("a" EURO "+b");
    ASSERT_TRUE(re.IsValid());

    // One or more Euro signs between 'a' and 'b' must match.
    EXPECT_TRUE(re.Match("a" EURO "b"));
    EXPECT_TRUE(re.Match("a" EURO EURO "b"));

    // A different character in place of the Euro sign must not match.
    EXPECT_FALSE(re.Match("a?b"));

    // "\xAC" and "b" are separate literals so that 'b' is not parsed as part of the hex escape.
    EXPECT_FALSE(re.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match
}
|
||||
|
||||
// A lone '.' pattern must accept exactly one character, whatever its value.
TEST(Regex, AnyCharacter) {
    Regex anyChar(".");
    ASSERT_TRUE(anyChar.IsValid());

    // Single characters, including the multi-byte UTF-8 Euro sign, are accepted.
    EXPECT_TRUE(anyChar.Match("a"));
    EXPECT_TRUE(anyChar.Match("b"));
    EXPECT_TRUE(anyChar.Match(EURO));

    // Empty input and two-character input are rejected.
    EXPECT_FALSE(anyChar.Match(""));
    EXPECT_FALSE(anyChar.Match("aa"));
}
|
||||
|
||||
#undef EURO
|
||||
|
Loading…
x
Reference in New Issue
Block a user