Add escape characters and control characters
This commit is contained in:
parent
1784afe5f7
commit
92285bed44
@ -46,6 +46,12 @@ static const SizeType kRegexInvalidRange = ~SizeType(0);
|
|||||||
- \c [a-z0-9_] Character class combination
|
- \c [a-z0-9_] Character class combination
|
||||||
- \c [^abc] Negated character classes
|
- \c [^abc] Negated character classes
|
||||||
- \c [^a-c] Negated character class range
|
- \c [^a-c] Negated character class range
|
||||||
|
- \c \\| \\\\ ... Escape characters
|
||||||
|
- \c \\f Form feed (U+000C)
|
||||||
|
- \c \\n Line feed (U+000A)
|
||||||
|
- \c \\r Carriage return (U+000D)
|
||||||
|
- \c \\t Tab (U+0009)
|
||||||
|
- \c \\v Vertical tab (U+000B)
|
||||||
*/
|
*/
|
||||||
template <typename Encoding, typename Allocator = CrtAllocator>
|
template <typename Encoding, typename Allocator = CrtAllocator>
|
||||||
class GenericRegex {
|
class GenericRegex {
|
||||||
@ -256,7 +262,32 @@ private:
|
|||||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
case '\\': // Escape character
|
||||||
|
if (!Encoding::Decode(is, &codepoint) || codepoint == 0)
|
||||||
|
return; // Expect an escape character
|
||||||
|
switch (codepoint) {
|
||||||
|
case '|':
|
||||||
|
case '(':
|
||||||
|
case ')':
|
||||||
|
case '?':
|
||||||
|
case '*':
|
||||||
|
case '+':
|
||||||
|
case '.':
|
||||||
|
case '[':
|
||||||
|
case ']':
|
||||||
|
case '\\':
|
||||||
|
break; // use the codepoint as is
|
||||||
|
case 'f': codepoint = 0x000C; break;
|
||||||
|
case 'n': codepoint = 0x000A; break;
|
||||||
|
case 'r': codepoint = 0x000D; break;
|
||||||
|
case 't': codepoint = 0x0009; break;
|
||||||
|
case 'v': codepoint = 0x000B; break;
|
||||||
|
default:
|
||||||
|
return; // Unsupported escape character
|
||||||
|
}
|
||||||
|
// fall through to default
|
||||||
|
|
||||||
|
default: // Pattern character
|
||||||
PushOperand(operandStack, codepoint);
|
PushOperand(operandStack, codepoint);
|
||||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||||
}
|
}
|
||||||
|
@ -327,4 +327,12 @@ TEST(Regex, CharacterRange8) {
|
|||||||
EXPECT_FALSE(re.Match("!"));
|
EXPECT_FALSE(re.Match("!"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that a pattern built from backslash escapes is valid, matches the
// literal/control characters those escapes stand for, and does not match
// its own unescaped source text.
TEST(Regex, Escape) {
|
||||||

// Pattern (after C++ literal unescaping): one backslash-escape each for
// | ( ) ? * + . [ ] and backslash itself, followed by the f, n, r, t, v escapes.
const char* s = "\\|\\(\\)\\?\\*\\+\\.\\[\\]\\\\\\f\\n\\r\\t\\v";
|
||||||

Regex re(s);
|
||||||

// Abort the test early if the pattern was rejected; the matches below
// would be meaningless on an invalid regex.
ASSERT_TRUE(re.IsValid());
|
||||||

// Subject is the escaped characters taken literally, then the control
// codes 0x0C, 0x0A, 0x0D, 0x09, 0x0B in the same order as the pattern.
EXPECT_TRUE(re.Match("|()?*+.[]\\\x0C\n\r\t\x0B"));
|
||||||

// The pattern text itself (backslashes included) must not match.
EXPECT_FALSE(re.Match(s)); // Not escaping
|
||||||

}
|
||||||
|
|
||||||
#undef EURO
|
#undef EURO
|
||||||
|
Loading…
x
Reference in New Issue
Block a user