Merge regex into schema
This commit is contained in:
commit
b8d2f7e660
@ -1213,6 +1213,31 @@ public:
|
||||
return pos;
|
||||
}
|
||||
|
||||
//! Erase a member in object by its name.
|
||||
/*! \param name Name of member to be removed.
|
||||
\return Whether the member existed.
|
||||
\note Linear time complexity.
|
||||
*/
|
||||
bool EraseMember(const Ch* name) {
|
||||
GenericValue n(StringRef(name));
|
||||
return EraseMember(n);
|
||||
}
|
||||
|
||||
#if RAPIDJSON_HAS_STDSTRING
|
||||
bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); }
|
||||
#endif
|
||||
|
||||
template <typename SourceAllocator>
|
||||
bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) {
|
||||
MemberIterator m = FindMember(name);
|
||||
if (m != MemberEnd()) {
|
||||
EraseMember(m);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
//@}
|
||||
|
||||
//!@name Array
|
||||
|
642
include/rapidjson/internal/regex.h
Normal file
642
include/rapidjson/internal/regex.h
Normal file
@ -0,0 +1,642 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_INTERNAL_REGEX_H_
|
||||
#define RAPIDJSON_INTERNAL_REGEX_H_
|
||||
|
||||
#include "../rapidjson.h"
|
||||
#include "stack.h"
|
||||
|
||||
#ifndef RAPIDJSON_REGEX_VERBOSE
|
||||
#define RAPIDJSON_REGEX_VERBOSE 0
|
||||
#endif
|
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN
|
||||
namespace internal {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// GenericRegex
|
||||
|
||||
static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
|
||||
static const SizeType kRegexInvalidRange = ~SizeType(0);
|
||||
|
||||
//! Regular expression engine with subset of ECMAscript grammar.
|
||||
/*!
|
||||
Supported regular expression syntax:
|
||||
- \c ab Concatenation
|
||||
- \c a|b Alternation
|
||||
- \c a? Zero or one
|
||||
- \c a* Zero or more
|
||||
- \c a+ One or more
|
||||
- \c a{3} Exactly 3 times
|
||||
- \c a{3,} At least 3 times
|
||||
- \c a{3,5} 3 to 5 times
|
||||
- \c (ab) Grouping
|
||||
- \c ^a At the beginning
|
||||
- \c a$ At the end
|
||||
- \c . Any character
|
||||
- \c [abc] Character classes
|
||||
- \c [a-c] Character class range
|
||||
- \c [a-z0-9_] Character class combination
|
||||
- \c [^abc] Negated character classes
|
||||
- \c [^a-c] Negated character class range
|
||||
- \c [\b] Backspace (U+0008)
|
||||
- \c \\| \\\\ ... Escape characters
|
||||
- \c \\f Form feed (U+000C)
|
||||
- \c \\n Line feed (U+000A)
|
||||
- \c \\r Carriage return (U+000D)
|
||||
- \c \\t Tab (U+0009)
|
||||
- \c \\v Vertical tab (U+000B)
|
||||
*/
|
||||
template <typename Encoding, typename Allocator = CrtAllocator>
|
||||
class GenericRegex {
|
||||
public:
|
||||
typedef typename Encoding::Ch Ch;
|
||||
|
||||
GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() {
|
||||
StringStream ss(source);
|
||||
DecodedStream<StringStream> ds(ss);
|
||||
Parse(ds);
|
||||
}
|
||||
|
||||
~GenericRegex() {
|
||||
}
|
||||
|
||||
bool IsValid() const {
|
||||
return root_ != kRegexInvalidState;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool Match(InputStream& is) const {
|
||||
return SearchWithAnchoring(is, true, true);
|
||||
}
|
||||
|
||||
bool Match(const Ch* s) const {
|
||||
StringStream is(s);
|
||||
return Match(is);
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool Search(InputStream& is) const {
|
||||
return SearchWithAnchoring(is, anchorBegin_, anchorEnd_);
|
||||
}
|
||||
|
||||
bool Search(const Ch* s) const {
|
||||
StringStream is(s);
|
||||
return Search(is);
|
||||
}
|
||||
|
||||
private:
|
||||
enum Operator {
|
||||
kZeroOrOne,
|
||||
kZeroOrMore,
|
||||
kOneOrMore,
|
||||
kConcatenation,
|
||||
kAlternation,
|
||||
kLeftParenthesis
|
||||
};
|
||||
|
||||
static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
|
||||
static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
|
||||
static const unsigned kRangeNegationFlag = 0x80000000;
|
||||
|
||||
struct Range {
|
||||
unsigned start; //
|
||||
unsigned end;
|
||||
SizeType next;
|
||||
};
|
||||
|
||||
struct State {
|
||||
SizeType out; //!< Equals to kInvalid for matching state
|
||||
SizeType out1; //!< Equals to non-kInvalid for split
|
||||
SizeType rangeStart;
|
||||
unsigned codepoint;
|
||||
};
|
||||
|
||||
struct Frag {
|
||||
Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
|
||||
SizeType start;
|
||||
SizeType out; //!< link-list of all output states
|
||||
SizeType minIndex;
|
||||
};
|
||||
|
||||
template <typename SourceStream>
|
||||
class DecodedStream {
|
||||
public:
|
||||
DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
|
||||
unsigned Peek() { return codepoint_; }
|
||||
unsigned Take() { unsigned c = codepoint_; Decode(); return c; }
|
||||
|
||||
private:
|
||||
void Decode() {
|
||||
if (!Encoding::Decode(ss_, &codepoint_))
|
||||
codepoint_ = 0;
|
||||
}
|
||||
|
||||
SourceStream& ss_;
|
||||
unsigned codepoint_;
|
||||
};
|
||||
|
||||
State& GetState(SizeType index) {
|
||||
RAPIDJSON_ASSERT(index < stateCount_);
|
||||
return states_.template Bottom<State>()[index];
|
||||
}
|
||||
|
||||
const State& GetState(SizeType index) const {
|
||||
RAPIDJSON_ASSERT(index < stateCount_);
|
||||
return states_.template Bottom<State>()[index];
|
||||
}
|
||||
|
||||
Range& GetRange(SizeType index) {
|
||||
RAPIDJSON_ASSERT(index < rangeCount_);
|
||||
return ranges_.template Bottom<Range>()[index];
|
||||
}
|
||||
|
||||
const Range& GetRange(SizeType index) const {
|
||||
RAPIDJSON_ASSERT(index < rangeCount_);
|
||||
return ranges_.template Bottom<Range>()[index];
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
void Parse(DecodedStream<InputStream>& ds) {
|
||||
Allocator allocator;
|
||||
Stack<Allocator> operandStack(&allocator, 256); // Frag
|
||||
Stack<Allocator> operatorStack(&allocator, 256); // Operator
|
||||
Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
|
||||
|
||||
*atomCountStack.template Push<unsigned>() = 0;
|
||||
|
||||
unsigned codepoint;
|
||||
while (ds.Peek() != 0) {
|
||||
switch (codepoint = ds.Take()) {
|
||||
case '^':
|
||||
anchorBegin_ = true;
|
||||
break;
|
||||
|
||||
case '$':
|
||||
anchorEnd_ = true;
|
||||
break;
|
||||
|
||||
case '|':
|
||||
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
*operatorStack.template Push<Operator>() = kAlternation;
|
||||
*atomCountStack.template Top<unsigned>() = 0;
|
||||
break;
|
||||
|
||||
case '(':
|
||||
*operatorStack.template Push<Operator>() = kLeftParenthesis;
|
||||
*atomCountStack.template Push<unsigned>() = 0;
|
||||
break;
|
||||
|
||||
case ')':
|
||||
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
if (operatorStack.Empty())
|
||||
return;
|
||||
operatorStack.template Pop<Operator>(1);
|
||||
atomCountStack.template Pop<unsigned>(1);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
if (!Eval(operandStack, kZeroOrOne))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '*':
|
||||
if (!Eval(operandStack, kZeroOrMore))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '+':
|
||||
if (!Eval(operandStack, kOneOrMore))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '{':
|
||||
{
|
||||
unsigned n, m;
|
||||
if (!ParseUnsigned(ds, &n) || n == 0)
|
||||
return;
|
||||
|
||||
if (ds.Peek() == ',') {
|
||||
ds.Take();
|
||||
if (ds.Peek() == '}')
|
||||
m = 0;
|
||||
else if (!ParseUnsigned(ds, &m) || m < n)
|
||||
return;
|
||||
}
|
||||
else
|
||||
m = n;
|
||||
|
||||
if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
|
||||
return;
|
||||
ds.Take();
|
||||
}
|
||||
break;
|
||||
|
||||
case '.':
|
||||
PushOperand(operandStack, kAnyCharacterClass);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '[':
|
||||
{
|
||||
SizeType range;
|
||||
if (!ParseRange(ds, &range))
|
||||
return;
|
||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
|
||||
GetState(s).rangeStart = range;
|
||||
*operandStack.template Push<Frag>() = Frag(s, s, s);
|
||||
}
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '\\': // Escape character
|
||||
if (!CharacterEscape(ds, &codepoint))
|
||||
return; // Unsupported escape character
|
||||
// fall through to default
|
||||
|
||||
default: // Pattern character
|
||||
PushOperand(operandStack, codepoint);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
}
|
||||
}
|
||||
|
||||
while (!operatorStack.Empty())
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
|
||||
// Link the operand to matching state.
|
||||
if (operandStack.GetSize() == sizeof(Frag)) {
|
||||
Frag* e = operandStack.template Pop<Frag>(1);
|
||||
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
||||
root_ = e->start;
|
||||
|
||||
#if RAPIDJSON_REGEX_VERBOSE
|
||||
printf("root: %d\n", root_);
|
||||
for (SizeType i = 0; i < stateCount_ ; i++) {
|
||||
State& s = GetState(i);
|
||||
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
|
||||
State* s = states_.template Push<State>();
|
||||
s->out = out;
|
||||
s->out1 = out1;
|
||||
s->codepoint = codepoint;
|
||||
s->rangeStart = kRegexInvalidRange;
|
||||
return stateCount_++;
|
||||
}
|
||||
|
||||
void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
|
||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
|
||||
*operandStack.template Push<Frag>() = Frag(s, s, s);
|
||||
}
|
||||
|
||||
void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
|
||||
if (*atomCountStack.template Top<unsigned>())
|
||||
*operatorStack.template Push<Operator>() = kConcatenation;
|
||||
(*atomCountStack.template Top<unsigned>())++;
|
||||
}
|
||||
|
||||
SizeType Append(SizeType l1, SizeType l2) {
|
||||
SizeType old = l1;
|
||||
while (GetState(l1).out != kRegexInvalidState)
|
||||
l1 = GetState(l1).out;
|
||||
GetState(l1).out = l2;
|
||||
return old;
|
||||
}
|
||||
|
||||
void Patch(SizeType l, SizeType s) {
|
||||
for (SizeType next; l != kRegexInvalidState; l = next) {
|
||||
next = GetState(l).out;
|
||||
GetState(l).out = s;
|
||||
}
|
||||
}
|
||||
|
||||
bool Eval(Stack<Allocator>& operandStack, Operator op) {
|
||||
switch (op) {
|
||||
case kConcatenation:
|
||||
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
|
||||
Frag e2 = *operandStack.template Pop<Frag>(1);
|
||||
Frag e1 = *operandStack.template Pop<Frag>(1);
|
||||
Patch(e1.out, e2.start);
|
||||
*operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kAlternation:
|
||||
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
|
||||
Frag e2 = *operandStack.template Pop<Frag>(1);
|
||||
Frag e1 = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(e1.start, e2.start, 0);
|
||||
*operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kZeroOrOne:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
*operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kZeroOrMore:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
Patch(e.out, s);
|
||||
*operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case kOneOrMore:
|
||||
if (operandStack.GetSize() >= sizeof(Frag)) {
|
||||
Frag e = *operandStack.template Pop<Frag>(1);
|
||||
SizeType s = NewState(kRegexInvalidState, e.start, 0);
|
||||
Patch(e.out, s);
|
||||
*operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
|
||||
RAPIDJSON_ASSERT(n > 0);
|
||||
RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
|
||||
if (operandStack.GetSize() < sizeof(Frag))
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
|
||||
CloneTopOperand(operandStack);
|
||||
|
||||
if (m == 0)
|
||||
Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
|
||||
else if (m > n) {
|
||||
CloneTopOperand(operandStack); // a{3,5} -> a a a a
|
||||
Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
|
||||
for (unsigned i = n; i < m - 1; i++)
|
||||
CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
|
||||
for (unsigned i = n; i < m; i++)
|
||||
Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < n - 1; i++)
|
||||
Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
|
||||
|
||||
void CloneTopOperand(Stack<Allocator>& operandStack) {
|
||||
const Frag *src = operandStack.template Top<Frag>();
|
||||
SizeType count = stateCount_ - src->minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
|
||||
State* s = states_.template Push<State>(count);
|
||||
memcpy(s, &GetState(src->minIndex), count * sizeof(State));
|
||||
for (SizeType j = 0; j < count; j++) {
|
||||
if (s[j].out != kRegexInvalidState)
|
||||
s[j].out += count;
|
||||
if (s[j].out1 != kRegexInvalidState)
|
||||
s[j].out1 += count;
|
||||
}
|
||||
*operandStack.template Push<Frag>() = Frag(src->start + count, src->out + count, src->minIndex + count);
|
||||
stateCount_ += count;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
|
||||
unsigned r = 0;
|
||||
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
|
||||
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
|
||||
return false; // overflow
|
||||
r = r * 10 + (ds.Take() - '0');
|
||||
}
|
||||
*u = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseRange(DecodedStream<InputStream>& ds, SizeType* range) {
|
||||
bool isBegin = true;
|
||||
bool negate = false;
|
||||
int step = 0;
|
||||
SizeType start = kRegexInvalidRange;
|
||||
SizeType current = kRegexInvalidRange;
|
||||
unsigned codepoint;
|
||||
while ((codepoint = ds.Take()) != 0) {
|
||||
if (isBegin) {
|
||||
isBegin = false;
|
||||
if (codepoint == '^') {
|
||||
negate = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
switch (codepoint) {
|
||||
case ']':
|
||||
if (start == kRegexInvalidRange)
|
||||
return false; // Error: nothing inside []
|
||||
if (step == 2) { // Add trailing '-'
|
||||
SizeType r = NewRange('-');
|
||||
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
||||
GetRange(current).next = r;
|
||||
}
|
||||
if (negate)
|
||||
GetRange(start).start |= kRangeNegationFlag;
|
||||
*range = start;
|
||||
return true;
|
||||
|
||||
case '\\':
|
||||
if (ds.Peek() == 'b') {
|
||||
ds.Take();
|
||||
codepoint = 0x0008; // Escape backspace character
|
||||
}
|
||||
else if (!CharacterEscape(ds, &codepoint))
|
||||
return false;
|
||||
// fall through to default
|
||||
|
||||
default:
|
||||
switch (step) {
|
||||
case 1:
|
||||
if (codepoint == '-') {
|
||||
step++;
|
||||
break;
|
||||
}
|
||||
// fall through to step 0 for other characters
|
||||
|
||||
case 0:
|
||||
{
|
||||
SizeType r = NewRange(codepoint);
|
||||
if (current != kRegexInvalidRange)
|
||||
GetRange(current).next = r;
|
||||
if (start == kRegexInvalidRange)
|
||||
start = r;
|
||||
current = r;
|
||||
}
|
||||
step = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
RAPIDJSON_ASSERT(step == 2);
|
||||
GetRange(current).end = codepoint;
|
||||
step = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
SizeType NewRange(unsigned codepoint) {
|
||||
Range* r = ranges_.template Push<Range>();
|
||||
r->start = r->end = codepoint;
|
||||
r->next = kRegexInvalidRange;
|
||||
return rangeCount_++;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool CharacterEscape(DecodedStream<InputStream>& ds, unsigned* escapedCodepoint) {
|
||||
unsigned codepoint;
|
||||
switch (codepoint = ds.Take()) {
|
||||
case '^':
|
||||
case '$':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '\\':
|
||||
*escapedCodepoint = codepoint; return true;
|
||||
case 'f': *escapedCodepoint = 0x000C; return true;
|
||||
case 'n': *escapedCodepoint = 0x000A; return true;
|
||||
case 'r': *escapedCodepoint = 0x000D; return true;
|
||||
case 't': *escapedCodepoint = 0x0009; return true;
|
||||
case 'v': *escapedCodepoint = 0x000B; return true;
|
||||
default:
|
||||
return false; // Unsupported escape character
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const {
|
||||
RAPIDJSON_ASSERT(IsValid());
|
||||
DecodedStream<InputStream> ds(is);
|
||||
|
||||
Allocator allocator;
|
||||
Stack<Allocator> state0(&allocator, stateCount_ * sizeof(SizeType));
|
||||
Stack<Allocator> state1(&allocator, stateCount_ * sizeof(SizeType));
|
||||
Stack<Allocator> *current = &state0, *next = &state1;
|
||||
|
||||
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
|
||||
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
|
||||
bool matched = false;
|
||||
matched = AddState(stateSet, *current, root_);
|
||||
|
||||
unsigned codepoint;
|
||||
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
|
||||
std::memset(stateSet, 0, stateSetSize);
|
||||
next->Clear();
|
||||
matched = false;
|
||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||
const State& sr = GetState(*s);
|
||||
if (sr.codepoint == codepoint ||
|
||||
sr.codepoint == kAnyCharacterClass ||
|
||||
(sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
|
||||
{
|
||||
matched = AddState(stateSet, *next, sr.out) || matched;
|
||||
if (!anchorEnd && matched)
|
||||
goto exit;
|
||||
}
|
||||
if (!anchorBegin)
|
||||
AddState(stateSet, *next, root_);
|
||||
}
|
||||
Stack<Allocator>* temp = current;
|
||||
current = next;
|
||||
next = temp;
|
||||
}
|
||||
|
||||
exit:
|
||||
Allocator::Free(stateSet);
|
||||
return matched;
|
||||
}
|
||||
|
||||
// Return whether the added states is a match state
|
||||
bool AddState(unsigned* stateSet, Stack<Allocator>& l, SizeType index) const {
|
||||
if (index == kRegexInvalidState)
|
||||
return true;
|
||||
|
||||
const State& s = GetState(index);
|
||||
if (s.out1 != kRegexInvalidState) { // Split
|
||||
bool matched = AddState(stateSet, l, s.out);
|
||||
matched = AddState(stateSet, l, s.out1) || matched;
|
||||
return matched;
|
||||
}
|
||||
else if (!(stateSet[index >> 5] & (1 << (index & 31)))) {
|
||||
stateSet[index >> 5] |= (1 << (index & 31));
|
||||
*l.template Push<SizeType>() = index;
|
||||
}
|
||||
return GetState(index).out == kRegexInvalidState;
|
||||
}
|
||||
|
||||
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
|
||||
bool yes = (GetRange(rangeIndex).start & kRangeNegationFlag) == 0;
|
||||
while (rangeIndex != kRegexInvalidRange) {
|
||||
const Range& r = GetRange(rangeIndex);
|
||||
if (codepoint >= (r.start & ~kRangeNegationFlag) && codepoint <= r.end)
|
||||
return yes;
|
||||
rangeIndex = r.next;
|
||||
}
|
||||
return !yes;
|
||||
}
|
||||
|
||||
Stack<Allocator> states_;
|
||||
Stack<Allocator> ranges_;
|
||||
SizeType root_;
|
||||
SizeType stateCount_;
|
||||
SizeType rangeCount_;
|
||||
bool anchorBegin_;
|
||||
bool anchorEnd_;
|
||||
};
|
||||
|
||||
typedef GenericRegex<UTF8<> > Regex;
|
||||
|
||||
} // namespace internal
|
||||
RAPIDJSON_NAMESPACE_END
|
||||
|
||||
#endif // RAPIDJSON_INTERNAL_REGEX_H_
|
@ -689,34 +689,37 @@ public:
|
||||
|
||||
ValueType* v = &root;
|
||||
const Token* last = tokens_ + (tokenCount_ - 1);
|
||||
for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
|
||||
for (const Token *t = tokens_; t != last; ++t) {
|
||||
switch (v->GetType()) {
|
||||
case kObjectType:
|
||||
{
|
||||
typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
|
||||
if (m == v->MemberEnd())
|
||||
return false;
|
||||
if (t == last) {
|
||||
v->EraseMember(m);
|
||||
return true;
|
||||
}
|
||||
v = &m->value;
|
||||
}
|
||||
break;
|
||||
case kArrayType:
|
||||
if (t->index == kPointerInvalidIndex || t->index >= v->Size())
|
||||
return false;
|
||||
if (t == last) {
|
||||
v->Erase(v->Begin() + t->index);
|
||||
return true;
|
||||
}
|
||||
v = &((*v)[t->index]);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
switch (v->GetType()) {
|
||||
case kObjectType:
|
||||
return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
|
||||
case kArrayType:
|
||||
if (last->index == kPointerInvalidIndex || last->index >= v->Size())
|
||||
return false;
|
||||
v->Erase(v->Begin() + last->index);
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -967,13 +967,13 @@ private:
|
||||
else {
|
||||
if (use64bit) {
|
||||
if (minus)
|
||||
cont = handler.Int64(-(int64_t)i64);
|
||||
cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
|
||||
else
|
||||
cont = handler.Uint64(i64);
|
||||
}
|
||||
else {
|
||||
if (minus)
|
||||
cont = handler.Int(-(int)i);
|
||||
cont = handler.Int(static_cast<int32_t>(~i + 1));
|
||||
else
|
||||
cont = handler.Uint(i);
|
||||
}
|
||||
|
@ -298,11 +298,28 @@ TEST_F(RapidJson, internal_Pow10) {
|
||||
EXPECT_GT(sum, 0.0);
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) {
|
||||
TEST_F(RapidJson, SkipWhitespace_Basic) {
|
||||
for (size_t i = 0; i < kTrialCount; i++) {
|
||||
Document doc;
|
||||
ASSERT_TRUE(doc.Parse(whitespace_).IsArray());
|
||||
}
|
||||
rapidjson::StringStream s(whitespace_);
|
||||
while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
|
||||
s.Take();
|
||||
ASSERT_EQ('[', s.Peek());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, SIMD_SUFFIX(SkipWhitespace)) {
|
||||
for (size_t i = 0; i < kTrialCount; i++) {
|
||||
rapidjson::StringStream s(whitespace_);
|
||||
rapidjson::SkipWhitespace(s);
|
||||
ASSERT_EQ('[', s.Peek());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, SkipWhitespace_strspn) {
|
||||
for (size_t i = 0; i < kTrialCount; i++) {
|
||||
const char* s = whitespace_ + std::strspn(whitespace_, " \t\r\n");
|
||||
ASSERT_EQ('[', *s);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RapidJson, UTF8_Validate) {
|
||||
|
@ -11,6 +11,7 @@ set(UNITTEST_SOURCES
|
||||
pointertest.cpp
|
||||
prettywritertest.cpp
|
||||
readertest.cpp
|
||||
regextest.cpp
|
||||
schematest.cpp
|
||||
simdtest.cpp
|
||||
strfunctest.cpp
|
||||
|
@ -879,7 +879,13 @@ TEST(Pointer, Erase) {
|
||||
d.Parse(kJson);
|
||||
|
||||
EXPECT_FALSE(Pointer("").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/nonexist/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/foo/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/foo/nonexist/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/foo/0/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/foo/0/nonexist/nonexist").Erase(d));
|
||||
EXPECT_FALSE(Pointer("/foo/2/nonexist").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/foo/0").Erase(d));
|
||||
EXPECT_EQ(1u, d["foo"].Size());
|
||||
EXPECT_STREQ("baz", d["foo"][0].GetString());
|
||||
@ -887,6 +893,24 @@ TEST(Pointer, Erase) {
|
||||
EXPECT_TRUE(d["foo"].Empty());
|
||||
EXPECT_TRUE(Pointer("/foo").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/foo").Get(d) == 0);
|
||||
|
||||
Pointer("/a/0/b/0").Create(d);
|
||||
|
||||
EXPECT_TRUE(Pointer("/a/0/b/0").Get(d) != 0);
|
||||
EXPECT_TRUE(Pointer("/a/0/b/0").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/a/0/b/0").Get(d) == 0);
|
||||
|
||||
EXPECT_TRUE(Pointer("/a/0/b").Get(d) != 0);
|
||||
EXPECT_TRUE(Pointer("/a/0/b").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/a/0/b").Get(d) == 0);
|
||||
|
||||
EXPECT_TRUE(Pointer("/a/0").Get(d) != 0);
|
||||
EXPECT_TRUE(Pointer("/a/0").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/a/0").Get(d) == 0);
|
||||
|
||||
EXPECT_TRUE(Pointer("/a").Get(d) != 0);
|
||||
EXPECT_TRUE(Pointer("/a").Erase(d));
|
||||
EXPECT_TRUE(Pointer("/a").Get(d) == 0);
|
||||
}
|
||||
|
||||
TEST(Pointer, CreateValueByPointer) {
|
||||
|
533
test/unittest/regextest.cpp
Normal file
533
test/unittest/regextest.cpp
Normal file
@ -0,0 +1,533 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "unittest.h"
|
||||
#include "rapidjson/internal/regex.h"
|
||||
|
||||
using namespace rapidjson::internal;
|
||||
|
||||
TEST(Regex, Concatenation) {
|
||||
Regex re("abc");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, Alternation1) {
|
||||
Regex re("abab|abbb");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abab"));
|
||||
EXPECT_TRUE(re.Match("abbb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("ababa"));
|
||||
EXPECT_FALSE(re.Match("abb"));
|
||||
EXPECT_FALSE(re.Match("abbbb"));
|
||||
}
|
||||
|
||||
TEST(Regex, Alternation2) {
|
||||
Regex re("a|b|c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("c"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
TEST(Regex, Parenthesis1) {
|
||||
Regex re("(ab)c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, Parenthesis2) {
|
||||
Regex re("a(bc)");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, Parenthesis3) {
|
||||
Regex re("(a|b)(c|d)");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ac"));
|
||||
EXPECT_TRUE(re.Match("ad"));
|
||||
EXPECT_TRUE(re.Match("bc"));
|
||||
EXPECT_TRUE(re.Match("bd"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("cd"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrOne1) {
|
||||
Regex re("a?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrOne2) {
|
||||
Regex re("a?b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrOne3) {
|
||||
Regex re("ab?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrOne4) {
|
||||
Regex re("a?b?");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
EXPECT_FALSE(re.Match("abc"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrOne5) {
|
||||
Regex re("a(ab)?b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match("abb"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrMore1) {
|
||||
Regex re("a*");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrMore2) {
|
||||
Regex re("a*b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("bb"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrMore3) {
|
||||
Regex re("a*b*");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match(""));
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("bb"));
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
TEST(Regex, ZeroOrMore4) {
|
||||
Regex re("a(ab)*b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_TRUE(re.Match("aababb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, OneOrMore1) {
|
||||
Regex re("a+");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("aa"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
TEST(Regex, OneOrMore2) {
|
||||
Regex re("a+b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
}
|
||||
|
||||
TEST(Regex, OneOrMore3) {
|
||||
Regex re("a+b+");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("ab"));
|
||||
EXPECT_TRUE(re.Match("aab"));
|
||||
EXPECT_TRUE(re.Match("abb"));
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("ba"));
|
||||
}
|
||||
|
||||
TEST(Regex, OneOrMore4) {
|
||||
Regex re("a(ab)+b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("aabb"));
|
||||
EXPECT_TRUE(re.Match("aababb"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("ab"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierExact1) {
|
||||
Regex re("ab{3}c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbc"));
|
||||
EXPECT_FALSE(re.Match("ac"));
|
||||
EXPECT_FALSE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match("abbc"));
|
||||
EXPECT_FALSE(re.Match("abbbbc"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierExact2) {
|
||||
Regex re("a(bc){3}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abcbcbcd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
EXPECT_FALSE(re.Match("abcbcd"));
|
||||
EXPECT_FALSE(re.Match("abcbcbcbcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierExact3) {
|
||||
Regex re("a(b|c){3}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbd"));
|
||||
EXPECT_TRUE(re.Match("acccd"));
|
||||
EXPECT_TRUE(re.Match("abcbd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abbd"));
|
||||
EXPECT_FALSE(re.Match("accccd"));
|
||||
EXPECT_FALSE(re.Match("abbbbd"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMin1) {
|
||||
Regex re("ab{3,}c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbc"));
|
||||
EXPECT_TRUE(re.Match("abbbbc"));
|
||||
EXPECT_TRUE(re.Match("abbbbbc"));
|
||||
EXPECT_FALSE(re.Match("ac"));
|
||||
EXPECT_FALSE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match("abbc"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMin2) {
|
||||
Regex re("a(bc){3,}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abcbcbcd"));
|
||||
EXPECT_TRUE(re.Match("abcbcbcbcd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
EXPECT_FALSE(re.Match("abcbcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMin3) {
|
||||
Regex re("a(b|c){3,}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbd"));
|
||||
EXPECT_TRUE(re.Match("acccd"));
|
||||
EXPECT_TRUE(re.Match("abcbd"));
|
||||
EXPECT_TRUE(re.Match("accccd"));
|
||||
EXPECT_TRUE(re.Match("abbbbd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abbd"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMinMax1) {
|
||||
Regex re("ab{3,5}c");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbc"));
|
||||
EXPECT_TRUE(re.Match("abbbbc"));
|
||||
EXPECT_TRUE(re.Match("abbbbbc"));
|
||||
EXPECT_FALSE(re.Match("ac"));
|
||||
EXPECT_FALSE(re.Match("abc"));
|
||||
EXPECT_FALSE(re.Match("abbc"));
|
||||
EXPECT_FALSE(re.Match("abbbbbbc"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMinMax2) {
|
||||
Regex re("a(bc){3,5}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abcbcbcd"));
|
||||
EXPECT_TRUE(re.Match("abcbcbcbcd"));
|
||||
EXPECT_TRUE(re.Match("abcbcbcbcbcd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abcd"));
|
||||
EXPECT_FALSE(re.Match("abcbcd"));
|
||||
EXPECT_FALSE(re.Match("abcbcbcbcbcbcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, QuantifierMinMax3) {
|
||||
Regex re("a(b|c){3,5}d");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("abbbd"));
|
||||
EXPECT_TRUE(re.Match("acccd"));
|
||||
EXPECT_TRUE(re.Match("abcbd"));
|
||||
EXPECT_TRUE(re.Match("accccd"));
|
||||
EXPECT_TRUE(re.Match("abbbbd"));
|
||||
EXPECT_TRUE(re.Match("acccccd"));
|
||||
EXPECT_TRUE(re.Match("abbbbbd"));
|
||||
EXPECT_FALSE(re.Match("ad"));
|
||||
EXPECT_FALSE(re.Match("abbd"));
|
||||
EXPECT_FALSE(re.Match("accccccd"));
|
||||
EXPECT_FALSE(re.Match("abbbbbbd"));
|
||||
}
|
||||
|
||||
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
|
||||
|
||||
TEST(Regex, Unicode) {
|
||||
Regex re("a" EURO "+b");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a" EURO "b"));
|
||||
EXPECT_TRUE(re.Match("a" EURO EURO "b"));
|
||||
EXPECT_FALSE(re.Match("a?b"));
|
||||
EXPECT_FALSE(re.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match
|
||||
}
|
||||
|
||||
TEST(Regex, AnyCharacter) {
|
||||
Regex re(".");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match(EURO));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange1) {
|
||||
Regex re("[abc]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("c"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("`"));
|
||||
EXPECT_FALSE(re.Match("d"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange2) {
|
||||
Regex re("[^abc]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("`"));
|
||||
EXPECT_TRUE(re.Match("d"));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("c"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange3) {
|
||||
Regex re("[a-c]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("b"));
|
||||
EXPECT_TRUE(re.Match("c"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("`"));
|
||||
EXPECT_FALSE(re.Match("d"));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange4) {
|
||||
Regex re("[^a-c]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("`"));
|
||||
EXPECT_TRUE(re.Match("d"));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
EXPECT_FALSE(re.Match("c"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("aa"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange5) {
|
||||
Regex re("[-]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("-"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("a"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange6) {
|
||||
Regex re("[a-]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("-"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("`"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange7) {
|
||||
Regex re("[-a]");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("a"));
|
||||
EXPECT_TRUE(re.Match("-"));
|
||||
EXPECT_FALSE(re.Match(""));
|
||||
EXPECT_FALSE(re.Match("`"));
|
||||
EXPECT_FALSE(re.Match("b"));
|
||||
}
|
||||
|
||||
TEST(Regex, CharacterRange8) {
|
||||
Regex re("[a-zA-Z0-9]*");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("Milo"));
|
||||
EXPECT_TRUE(re.Match("MT19937"));
|
||||
EXPECT_TRUE(re.Match("43"));
|
||||
EXPECT_FALSE(re.Match("a_b"));
|
||||
EXPECT_FALSE(re.Match("!"));
|
||||
}
|
||||
|
||||
TEST(Regex, Search) {
|
||||
Regex re("abc");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Search("abc"));
|
||||
EXPECT_TRUE(re.Search("_abc"));
|
||||
EXPECT_TRUE(re.Search("abc_"));
|
||||
EXPECT_TRUE(re.Search("_abc_"));
|
||||
EXPECT_TRUE(re.Search("__abc__"));
|
||||
EXPECT_TRUE(re.Search("abcabc"));
|
||||
EXPECT_FALSE(re.Search("a"));
|
||||
EXPECT_FALSE(re.Search("ab"));
|
||||
EXPECT_FALSE(re.Search("bc"));
|
||||
EXPECT_FALSE(re.Search("cba"));
|
||||
}
|
||||
|
||||
TEST(Regex, Search_BeginAnchor) {
|
||||
Regex re("^abc");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Search("abc"));
|
||||
EXPECT_TRUE(re.Search("abc_"));
|
||||
EXPECT_TRUE(re.Search("abcabc"));
|
||||
EXPECT_FALSE(re.Search("_abc"));
|
||||
EXPECT_FALSE(re.Search("_abc_"));
|
||||
EXPECT_FALSE(re.Search("a"));
|
||||
EXPECT_FALSE(re.Search("ab"));
|
||||
EXPECT_FALSE(re.Search("bc"));
|
||||
EXPECT_FALSE(re.Search("cba"));
|
||||
}
|
||||
|
||||
TEST(Regex, Search_EndAnchor) {
|
||||
Regex re("abc$");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Search("abc"));
|
||||
EXPECT_TRUE(re.Search("_abc"));
|
||||
EXPECT_TRUE(re.Search("abcabc"));
|
||||
EXPECT_FALSE(re.Search("abc_"));
|
||||
EXPECT_FALSE(re.Search("_abc_"));
|
||||
EXPECT_FALSE(re.Search("a"));
|
||||
EXPECT_FALSE(re.Search("ab"));
|
||||
EXPECT_FALSE(re.Search("bc"));
|
||||
EXPECT_FALSE(re.Search("cba"));
|
||||
}
|
||||
|
||||
TEST(Regex, Search_BothAnchor) {
|
||||
Regex re("^abc$");
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Search("abc"));
|
||||
EXPECT_FALSE(re.Search(""));
|
||||
EXPECT_FALSE(re.Search("a"));
|
||||
EXPECT_FALSE(re.Search("b"));
|
||||
EXPECT_FALSE(re.Search("ab"));
|
||||
EXPECT_FALSE(re.Search("abcd"));
|
||||
}
|
||||
|
||||
TEST(Regex, Escape) {
|
||||
const char* s = "\\^\\$\\|\\(\\)\\?\\*\\+\\.\\[\\]\\{\\}\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]";
|
||||
Regex re(s);
|
||||
ASSERT_TRUE(re.IsValid());
|
||||
EXPECT_TRUE(re.Match("^$|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]"));
|
||||
EXPECT_FALSE(re.Match(s)); // Not escaping
|
||||
}
|
||||
|
||||
TEST(Regex, Invalid) {
|
||||
#define TEST_INVALID(s) \
|
||||
{\
|
||||
Regex re(s);\
|
||||
EXPECT_FALSE(re.IsValid());\
|
||||
}
|
||||
|
||||
TEST_INVALID("a|");
|
||||
TEST_INVALID("()");
|
||||
TEST_INVALID(")");
|
||||
TEST_INVALID("(a))");
|
||||
TEST_INVALID("(a|)");
|
||||
TEST_INVALID("(a||b)");
|
||||
TEST_INVALID("(|b)");
|
||||
TEST_INVALID("?");
|
||||
TEST_INVALID("*");
|
||||
TEST_INVALID("+");
|
||||
TEST_INVALID("{");
|
||||
TEST_INVALID("{}");
|
||||
TEST_INVALID("a{a}");
|
||||
TEST_INVALID("a{0}");
|
||||
TEST_INVALID("a{-1}");
|
||||
TEST_INVALID("a{}");
|
||||
TEST_INVALID("a{0,}");
|
||||
TEST_INVALID("a{,0}");
|
||||
TEST_INVALID("a{1,0}");
|
||||
TEST_INVALID("a{-1,0}");
|
||||
TEST_INVALID("a{-1,1}");
|
||||
TEST_INVALID("[]");
|
||||
TEST_INVALID("[^]");
|
||||
TEST_INVALID("[\\a]");
|
||||
TEST_INVALID("\\a");
|
||||
|
||||
#undef TEST_INVALID
|
||||
}
|
||||
|
||||
#undef EURO
|
@ -1182,6 +1182,24 @@ TEST(Value, Object) {
|
||||
EXPECT_TRUE(z.IsObject());
|
||||
}
|
||||
|
||||
TEST(Value, EraseMember_String) {
|
||||
Value::AllocatorType allocator;
|
||||
Value x(kObjectType);
|
||||
x.AddMember("A", "Apple", allocator);
|
||||
x.AddMember("B", "Banana", allocator);
|
||||
|
||||
EXPECT_TRUE(x.EraseMember("B"));
|
||||
EXPECT_FALSE(x.HasMember("B"));
|
||||
|
||||
EXPECT_FALSE(x.EraseMember("nonexist"));
|
||||
|
||||
GenericValue<UTF8<>, CrtAllocator> othername("A");
|
||||
EXPECT_TRUE(x.EraseMember(othername));
|
||||
EXPECT_FALSE(x.HasMember("A"));
|
||||
|
||||
EXPECT_TRUE(x.MemberBegin() == x.MemberEnd());
|
||||
}
|
||||
|
||||
TEST(Value, BigNestedArray) {
|
||||
MemoryPoolAllocator<> allocator;
|
||||
Value x(kArrayType);
|
||||
|
Loading…
x
Reference in New Issue
Block a user