diff --git a/include/rapidjson/document.h b/include/rapidjson/document.h index ca80963..889cdfa 100644 --- a/include/rapidjson/document.h +++ b/include/rapidjson/document.h @@ -1213,6 +1213,31 @@ public: return pos; } + //! Erase a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Linear time complexity. + */ + bool EraseMember(const Ch* name) { + GenericValue n(StringRef(name)); + return EraseMember(n); + } + +#if RAPIDJSON_HAS_STDSTRING + bool EraseMember(const std::basic_string& name) { return EraseMember(GenericValue(StringRef(name))); } +#endif + + template + bool EraseMember(const GenericValue& name) { + MemberIterator m = FindMember(name); + if (m != MemberEnd()) { + EraseMember(m); + return true; + } + else + return false; + } + //@} //!@name Array diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h new file mode 100644 index 0000000..fcf2600 --- /dev/null +++ b/include/rapidjson/internal/regex.h @@ -0,0 +1,642 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_INTERNAL_REGEX_H_ +#define RAPIDJSON_INTERNAL_REGEX_H_ + +#include "../rapidjson.h" +#include "stack.h" + +#ifndef RAPIDJSON_REGEX_VERBOSE +#define RAPIDJSON_REGEX_VERBOSE 0 +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// GenericRegex + +static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1 +static const SizeType kRegexInvalidRange = ~SizeType(0); + +//! Regular expression engine with subset of ECMAscript grammar. +/*! + Supported regular expression syntax: + - \c ab Concatenation + - \c a|b Alternation + - \c a? Zero or one + - \c a* Zero or more + - \c a+ One or more + - \c a{3} Exactly 3 times + - \c a{3,} At least 3 times + - \c a{3,5} 3 to 5 times + - \c (ab) Grouping + - \c ^a At the beginning + - \c a$ At the end + - \c . Any character + - \c [abc] Character classes + - \c [a-c] Character class range + - \c [a-z0-9_] Character class combination + - \c [^abc] Negated character classes + - \c [^a-c] Negated character class range + - \c [\b] Backspace (U+0008) + - \c \\| \\\\ ... Escape characters + - \c \\f Form feed (U+000C) + - \c \\n Line feed (U+000A) + - \c \\r Carriage return (U+000D) + - \c \\t Tab (U+0009) + - \c \\v Vertical tab (U+000B) +*/ +template +class GenericRegex { +public: + typedef typename Encoding::Ch Ch; + + GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() { + StringStream ss(source); + DecodedStream ds(ss); + Parse(ds); + } + + ~GenericRegex() { + } + + bool IsValid() const { + return root_ != kRegexInvalidState; + } + + template + bool Match(InputStream& is) const { + return SearchWithAnchoring(is, true, true); + } + + bool Match(const Ch* s) const { + StringStream is(s); + return Match(is); + } + + template + bool Search(InputStream& is) const { + return SearchWithAnchoring(is, anchorBegin_, anchorEnd_); + } + + bool Search(const Ch* s) const { + StringStream is(s); + return Search(is); + } + +private: + enum Operator { + kZeroOrOne, + kZeroOrMore, + kOneOrMore, + kConcatenation, + kAlternation, + kLeftParenthesis + }; + + static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.' + static const unsigned kRangeCharacterClass = 0xFFFFFFFE; + static const unsigned kRangeNegationFlag = 0x80000000; + + struct Range { + unsigned start; // + unsigned end; + SizeType next; + }; + + struct State { + SizeType out; //!< Equals to kInvalid for matching state + SizeType out1; //!< Equals to non-kInvalid for split + SizeType rangeStart; + unsigned codepoint; + }; + + struct Frag { + Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {} + SizeType start; + SizeType out; //!< link-list of all output states + SizeType minIndex; + }; + + template + class DecodedStream { + public: + DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } + unsigned Peek() { return codepoint_; } + unsigned Take() { unsigned c = codepoint_; Decode(); return c; } + + private: + void Decode() { + if (!Encoding::Decode(ss_, &codepoint_)) + codepoint_ = 0; + } + + SourceStream& ss_; + unsigned codepoint_; + }; + + State& GetState(SizeType index) { + RAPIDJSON_ASSERT(index < stateCount_); + return states_.template Bottom()[index]; + } + + const State& GetState(SizeType index) const { + RAPIDJSON_ASSERT(index < stateCount_); + return states_.template Bottom()[index]; + } + + Range& GetRange(SizeType index) { + RAPIDJSON_ASSERT(index < rangeCount_); + return ranges_.template Bottom()[index]; + } + + const Range& GetRange(SizeType index) const { + RAPIDJSON_ASSERT(index < rangeCount_); + return ranges_.template Bottom()[index]; + } + + template + void Parse(DecodedStream& ds) { + Allocator allocator; + Stack operandStack(&allocator, 256); // Frag + Stack operatorStack(&allocator, 256); // Operator + Stack atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis) + + *atomCountStack.template Push() = 0; + + unsigned codepoint; + while (ds.Peek() != 0) { + switch (codepoint = ds.Take()) { + case '^': + anchorBegin_ = true; + break; + + case '$': + anchorEnd_ = true; + break; + + case '|': + while (!operatorStack.Empty() && *operatorStack.template Top() < kAlternation) + if (!Eval(operandStack, *operatorStack.template Pop(1))) + return; + *operatorStack.template Push() = kAlternation; + *atomCountStack.template Top() = 0; + break; + + case '(': + *operatorStack.template Push() = kLeftParenthesis; + *atomCountStack.template Push() = 0; + break; + + case ')': + while (!operatorStack.Empty() && *operatorStack.template Top() != kLeftParenthesis) + if (!Eval(operandStack, *operatorStack.template Pop(1))) + return; + if (operatorStack.Empty()) + return; + operatorStack.template Pop(1); + atomCountStack.template Pop(1); + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '?': + if (!Eval(operandStack, kZeroOrOne)) + return; + break; + + case '*': + if (!Eval(operandStack, kZeroOrMore)) + return; + break; + + case '+': + if (!Eval(operandStack, kOneOrMore)) + return; + break; + + case '{': + { + unsigned n, m; + if (!ParseUnsigned(ds, &n) || n == 0) + return; + + if (ds.Peek() == ',') { + ds.Take(); + if (ds.Peek() == '}') + m = 0; + else if (!ParseUnsigned(ds, &m) || m < n) + return; + } + else + m = n; + + if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}') + return; + ds.Take(); + } + break; + + case '.': + PushOperand(operandStack, kAnyCharacterClass); + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '[': + { + SizeType range; + if (!ParseRange(ds, &range)) + return; + SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass); + GetState(s).rangeStart = range; + *operandStack.template Push() = Frag(s, s, s); + } + ImplicitConcatenation(atomCountStack, operatorStack); + break; + + case '\\': // Escape character + if (!CharacterEscape(ds, &codepoint)) + return; // Unsupported escape character + // fall through to default + + default: // Pattern character + PushOperand(operandStack, codepoint); + ImplicitConcatenation(atomCountStack, operatorStack); + } + } + + while (!operatorStack.Empty()) + if (!Eval(operandStack, *operatorStack.template Pop(1))) + return; + + // Link the operand to matching state. + if (operandStack.GetSize() == sizeof(Frag)) { + Frag* e = operandStack.template Pop(1); + Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0)); + root_ = e->start; + +#if RAPIDJSON_REGEX_VERBOSE + printf("root: %d\n", root_); + for (SizeType i = 0; i < stateCount_ ; i++) { + State& s = GetState(i); + printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint); + } + printf("\n"); +#endif + } + } + + SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { + State* s = states_.template Push(); + s->out = out; + s->out1 = out1; + s->codepoint = codepoint; + s->rangeStart = kRegexInvalidRange; + return stateCount_++; + } + + void PushOperand(Stack& operandStack, unsigned codepoint) { + SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); + *operandStack.template Push() = Frag(s, s, s); + } + + void ImplicitConcatenation(Stack& atomCountStack, Stack& operatorStack) { + if (*atomCountStack.template Top()) + *operatorStack.template Push() = kConcatenation; + (*atomCountStack.template Top())++; + } + + SizeType Append(SizeType l1, SizeType l2) { + SizeType old = l1; + while (GetState(l1).out != kRegexInvalidState) + l1 = GetState(l1).out; + GetState(l1).out = l2; + return old; + } + + void Patch(SizeType l, SizeType s) { + for (SizeType next; l != kRegexInvalidState; l = next) { + next = GetState(l).out; + GetState(l).out = s; + } + } + + bool Eval(Stack& operandStack, Operator op) { + switch (op) { + case kConcatenation: + if (operandStack.GetSize() >= sizeof(Frag) * 2) { + Frag e2 = *operandStack.template Pop(1); + Frag e1 = *operandStack.template Pop(1); + Patch(e1.out, e2.start); + *operandStack.template Push() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex)); + return true; + } + return false; + + case kAlternation: + if (operandStack.GetSize() >= sizeof(Frag) * 2) { + Frag e2 = *operandStack.template Pop(1); + Frag e1 = *operandStack.template Pop(1); + SizeType s = NewState(e1.start, e2.start, 0); + *operandStack.template Push() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); + return true; + } + return false; + + case kZeroOrOne: + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + *operandStack.template Push() = Frag(s, Append(e.out, s), e.minIndex); + return true; + } + return false; + + case kZeroOrMore: + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + Patch(e.out, s); + *operandStack.template Push() = Frag(s, s, e.minIndex); + return true; + } + return false; + + case kOneOrMore: + if (operandStack.GetSize() >= sizeof(Frag)) { + Frag e = *operandStack.template Pop(1); + SizeType s = NewState(kRegexInvalidState, e.start, 0); + Patch(e.out, s); + *operandStack.template Push() = Frag(e.start, s, e.minIndex); + return true; + } + return false; + + default: + return false; + } + } + + bool EvalQuantifier(Stack& operandStack, unsigned n, unsigned m) { + RAPIDJSON_ASSERT(n > 0); + RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity + if (operandStack.GetSize() < sizeof(Frag)) + return false; + + for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a + CloneTopOperand(operandStack); + + if (m == 0) + Eval(operandStack, kOneOrMore); // a{3,} -> a a a+ + else if (m > n) { + CloneTopOperand(operandStack); // a{3,5} -> a a a a + Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a? + for (unsigned i = n; i < m - 1; i++) + CloneTopOperand(operandStack); // a{3,5} -> a a a a? a? + for (unsigned i = n; i < m; i++) + Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a? + } + + for (unsigned i = 0; i < n - 1; i++) + Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a? + + return true; + } + + static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; } + + void CloneTopOperand(Stack& operandStack) { + const Frag *src = operandStack.template Top(); + SizeType count = stateCount_ - src->minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_) + State* s = states_.template Push(count); + memcpy(s, &GetState(src->minIndex), count * sizeof(State)); + for (SizeType j = 0; j < count; j++) { + if (s[j].out != kRegexInvalidState) + s[j].out += count; + if (s[j].out1 != kRegexInvalidState) + s[j].out1 += count; + } + *operandStack.template Push() = Frag(src->start + count, src->out + count, src->minIndex + count); + stateCount_ += count; + } + + template + bool ParseUnsigned(DecodedStream& ds, unsigned* u) { + unsigned r = 0; + while (ds.Peek() >= '0' && ds.Peek() <= '9') { + if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295 + return false; // overflow + r = r * 10 + (ds.Take() - '0'); + } + *u = r; + return true; + } + + template + bool ParseRange(DecodedStream& ds, SizeType* range) { + bool isBegin = true; + bool negate = false; + int step = 0; + SizeType start = kRegexInvalidRange; + SizeType current = kRegexInvalidRange; + unsigned codepoint; + while ((codepoint = ds.Take()) != 0) { + if (isBegin) { + isBegin = false; + if (codepoint == '^') { + negate = true; + continue; + } + } + + switch (codepoint) { + case ']': + if (start == kRegexInvalidRange) + return false; // Error: nothing inside [] + if (step == 2) { // Add trailing '-' + SizeType r = NewRange('-'); + RAPIDJSON_ASSERT(current != kRegexInvalidRange); + GetRange(current).next = r; + } + if (negate) + GetRange(start).start |= kRangeNegationFlag; + *range = start; + return true; + + case '\\': + if (ds.Peek() == 'b') { + ds.Take(); + codepoint = 0x0008; // Escape backspace character + } + else if (!CharacterEscape(ds, &codepoint)) + return false; + // fall through to default + + default: + switch (step) { + case 1: + if (codepoint == '-') { + step++; + break; + } + // fall through to step 0 for other characters + + case 0: + { + SizeType r = NewRange(codepoint); + if (current != kRegexInvalidRange) + GetRange(current).next = r; + if (start == kRegexInvalidRange) + start = r; + current = r; + } + step = 1; + break; + + default: + RAPIDJSON_ASSERT(step == 2); + GetRange(current).end = codepoint; + step = 0; + } + } + } + return false; + } + + SizeType NewRange(unsigned codepoint) { + Range* r = ranges_.template Push(); + r->start = r->end = codepoint; + r->next = kRegexInvalidRange; + return rangeCount_++; + } + + template + bool CharacterEscape(DecodedStream& ds, unsigned* escapedCodepoint) { + unsigned codepoint; + switch (codepoint = ds.Take()) { + case '^': + case '$': + case '|': + case '(': + case ')': + case '?': + case '*': + case '+': + case '.': + case '[': + case ']': + case '{': + case '}': + case '\\': + *escapedCodepoint = codepoint; return true; + case 'f': *escapedCodepoint = 0x000C; return true; + case 'n': *escapedCodepoint = 0x000A; return true; + case 'r': *escapedCodepoint = 0x000D; return true; + case 't': *escapedCodepoint = 0x0009; return true; + case 'v': *escapedCodepoint = 0x000B; return true; + default: + return false; // Unsupported escape character + } + } + + template + bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const { + RAPIDJSON_ASSERT(IsValid()); + DecodedStream ds(is); + + Allocator allocator; + Stack state0(&allocator, stateCount_ * sizeof(SizeType)); + Stack state1(&allocator, stateCount_ * sizeof(SizeType)); + Stack *current = &state0, *next = &state1; + + const size_t stateSetSize = (stateCount_ + 31) / 32 * 4; + unsigned* stateSet = static_cast(allocator.Malloc(stateSetSize)); + std::memset(stateSet, 0, stateSetSize); + + bool matched = false; + matched = AddState(stateSet, *current, root_); + + unsigned codepoint; + while (!current->Empty() && (codepoint = ds.Take()) != 0) { + std::memset(stateSet, 0, stateSetSize); + next->Clear(); + matched = false; + for (const SizeType* s = current->template Bottom(); s != current->template End(); ++s) { + const State& sr = GetState(*s); + if (sr.codepoint == codepoint || + sr.codepoint == kAnyCharacterClass || + (sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) + { + matched = AddState(stateSet, *next, sr.out) || matched; + if (!anchorEnd && matched) + goto exit; + } + if (!anchorBegin) + AddState(stateSet, *next, root_); + } + Stack* temp = current; + current = next; + next = temp; + } + + exit: + Allocator::Free(stateSet); + return matched; + } + + // Return whether the added states is a match state + bool AddState(unsigned* stateSet, Stack& l, SizeType index) const { + if (index == kRegexInvalidState) + return true; + + const State& s = GetState(index); + if (s.out1 != kRegexInvalidState) { // Split + bool matched = AddState(stateSet, l, s.out); + matched = AddState(stateSet, l, s.out1) || matched; + return matched; + } + else if (!(stateSet[index >> 5] & (1 << (index & 31)))) { + stateSet[index >> 5] |= (1 << (index & 31)); + *l.template Push() = index; + } + return GetState(index).out == kRegexInvalidState; + } + + bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { + bool yes = (GetRange(rangeIndex).start & kRangeNegationFlag) == 0; + while (rangeIndex != kRegexInvalidRange) { + const Range& r = GetRange(rangeIndex); + if (codepoint >= (r.start & ~kRangeNegationFlag) && codepoint <= r.end) + return yes; + rangeIndex = r.next; + } + return !yes; + } + + Stack states_; + Stack ranges_; + SizeType root_; + SizeType stateCount_; + SizeType rangeCount_; + bool anchorBegin_; + bool anchorEnd_; +}; + +typedef GenericRegex > Regex; + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_INTERNAL_REGEX_H_ diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index a1411e9..ab80e97 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -689,34 +689,37 @@ public: ValueType* v = &root; const Token* last = tokens_ + (tokenCount_ - 1); - for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + for (const Token *t = tokens_; t != last; ++t) { switch (v->GetType()) { case kObjectType: { typename ValueType::MemberIterator m = v->FindMember(GenericStringRef(t->name, t->length)); if (m == v->MemberEnd()) return false; - if (t == last) { - v->EraseMember(m); - return true; - } v = &m->value; } break; case kArrayType: if (t->index == kPointerInvalidIndex || t->index >= v->Size()) return false; - if (t == last) { - v->Erase(v->Begin() + t->index); - return true; - } v = &((*v)[t->index]); break; default: return false; } } - return false; + + switch (v->GetType()) { + case kObjectType: + return v->EraseMember(GenericStringRef(last->name, last->length)); + case kArrayType: + if (last->index == kPointerInvalidIndex || last->index >= v->Size()) + return false; + v->Erase(v->Begin() + last->index); + return true; + default: + return false; + } } private: diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index be0d9fb..8989e38 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -967,13 +967,13 @@ private: else { if (use64bit) { if (minus) - cont = handler.Int64(-(int64_t)i64); + cont = handler.Int64(static_cast(~i64 + 1)); else cont = handler.Uint64(i64); } else { if (minus) - cont = handler.Int(-(int)i); + cont = handler.Int(static_cast(~i + 1)); else cont = handler.Uint(i); } diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index 05ecf6e..9be966a 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -298,11 +298,28 @@ TEST_F(RapidJson, internal_Pow10) { EXPECT_GT(sum, 0.0); } -TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) { +TEST_F(RapidJson, SkipWhitespace_Basic) { for (size_t i = 0; i < kTrialCount; i++) { - Document doc; - ASSERT_TRUE(doc.Parse(whitespace_).IsArray()); - } + rapidjson::StringStream s(whitespace_); + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); + ASSERT_EQ('[', s.Peek()); + } +} + +TEST_F(RapidJson, SIMD_SUFFIX(SkipWhitespace)) { + for (size_t i = 0; i < kTrialCount; i++) { + rapidjson::StringStream s(whitespace_); + rapidjson::SkipWhitespace(s); + ASSERT_EQ('[', s.Peek()); + } +} + +TEST_F(RapidJson, SkipWhitespace_strspn) { + for (size_t i = 0; i < kTrialCount; i++) { + const char* s = whitespace_ + std::strspn(whitespace_, " \t\r\n"); + ASSERT_EQ('[', *s); + } } TEST_F(RapidJson, UTF8_Validate) { diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 61696d5..76b413e 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -11,6 +11,7 @@ set(UNITTEST_SOURCES pointertest.cpp prettywritertest.cpp readertest.cpp + regextest.cpp schematest.cpp simdtest.cpp strfunctest.cpp diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp index 603361e..37cfcd5 100644 --- a/test/unittest/pointertest.cpp +++ b/test/unittest/pointertest.cpp @@ -879,7 +879,13 @@ TEST(Pointer, Erase) { d.Parse(kJson); EXPECT_FALSE(Pointer("").Erase(d)); + EXPECT_FALSE(Pointer("/nonexist").Erase(d)); + EXPECT_FALSE(Pointer("/nonexist/nonexist").Erase(d)); EXPECT_FALSE(Pointer("/foo/nonexist").Erase(d)); + EXPECT_FALSE(Pointer("/foo/nonexist/nonexist").Erase(d)); + EXPECT_FALSE(Pointer("/foo/0/nonexist").Erase(d)); + EXPECT_FALSE(Pointer("/foo/0/nonexist/nonexist").Erase(d)); + EXPECT_FALSE(Pointer("/foo/2/nonexist").Erase(d)); EXPECT_TRUE(Pointer("/foo/0").Erase(d)); EXPECT_EQ(1u, d["foo"].Size()); EXPECT_STREQ("baz", d["foo"][0].GetString()); @@ -887,6 +893,24 @@ TEST(Pointer, Erase) { EXPECT_TRUE(d["foo"].Empty()); EXPECT_TRUE(Pointer("/foo").Erase(d)); EXPECT_TRUE(Pointer("/foo").Get(d) == 0); + + Pointer("/a/0/b/0").Create(d); + + EXPECT_TRUE(Pointer("/a/0/b/0").Get(d) != 0); + EXPECT_TRUE(Pointer("/a/0/b/0").Erase(d)); + EXPECT_TRUE(Pointer("/a/0/b/0").Get(d) == 0); + + EXPECT_TRUE(Pointer("/a/0/b").Get(d) != 0); + EXPECT_TRUE(Pointer("/a/0/b").Erase(d)); + EXPECT_TRUE(Pointer("/a/0/b").Get(d) == 0); + + EXPECT_TRUE(Pointer("/a/0").Get(d) != 0); + EXPECT_TRUE(Pointer("/a/0").Erase(d)); + EXPECT_TRUE(Pointer("/a/0").Get(d) == 0); + + EXPECT_TRUE(Pointer("/a").Get(d) != 0); + EXPECT_TRUE(Pointer("/a").Erase(d)); + EXPECT_TRUE(Pointer("/a").Get(d) == 0); } TEST(Pointer, CreateValueByPointer) { diff --git a/test/unittest/regextest.cpp b/test/unittest/regextest.cpp new file mode 100644 index 0000000..65105fa --- /dev/null +++ b/test/unittest/regextest.cpp @@ -0,0 +1,533 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "unittest.h" +#include "rapidjson/internal/regex.h" + +using namespace rapidjson::internal; + +TEST(Regex, Concatenation) { + Regex re("abc"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abc")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ab")); + EXPECT_FALSE(re.Match("abcd")); +} + +TEST(Regex, Alternation1) { + Regex re("abab|abbb"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abab")); + EXPECT_TRUE(re.Match("abbb")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("ab")); + EXPECT_FALSE(re.Match("ababa")); + EXPECT_FALSE(re.Match("abb")); + EXPECT_FALSE(re.Match("abbbb")); +} + +TEST(Regex, Alternation2) { + Regex re("a|b|c"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("c")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("aa")); + EXPECT_FALSE(re.Match("ab")); +} + +TEST(Regex, Parenthesis1) { + Regex re("(ab)c"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abc")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ab")); + EXPECT_FALSE(re.Match("abcd")); +} + +TEST(Regex, Parenthesis2) { + Regex re("a(bc)"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abc")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ab")); + EXPECT_FALSE(re.Match("abcd")); +} + +TEST(Regex, Parenthesis3) { + Regex re("(a|b)(c|d)"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("ac")); + EXPECT_TRUE(re.Match("ad")); + EXPECT_TRUE(re.Match("bc")); + EXPECT_TRUE(re.Match("bd")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("ab")); + EXPECT_FALSE(re.Match("cd")); +} + +TEST(Regex, ZeroOrOne1) { + Regex re("a?"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("")); + EXPECT_TRUE(re.Match("a")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, ZeroOrOne2) { + Regex re("a?b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("ab")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("aa")); + EXPECT_FALSE(re.Match("bb")); + EXPECT_FALSE(re.Match("ba")); +} + +TEST(Regex, ZeroOrOne3) { + Regex re("ab?"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("ab")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("aa")); + EXPECT_FALSE(re.Match("bb")); + EXPECT_FALSE(re.Match("ba")); +} + +TEST(Regex, ZeroOrOne4) { + Regex re("a?b?"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("")); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("ab")); + EXPECT_FALSE(re.Match("aa")); + EXPECT_FALSE(re.Match("bb")); + EXPECT_FALSE(re.Match("ba")); + EXPECT_FALSE(re.Match("abc")); +} + +TEST(Regex, ZeroOrOne5) { + Regex re("a(ab)?b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aabb")); + EXPECT_FALSE(re.Match("aab")); + EXPECT_FALSE(re.Match("abb")); +} + +TEST(Regex, ZeroOrMore1) { + Regex re("a*"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("")); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("aa")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ab")); +} + +TEST(Regex, ZeroOrMore2) { + Regex re("a*b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aab")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("bb")); +} + +TEST(Regex, ZeroOrMore3) { + Regex re("a*b*"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("")); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("aa")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("bb")); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aabb")); + EXPECT_FALSE(re.Match("ba")); +} + +TEST(Regex, ZeroOrMore4) { + Regex re("a(ab)*b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aabb")); + EXPECT_TRUE(re.Match("aababb")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, OneOrMore1) { + Regex re("a+"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("aa")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ab")); +} + +TEST(Regex, OneOrMore2) { + Regex re("a+b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aab")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("b")); +} + +TEST(Regex, OneOrMore3) { + Regex re("a+b+"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("ab")); + EXPECT_TRUE(re.Match("aab")); + EXPECT_TRUE(re.Match("abb")); + EXPECT_TRUE(re.Match("aabb")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("ba")); +} + +TEST(Regex, OneOrMore4) { + Regex re("a(ab)+b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("aabb")); + EXPECT_TRUE(re.Match("aababb")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("ab")); +} + +TEST(Regex, QuantifierExact1) { + Regex re("ab{3}c"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbc")); + EXPECT_FALSE(re.Match("ac")); + EXPECT_FALSE(re.Match("abc")); + EXPECT_FALSE(re.Match("abbc")); + EXPECT_FALSE(re.Match("abbbbc")); +} + +TEST(Regex, QuantifierExact2) { + Regex re("a(bc){3}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abcbcbcd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abcd")); + EXPECT_FALSE(re.Match("abcbcd")); + EXPECT_FALSE(re.Match("abcbcbcbcd")); +} + +TEST(Regex, QuantifierExact3) { + Regex re("a(b|c){3}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbd")); + EXPECT_TRUE(re.Match("acccd")); + EXPECT_TRUE(re.Match("abcbd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abbd")); + EXPECT_FALSE(re.Match("accccd")); + EXPECT_FALSE(re.Match("abbbbd")); +} + +TEST(Regex, QuantifierMin1) { + Regex re("ab{3,}c"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbc")); + EXPECT_TRUE(re.Match("abbbbc")); + EXPECT_TRUE(re.Match("abbbbbc")); + EXPECT_FALSE(re.Match("ac")); + EXPECT_FALSE(re.Match("abc")); + EXPECT_FALSE(re.Match("abbc")); +} + +TEST(Regex, QuantifierMin2) { + Regex re("a(bc){3,}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abcbcbcd")); + EXPECT_TRUE(re.Match("abcbcbcbcd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abcd")); + EXPECT_FALSE(re.Match("abcbcd")); +} + +TEST(Regex, QuantifierMin3) { + Regex re("a(b|c){3,}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbd")); + EXPECT_TRUE(re.Match("acccd")); + EXPECT_TRUE(re.Match("abcbd")); + EXPECT_TRUE(re.Match("accccd")); + EXPECT_TRUE(re.Match("abbbbd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abbd")); +} + +TEST(Regex, QuantifierMinMax1) { + Regex re("ab{3,5}c"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbc")); + EXPECT_TRUE(re.Match("abbbbc")); + EXPECT_TRUE(re.Match("abbbbbc")); + EXPECT_FALSE(re.Match("ac")); + EXPECT_FALSE(re.Match("abc")); + EXPECT_FALSE(re.Match("abbc")); + EXPECT_FALSE(re.Match("abbbbbbc")); +} + +TEST(Regex, QuantifierMinMax2) { + Regex re("a(bc){3,5}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abcbcbcd")); + EXPECT_TRUE(re.Match("abcbcbcbcd")); + EXPECT_TRUE(re.Match("abcbcbcbcbcd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abcd")); + EXPECT_FALSE(re.Match("abcbcd")); + EXPECT_FALSE(re.Match("abcbcbcbcbcbcd")); +} + +TEST(Regex, QuantifierMinMax3) { + Regex re("a(b|c){3,5}d"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("abbbd")); + EXPECT_TRUE(re.Match("acccd")); + EXPECT_TRUE(re.Match("abcbd")); + EXPECT_TRUE(re.Match("accccd")); + EXPECT_TRUE(re.Match("abbbbd")); + EXPECT_TRUE(re.Match("acccccd")); + EXPECT_TRUE(re.Match("abbbbbd")); + EXPECT_FALSE(re.Match("ad")); + EXPECT_FALSE(re.Match("abbd")); + EXPECT_FALSE(re.Match("accccccd")); + EXPECT_FALSE(re.Match("abbbbbbd")); +} + +#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC + +TEST(Regex, Unicode) { + Regex re("a" EURO "+b"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a" EURO "b")); + EXPECT_TRUE(re.Match("a" EURO EURO "b")); + EXPECT_FALSE(re.Match("a?b")); + EXPECT_FALSE(re.Match("a" EURO "\xAC" "b")); // unaware of UTF-8 will match +} + +TEST(Regex, AnyCharacter) { + Regex re("."); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match(EURO)); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, CharacterRange1) { + Regex re("[abc]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("c")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("`")); + EXPECT_FALSE(re.Match("d")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, CharacterRange2) { + Regex re("[^abc]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("`")); + EXPECT_TRUE(re.Match("d")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("c")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, CharacterRange3) { + Regex re("[a-c]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("b")); + EXPECT_TRUE(re.Match("c")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("`")); + EXPECT_FALSE(re.Match("d")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, CharacterRange4) { + Regex re("[^a-c]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("`")); + EXPECT_TRUE(re.Match("d")); + EXPECT_FALSE(re.Match("a")); + EXPECT_FALSE(re.Match("b")); + EXPECT_FALSE(re.Match("c")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("aa")); +} + +TEST(Regex, CharacterRange5) { + Regex re("[-]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("-")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("a")); +} + +TEST(Regex, CharacterRange6) { + Regex re("[a-]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("-")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("`")); + EXPECT_FALSE(re.Match("b")); +} + +TEST(Regex, CharacterRange7) { + Regex re("[-a]"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("a")); + EXPECT_TRUE(re.Match("-")); + EXPECT_FALSE(re.Match("")); + EXPECT_FALSE(re.Match("`")); + EXPECT_FALSE(re.Match("b")); +} + +TEST(Regex, CharacterRange8) { + Regex re("[a-zA-Z0-9]*"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("Milo")); + EXPECT_TRUE(re.Match("MT19937")); + EXPECT_TRUE(re.Match("43")); + EXPECT_FALSE(re.Match("a_b")); + EXPECT_FALSE(re.Match("!")); +} + +TEST(Regex, Search) { + Regex re("abc"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Search("abc")); + EXPECT_TRUE(re.Search("_abc")); + EXPECT_TRUE(re.Search("abc_")); + EXPECT_TRUE(re.Search("_abc_")); + EXPECT_TRUE(re.Search("__abc__")); + EXPECT_TRUE(re.Search("abcabc")); + EXPECT_FALSE(re.Search("a")); + EXPECT_FALSE(re.Search("ab")); + EXPECT_FALSE(re.Search("bc")); + EXPECT_FALSE(re.Search("cba")); +} + +TEST(Regex, Search_BeginAnchor) { + Regex re("^abc"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Search("abc")); + EXPECT_TRUE(re.Search("abc_")); + EXPECT_TRUE(re.Search("abcabc")); + EXPECT_FALSE(re.Search("_abc")); + EXPECT_FALSE(re.Search("_abc_")); + EXPECT_FALSE(re.Search("a")); + EXPECT_FALSE(re.Search("ab")); + EXPECT_FALSE(re.Search("bc")); + EXPECT_FALSE(re.Search("cba")); +} + +TEST(Regex, Search_EndAnchor) { + Regex re("abc$"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Search("abc")); + EXPECT_TRUE(re.Search("_abc")); + EXPECT_TRUE(re.Search("abcabc")); + EXPECT_FALSE(re.Search("abc_")); + EXPECT_FALSE(re.Search("_abc_")); + EXPECT_FALSE(re.Search("a")); + EXPECT_FALSE(re.Search("ab")); + EXPECT_FALSE(re.Search("bc")); + EXPECT_FALSE(re.Search("cba")); +} + +TEST(Regex, Search_BothAnchor) { + Regex re("^abc$"); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Search("abc")); + EXPECT_FALSE(re.Search("")); + EXPECT_FALSE(re.Search("a")); + EXPECT_FALSE(re.Search("b")); + EXPECT_FALSE(re.Search("ab")); + EXPECT_FALSE(re.Search("abcd")); +} + +TEST(Regex, Escape) { + const char* s = "\\^\\$\\|\\(\\)\\?\\*\\+\\.\\[\\]\\{\\}\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]"; + Regex re(s); + ASSERT_TRUE(re.IsValid()); + EXPECT_TRUE(re.Match("^$|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]")); + EXPECT_FALSE(re.Match(s)); // Not escaping +} + +TEST(Regex, Invalid) { +#define TEST_INVALID(s) \ + {\ + Regex re(s);\ + EXPECT_FALSE(re.IsValid());\ + } + + TEST_INVALID("a|"); + TEST_INVALID("()"); + TEST_INVALID(")"); + TEST_INVALID("(a))"); + TEST_INVALID("(a|)"); + TEST_INVALID("(a||b)"); + TEST_INVALID("(|b)"); + TEST_INVALID("?"); + TEST_INVALID("*"); + TEST_INVALID("+"); + TEST_INVALID("{"); + TEST_INVALID("{}"); + TEST_INVALID("a{a}"); + TEST_INVALID("a{0}"); + TEST_INVALID("a{-1}"); + TEST_INVALID("a{}"); + TEST_INVALID("a{0,}"); + TEST_INVALID("a{,0}"); + TEST_INVALID("a{1,0}"); + TEST_INVALID("a{-1,0}"); + TEST_INVALID("a{-1,1}"); + TEST_INVALID("[]"); + TEST_INVALID("[^]"); + TEST_INVALID("[\\a]"); + TEST_INVALID("\\a"); + +#undef TEST_INVALID +} + +#undef EURO diff --git a/test/unittest/valuetest.cpp b/test/unittest/valuetest.cpp index 5e142e1..f14669a 100644 --- a/test/unittest/valuetest.cpp +++ b/test/unittest/valuetest.cpp @@ -1182,6 +1182,24 @@ TEST(Value, Object) { EXPECT_TRUE(z.IsObject()); } +TEST(Value, EraseMember_String) { + Value::AllocatorType allocator; + Value x(kObjectType); + x.AddMember("A", "Apple", allocator); + x.AddMember("B", "Banana", allocator); + + EXPECT_TRUE(x.EraseMember("B")); + EXPECT_FALSE(x.HasMember("B")); + + EXPECT_FALSE(x.EraseMember("nonexist")); + + GenericValue, CrtAllocator> othername("A"); + EXPECT_TRUE(x.EraseMember(othername)); + EXPECT_FALSE(x.HasMember("A")); + + EXPECT_TRUE(x.MemberBegin() == x.MemberEnd()); +} + TEST(Value, BigNestedArray) { MemoryPoolAllocator<> allocator; Value x(kArrayType);