From 055f1fa61e9745da2705d4b2714785aef7a3af97 Mon Sep 17 00:00:00 2001 From: ylavic Date: Mon, 10 Dec 2018 21:47:43 +0100 Subject: [PATCH 1/4] Add less than operator to Pointer. Allows to sort pointers in (std-)containers and/or index by them. --- include/rapidjson/pointer.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index 3d339f2..54a8c9a 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -356,6 +356,39 @@ public: */ bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); } + //! Less than operator. + /*! + \note Invalid pointers are never lesser than valid ones. + */ + bool operator<(const GenericPointer& rhs) const { + if (!IsValid()) + return false; + if (!rhs.IsValid()) + return true; + + size_t i = 0, lCount = tokenCount_, rCount = rhs.tokenCount_; + for (;;) { + if (!rCount) + return false; + if (!lCount) + return true; + + if (tokens_[i].index != rhs.tokens_[i].index) + return tokens_[i].index < rhs.tokens_[i].index; + + if (tokens_[i].length > rhs.tokens_[i].length) + return std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * rhs.tokens_[i].length) < 0; + + int cmp = std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length); + if (cmp || tokens_[i].length != rhs.tokens_[i].length) + return cmp <= 0; + + lCount--; + rCount--; + i++; + } + } + //@} //!@name Stringify From af17f196c69bb3a7ff86f6709574fabf41b79805 Mon Sep 17 00:00:00 2001 From: ylavic Date: Tue, 11 Dec 2018 00:19:13 +0100 Subject: [PATCH 2/4] Unit test for Pointer::operator<(). --- test/unittest/pointertest.cpp | 67 +++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp index 1280c86..401bf7d 100644 --- a/test/unittest/pointertest.cpp +++ b/test/unittest/pointertest.cpp @@ -15,7 +15,9 @@ #include "unittest.h" #include "rapidjson/pointer.h" #include "rapidjson/stringbuffer.h" +#include "rapidjson/ostreamwrapper.h" #include +#include using namespace rapidjson; @@ -1493,6 +1495,71 @@ TEST(Pointer, Ambiguity) { } } +TEST(Pointer, LessThan) { + static const char *pointers[] = { + "/a/b", + "/a/c", + "/a", + "/d/1", + "/d/2/z", + "/d/2/3", + "/d/2", + "/d/2/zz", + "/d/1", + "/d/2/z", + "/e/f~g", + "/e/f~0g", + "/e/f~1g", + "/e/f~~g", + "#/e/f%2fg", + "/e/f.g", + "" + }; + static struct { + const char *string; + bool valid; + } ordered_pointers[] = { + { "", true }, + { "/a", true }, + { "/a/b", true }, + { "/a/c", true }, + { "/d/1", true }, + { "/d/1", true }, + { "/d/2", true }, + { "/d/2/3", true }, + { "/d/2/z", true }, + { "/d/2/z", true }, + { "/d/2/zz", true }, + { "/e/f.g", true }, + { "/e/f~1g", true }, + { "/e/f~1g", true }, // was "#/e/f%2fg" + { "/e/f~0g", true }, + { "/e/f~g", false }, + { "/e/f~~g", false } + }; + MemoryPoolAllocator<> allocator; + typedef GenericPointer > PooledPointer; + std::multiset set; + size_t i; + + for (i = 0; i < sizeof(pointers) / sizeof(*pointers); ++i) { + set.insert(PooledPointer(pointers[i], &allocator)); + } + + i = 0; + for (std::set::iterator it = set.begin(); it != set.end(); ++it) { + EXPECT_TRUE(i < sizeof(ordered_pointers) / sizeof(*ordered_pointers)); + EXPECT_EQ(it->IsValid(), ordered_pointers[i].valid); + if (it->IsValid()) { + std::stringstream ss; + OStreamWrapper os(ss); + EXPECT_TRUE(it->Stringify(os)); + EXPECT_EQ(ss.str(), ordered_pointers[i].string); + } + i++; + } +} + // https://github.com/Tencent/rapidjson/issues/483 namespace myjson { From 0e34ed43f40fa522de4e88e3d817882cef3b6c40 Mon Sep 17 00:00:00 2001 From: ylavic Date: Tue, 11 Dec 2018 00:40:05 +0100 Subject: [PATCH 3/4] Rework Pointer::operator<() loop. I must be too dumb to understand the mess MSVC (32bit only) did with the previous loop, and to figure out how it might have make it never end. Anyway, hopefully any compiler can grok this new loop... --- include/rapidjson/pointer.h | 31 ++++++++++++------------------- test/unittest/pointertest.cpp | 25 +++++++++++++++---------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index 54a8c9a..97a376a 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -358,7 +358,7 @@ public: //! Less than operator. /*! - \note Invalid pointers are never lesser than valid ones. + \note Invalid pointers are always greater than valid ones. */ bool operator<(const GenericPointer& rhs) const { if (!IsValid()) @@ -366,27 +366,20 @@ public: if (!rhs.IsValid()) return true; - size_t i = 0, lCount = tokenCount_, rCount = rhs.tokenCount_; - for (;;) { - if (!rCount) - return false; - if (!lCount) - return true; + const Token *lTok = tokens_, *const lEnd = lTok + tokenCount_, + *rTok = rhs.tokens_, *const rEnd = rTok + rhs.tokenCount_; + for (; lTok != lEnd && rTok != rEnd; ++lTok, ++rTok) { + if (lTok->index != rTok->index) + return lTok->index < rTok->index; - if (tokens_[i].index != rhs.tokens_[i].index) - return tokens_[i].index < rhs.tokens_[i].index; + if (lTok->length > rTok->length) + return std::memcmp(lTok->name, rTok->name, sizeof(Ch) * rTok->length) < 0; - if (tokens_[i].length > rhs.tokens_[i].length) - return std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * rhs.tokens_[i].length) < 0; - - int cmp = std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length); - if (cmp || tokens_[i].length != rhs.tokens_[i].length) - return cmp <= 0; - - lCount--; - rCount--; - i++; + int comp = std::memcmp(lTok->name, rTok->name, sizeof(Ch) * lTok->length); + if (comp || lTok->length != rTok->length) + return comp <= 0; } + return rTok != rEnd; } //@} diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp index 401bf7d..8d6f187 100644 --- a/test/unittest/pointertest.cpp +++ b/test/unittest/pointertest.cpp @@ -1515,8 +1515,8 @@ TEST(Pointer, LessThan) { "/e/f.g", "" }; - static struct { - const char *string; + static const struct { + const char *str; bool valid; } ordered_pointers[] = { { "", true }, @@ -1537,24 +1537,29 @@ TEST(Pointer, LessThan) { { "/e/f~g", false }, { "/e/f~~g", false } }; - MemoryPoolAllocator<> allocator; - typedef GenericPointer > PooledPointer; - std::multiset set; + typedef MemoryPoolAllocator<> AllocatorType; + typedef GenericPointer PointerType; + typedef std::multiset PointerSet; + AllocatorType allocator; + PointerSet set; size_t i; - for (i = 0; i < sizeof(pointers) / sizeof(*pointers); ++i) { - set.insert(PooledPointer(pointers[i], &allocator)); + EXPECT_EQ(sizeof(pointers) / sizeof(pointers[0]), + sizeof(ordered_pointers) / sizeof(ordered_pointers[0])); + + for (i = 0; i < sizeof(pointers) / sizeof(pointers[0]); ++i) { + set.insert(PointerType(pointers[i], &allocator)); } i = 0; - for (std::set::iterator it = set.begin(); it != set.end(); ++it) { - EXPECT_TRUE(i < sizeof(ordered_pointers) / sizeof(*ordered_pointers)); + for (PointerSet::iterator it = set.begin(); it != set.end(); ++it) { + EXPECT_TRUE(i < sizeof(ordered_pointers) / sizeof(ordered_pointers[0])); EXPECT_EQ(it->IsValid(), ordered_pointers[i].valid); if (it->IsValid()) { std::stringstream ss; OStreamWrapper os(ss); EXPECT_TRUE(it->Stringify(os)); - EXPECT_EQ(ss.str(), ordered_pointers[i].string); + EXPECT_EQ(ss.str(), ordered_pointers[i].str); } i++; } From eb6ee17d2dcc596806875b2167cf8bf54f37e425 Mon Sep 17 00:00:00 2001 From: ylavic Date: Wed, 12 Dec 2018 22:32:56 +0100 Subject: [PATCH 4/4] Speed up Pointer::operator<(). Speed is more important than alphabetical order (which makes few sense in JSON in general, and with pointers especially). The use case is indexing in std containers, i.e. O(log n) with rbtree, so the faster comparison the better. --- include/rapidjson/pointer.h | 23 ++++---- test/unittest/pointertest.cpp | 99 ++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/include/rapidjson/pointer.h b/include/rapidjson/pointer.h index 97a376a..51805a6 100644 --- a/include/rapidjson/pointer.h +++ b/include/rapidjson/pointer.h @@ -366,20 +366,21 @@ public: if (!rhs.IsValid()) return true; - const Token *lTok = tokens_, *const lEnd = lTok + tokenCount_, - *rTok = rhs.tokens_, *const rEnd = rTok + rhs.tokenCount_; - for (; lTok != lEnd && rTok != rEnd; ++lTok, ++rTok) { - if (lTok->index != rTok->index) - return lTok->index < rTok->index; + if (tokenCount_ != rhs.tokenCount_) + return tokenCount_ < rhs.tokenCount_; - if (lTok->length > rTok->length) - return std::memcmp(lTok->name, rTok->name, sizeof(Ch) * rTok->length) < 0; + for (size_t i = 0; i < tokenCount_; i++) { + if (tokens_[i].index != rhs.tokens_[i].index) + return tokens_[i].index < rhs.tokens_[i].index; - int comp = std::memcmp(lTok->name, rTok->name, sizeof(Ch) * lTok->length); - if (comp || lTok->length != rTok->length) - return comp <= 0; + if (tokens_[i].length != rhs.tokens_[i].length) + return tokens_[i].length < rhs.tokens_[i].length; + + if (int cmp = std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch) * tokens_[i].length)) + return cmp < 0; } - return rTok != rEnd; + + return false; } //@} diff --git a/test/unittest/pointertest.cpp b/test/unittest/pointertest.cpp index 8d6f187..d2dc63a 100644 --- a/test/unittest/pointertest.cpp +++ b/test/unittest/pointertest.cpp @@ -17,7 +17,7 @@ #include "rapidjson/stringbuffer.h" #include "rapidjson/ostreamwrapper.h" #include -#include +#include using namespace rapidjson; @@ -1496,72 +1496,75 @@ TEST(Pointer, Ambiguity) { } TEST(Pointer, LessThan) { - static const char *pointers[] = { - "/a/b", - "/a/c", - "/a", - "/d/1", - "/d/2/z", - "/d/2/3", - "/d/2", - "/d/2/zz", - "/d/1", - "/d/2/z", - "/e/f~g", - "/e/f~0g", - "/e/f~1g", - "/e/f~~g", - "#/e/f%2fg", - "/e/f.g", - "" - }; static const struct { const char *str; bool valid; - } ordered_pointers[] = { - { "", true }, - { "/a", true }, - { "/a/b", true }, - { "/a/c", true }, - { "/d/1", true }, - { "/d/1", true }, - { "/d/2", true }, - { "/d/2/3", true }, - { "/d/2/z", true }, - { "/d/2/z", true }, - { "/d/2/zz", true }, - { "/e/f.g", true }, - { "/e/f~1g", true }, - { "/e/f~1g", true }, // was "#/e/f%2fg" - { "/e/f~0g", true }, - { "/e/f~g", false }, - { "/e/f~~g", false } + } pointers[] = { + { "/a/b", true }, + { "/a", true }, + { "/d/1", true }, + { "/d/2/z", true }, + { "/d/2/3", true }, + { "/d/2", true }, + { "/a/c", true }, + { "/e/f~g", false }, + { "/d/2/zz", true }, + { "/d/1", true }, + { "/d/2/z", true }, + { "/e/f~~g", false }, + { "/e/f~0g", true }, + { "/e/f~1g", true }, + { "/e/f.g", true }, + { "", true } + }; + static const char *ordered_pointers[] = { + "", + "/a", + "/a/b", + "/a/c", + "/d/1", + "/d/1", + "/d/2", + "/e/f.g", + "/e/f~1g", + "/e/f~0g", + "/d/2/3", + "/d/2/z", + "/d/2/z", + "/d/2/zz", + NULL, // was invalid "/e/f~g" + NULL // was invalid "/e/f~~g" }; typedef MemoryPoolAllocator<> AllocatorType; typedef GenericPointer PointerType; - typedef std::multiset PointerSet; + typedef std::multimap PointerMap; + PointerMap map; + PointerMap::iterator it; AllocatorType allocator; - PointerSet set; size_t i; EXPECT_EQ(sizeof(pointers) / sizeof(pointers[0]), sizeof(ordered_pointers) / sizeof(ordered_pointers[0])); for (i = 0; i < sizeof(pointers) / sizeof(pointers[0]); ++i) { - set.insert(PointerType(pointers[i], &allocator)); + it = map.insert(PointerMap::value_type(PointerType(pointers[i].str, &allocator), i)); + if (!it->first.IsValid()) { + EXPECT_EQ(++it, map.end()); + } } - i = 0; - for (PointerSet::iterator it = set.begin(); it != set.end(); ++it) { + for (i = 0, it = map.begin(); it != map.end(); ++it, ++i) { + EXPECT_TRUE(it->second < sizeof(pointers) / sizeof(pointers[0])); + EXPECT_EQ(it->first.IsValid(), pointers[it->second].valid); EXPECT_TRUE(i < sizeof(ordered_pointers) / sizeof(ordered_pointers[0])); - EXPECT_EQ(it->IsValid(), ordered_pointers[i].valid); - if (it->IsValid()) { + EXPECT_EQ(it->first.IsValid(), !!ordered_pointers[i]); + if (it->first.IsValid()) { std::stringstream ss; OStreamWrapper os(ss); - EXPECT_TRUE(it->Stringify(os)); - EXPECT_EQ(ss.str(), ordered_pointers[i].str); + EXPECT_TRUE(it->first.Stringify(os)); + EXPECT_EQ(ss.str(), pointers[it->second].str); + EXPECT_EQ(ss.str(), ordered_pointers[i]); } - i++; } }