Fix minLength & maxLength to count code points instead of code units

This commit is contained in:
miloyip 2015-05-07 16:15:09 +08:00
parent 9c0e409ff2
commit e33049d288
3 changed files with 34 additions and 2 deletions

View File

@ -616,6 +616,22 @@ struct Transcoder<Encoding, Encoding> {
}
};
//! Counts the code points in an encoded string.
/*!
    \tparam Encoding Encoding of the input; Encoding::Decode() is called once per counted code point.
    \param s Pointer to the first code unit of the string.
    \param length Number of code units (not code points) in the string.
    \param outCount Receives the number of code points. Written only when the function returns true.
    \return true if the range [s, s + length) was decoded completely; false if Encoding::Decode()
        rejected a sequence, or if the last sequence extended beyond s + length.
*/
template<typename Encoding>
bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
    GenericStringStream<Encoding> is(s);
    const typename Encoding::Ch* end = s + length;
    SizeType count = 0;
    while (is.src_ < end) {
        unsigned codepoint;
        if (!Encoding::Decode(is, &codepoint))
            return false;
        ++count;
    }
    // Decode() reads from the stream without knowing about length, so a multi-unit
    // sequence that begins inside the range can carry the read position past end.
    // Such a string is truncated with respect to length and must not be counted.
    if (is.src_ != end)
        return false;
    *outCount = count;
    return true;
}
RAPIDJSON_NAMESPACE_END
#if defined(__GNUC__) || defined(_MSV_VER)

View File

@ -428,8 +428,13 @@ public:
if ((type_ & (1 << kStringSchemaType)) == 0)
return false;
if (length < minLength_ || length > maxLength_)
//if (length < minLength_ || length > maxLength_)
// return false;
if (minLength_ != 0 || maxLength_ != SizeType(~0)) {
SizeType count;
if (CountStringCodePoint<Encoding>(str, length, &count) && (count < minLength_ || count > maxLength_))
return false;
}
#if RAPIDJSON_SCHEMA_HAS_REGEX
if (pattern_ && !IsPatternMatch(*pattern_, str, length))

View File

@ -424,3 +424,14 @@ TEST(EncodingsTest, UTF32) {
}
}
}
// Checks CountStringCodePoint<UTF8<> > against empty, ASCII, multi-byte and malformed UTF-8 input.
TEST(EncodingsTest, CountStringCodePoint) {
    SizeType n;

    // Inputs expected to decode successfully: text, length in bytes, expected code point count.
    static const struct {
        const char* text;
        SizeType bytes;
        SizeType codePoints;
    } accepted[] = {
        { "", 0, 0u },
        { "Hello", 5, 5u },
        { "\xC2\xA2\xE2\x82\xAC\xF0\x9D\x84\x9E", 9, 3u } // cents euro G-clef
    };
    for (unsigned i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++) {
        EXPECT_TRUE(CountStringCodePoint<UTF8<> >(accepted[i].text, accepted[i].bytes, &n));
        EXPECT_EQ(accepted[i].codePoints, n);
    }

    // The same three characters followed by a lone 0x80 byte: counting is expected to fail.
    EXPECT_FALSE(CountStringCodePoint<UTF8<> >("\xC2\xA2\xE2\x82\xAC\xF0\x9D\x84\x9E\x80", 10, &n));
}