From b2340077131ee918e9a401dcf70fee47b35e51e3 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 13 Jul 2014 13:16:03 +0800 Subject: [PATCH 01/11] Fixes #66 by adding Writer::Reset() and multiple root check Note it redefines RAPIDJSON_ASSERT() to throw exception in unittest and check for assertion with gtest. --- include/rapidjson/prettywriter.h | 31 ++++++------ include/rapidjson/writer.h | 69 ++++++++++++++----------- test/unittest/unittest.h | 18 +++++++ test/unittest/writertest.cpp | 86 ++++++++++++++++++++++++++++++-- 4 files changed, 158 insertions(+), 46 deletions(-) diff --git a/include/rapidjson/prettywriter.h b/include/rapidjson/prettywriter.h index f01e53e..02b9420 100644 --- a/include/rapidjson/prettywriter.h +++ b/include/rapidjson/prettywriter.h @@ -78,13 +78,13 @@ public: bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; if (!empty) { - Base::os_.Put('\n'); + Base::os_->Put('\n'); WriteIndent(); } if (!Base::WriteEndObject()) return false; if (Base::level_stack_.Empty()) // end of json text - Base::os_.Flush(); + Base::os_->Flush(); return true; } @@ -101,13 +101,13 @@ public: bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; if (!empty) { - Base::os_.Put('\n'); + Base::os_->Put('\n'); WriteIndent(); } if (!Base::WriteEndArray()) return false; if (Base::level_stack_.Empty()) // end of json text - Base::os_.Flush(); + Base::os_->Flush(); return true; } @@ -137,26 +137,26 @@ protected: if (level->inArray) { if (level->valueCount > 0) { - Base::os_.Put(','); // add comma if it is not the first element in array - Base::os_.Put('\n'); + Base::os_->Put(','); // add comma if it is not the first element in array + Base::os_->Put('\n'); } else - Base::os_.Put('\n'); + Base::os_->Put('\n'); WriteIndent(); } else { // in object if (level->valueCount > 0) { if (level->valueCount % 2 == 0) { - Base::os_.Put(','); - Base::os_.Put('\n'); + Base::os_->Put(','); + Base::os_->Put('\n'); } else { - Base::os_.Put(':'); - Base::os_.Put(' 
'); + Base::os_->Put(':'); + Base::os_->Put(' '); } } else - Base::os_.Put('\n'); + Base::os_->Put('\n'); if (level->valueCount % 2 == 0) WriteIndent(); @@ -165,13 +165,16 @@ protected: RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name level->valueCount++; } - else + else { RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType); + RAPIDJSON_ASSERT(!Base::hasRoot_); // Should only has one and only one root. + Base::hasRoot_ = true; + } } void WriteIndent() { size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; - PutN(Base::os_, indentChar_, count); + PutN(*Base::os_, indentChar_, count); } Ch indentChar_; diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index f3b7567..c312b74 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -41,8 +41,15 @@ public: \param levelDepth Initial capacity of stack. */ Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : - os_(os), level_stack_(allocator, levelDepth * sizeof(Level)), - doublePrecision_(kDefaultDoublePrecision) {} + os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)), + doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {} + + void Reset(OutputStream& os) { + os_ = &os; + doublePrecision_ = kDefaultDoublePrecision; + hasRoot_ = false; + level_stack_.Clear(); + } //! Set the number of significant digits for \c double values /*! 
When writing a \c double value to the \c OutputStream, the number @@ -103,7 +110,7 @@ public: level_stack_.template Pop(1); bool ret = WriteEndObject(); if (level_stack_.Empty()) // end of json text - os_.Flush(); + os_->Flush(); return ret; } @@ -120,7 +127,7 @@ public: level_stack_.template Pop(1); bool ret = WriteEndArray(); if (level_stack_.Empty()) // end of json text - os_.Flush(); + os_->Flush(); return ret; } //@} @@ -161,22 +168,22 @@ protected: static const size_t kDefaultLevelDepth = 32; bool WriteNull() { - os_.Put('n'); os_.Put('u'); os_.Put('l'); os_.Put('l'); return true; + os_->Put('n'); os_->Put('u'); os_->Put('l'); os_->Put('l'); return true; } bool WriteBool(bool b) { if (b) { - os_.Put('t'); os_.Put('r'); os_.Put('u'); os_.Put('e'); + os_->Put('t'); os_->Put('r'); os_->Put('u'); os_->Put('e'); } else { - os_.Put('f'); os_.Put('a'); os_.Put('l'); os_.Put('s'); os_.Put('e'); + os_->Put('f'); os_->Put('a'); os_->Put('l'); os_->Put('s'); os_->Put('e'); } return true; } bool WriteInt(int i) { if (i < 0) { - os_.Put('-'); + os_->Put('-'); i = -i; } return WriteUint((unsigned)i); @@ -192,14 +199,14 @@ protected: do { --p; - os_.Put(*p); + os_->Put(*p); } while (p != buffer); return true; } bool WriteInt64(int64_t i64) { if (i64 < 0) { - os_.Put('-'); + os_->Put('-'); i64 = -i64; } WriteUint64((uint64_t)i64); @@ -216,7 +223,7 @@ protected: do { --p; - os_.Put(*p); + os_->Put(*p); } while (p != buffer); return true; } @@ -233,7 +240,7 @@ protected: int ret = RAPIDJSON_SNPRINTF(buffer, sizeof(buffer), "%.*g", doublePrecision_, d); RAPIDJSON_ASSERT(ret >= 1); for (int i = 0; i < ret; i++) - os_.Put(buffer[i]); + os_->Put(buffer[i]); return true; } #undef RAPIDJSON_SNPRINTF @@ -252,32 +259,32 @@ protected: #undef Z16 }; - os_.Put('\"'); + os_->Put('\"'); GenericStringStream is(str); while (is.Tell() < length) { const Ch c = is.Peek(); if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) { is.Take(); - os_.Put('\\'); - 
os_.Put(escape[(unsigned char)c]); + os_->Put('\\'); + os_->Put(escape[(unsigned char)c]); if (escape[(unsigned char)c] == 'u') { - os_.Put('0'); - os_.Put('0'); - os_.Put(hexDigits[(unsigned char)c >> 4]); - os_.Put(hexDigits[(unsigned char)c & 0xF]); + os_->Put('0'); + os_->Put('0'); + os_->Put(hexDigits[(unsigned char)c >> 4]); + os_->Put(hexDigits[(unsigned char)c & 0xF]); } } else - Transcoder::Transcode(is, os_); + Transcoder::Transcode(is, *os_); } - os_.Put('\"'); + os_->Put('\"'); return true; } - bool WriteStartObject() { os_.Put('{'); return true; } - bool WriteEndObject() { os_.Put('}'); return true; } - bool WriteStartArray() { os_.Put('['); return true; } - bool WriteEndArray() { os_.Put(']'); return true; } + bool WriteStartObject() { os_->Put('{'); return true; } + bool WriteEndObject() { os_->Put('}'); return true; } + bool WriteStartArray() { os_->Put('['); return true; } + bool WriteEndArray() { os_->Put(']'); return true; } void Prefix(Type type) { (void)type; @@ -285,21 +292,25 @@ protected: Level* level = level_stack_.template Top(); if (level->valueCount > 0) { if (level->inArray) - os_.Put(','); // add comma if it is not the first element in array + os_->Put(','); // add comma if it is not the first element in array else // in object - os_.Put((level->valueCount % 2 == 0) ? ',' : ':'); + os_->Put((level->valueCount % 2 == 0) ? ',' : ':'); } if (!level->inArray && level->valueCount % 2 == 0) RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name level->valueCount++; } - else + else { RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType); + RAPIDJSON_ASSERT(!hasRoot_); // Should only has one and only one root. 
+ hasRoot_ = true; + } } - OutputStream& os_; + OutputStream* os_; internal::Stack level_stack_; int doublePrecision_; + bool hasRoot_; static const int kDefaultDoublePrecision = 6; diff --git a/test/unittest/unittest.h b/test/unittest/unittest.h index 0234319..ad87d72 100644 --- a/test/unittest/unittest.h +++ b/test/unittest/unittest.h @@ -56,4 +56,22 @@ inline void TempFilename(char *filename) { filename[i] = filename[i + 1]; } +// Use exception for catching assert +#if _MSC_VER +#pragma warning(disable : 4127) +#endif + +class AssertException : public std::exception { +public: + AssertException(const char* w) : what_(w) {} + AssertException(const AssertException& other) : what_(other.what_) {} + AssertException& operator=(const AssertException& rhs) { what_ = rhs.what_; return *this; } + virtual const char* what() const throw() { return what_; } + +private: + const char* what_; +}; + +#define RAPIDJSON_ASSERT(x) if (!(x)) throw AssertException(RAPIDJSON_STRINGIFY(x)) + #endif // UNITTEST_H_ diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp index be3d77b..c0e83f5 100644 --- a/test/unittest/writertest.cpp +++ b/test/unittest/writertest.cpp @@ -1,4 +1,5 @@ #include "unittest.h" + #include "rapidjson/document.h" #include "rapidjson/reader.h" #include "rapidjson/writer.h" @@ -80,9 +81,10 @@ TEST(Writer,DoublePrecision) { reader.Parse<0>(s, writer.SetDoublePrecision(12)); EXPECT_EQ(writer.GetDoublePrecision(), 12); EXPECT_STREQ(json, buffer.GetString()); - buffer.Clear(); } { // explicit individual double precisions + buffer.Clear(); + writer.Reset(buffer); writer.SetDoublePrecision(2); writer.StartArray(); writer.Double(1.2345, 5); @@ -93,11 +95,12 @@ TEST(Writer,DoublePrecision) { EXPECT_EQ(writer.GetDoublePrecision(), 2); EXPECT_STREQ(json, buffer.GetString()); - buffer.Clear(); } { // write with default precision (output with precision loss) Document d; d.Parse<0>(json); + buffer.Clear(); + writer.Reset(buffer); 
d.Accept(writer.SetDoublePrecision()); // parsed again to avoid platform-dependent floating point outputs @@ -108,7 +111,6 @@ TEST(Writer,DoublePrecision) { EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457); EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457); EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570); - buffer.Clear(); } } @@ -160,3 +162,81 @@ TEST(Writer, OStreamWrapper) { std::string actual = ss.str(); EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3]}", actual.c_str()); } + +TEST(Writer, AssertRootMustBeArrayOrObject) { +#define T(x)\ + {\ + StringBuffer buffer;\ + Writer writer(buffer);\ + ASSERT_THROW(x, AssertException);\ + } + T(writer.Bool(false)); + T(writer.Bool(true)); + T(writer.Null()); + T(writer.Int(0)); + T(writer.Uint(0)); + T(writer.Int64(0)); + T(writer.Uint64(0)); + T(writer.Double(0)); + T(writer.String("foo")); +#undef T +} + +TEST(Writer, AssertIncorrectObjectLevel) { + StringBuffer buffer; + Writer writer(buffer); + writer.StartObject(); + writer.EndObject(); + ASSERT_THROW(writer.EndObject(), AssertException); +} + +TEST(Writer, AssertIncorrectArrayLevel) { + StringBuffer buffer; + Writer writer(buffer); + writer.StartArray(); + writer.EndArray(); + ASSERT_THROW(writer.EndArray(), AssertException); +} + +TEST(Writer, AssertIncorrectEndObject) { + StringBuffer buffer; + Writer writer(buffer); + writer.StartObject(); + ASSERT_THROW(writer.EndArray(), AssertException); +} + +TEST(Writer, AssertIncorrectEndArray) { + StringBuffer buffer; + Writer writer(buffer); + writer.StartObject(); + ASSERT_THROW(writer.EndArray(), AssertException); +} + +TEST(Writer, AssertObjectKeyNotString) { +#define T(x)\ + {\ + StringBuffer buffer;\ + Writer writer(buffer);\ + writer.StartObject();\ + ASSERT_THROW(x, AssertException); \ + } + T(writer.Bool(false)); + T(writer.Bool(true)); + T(writer.Null()); + T(writer.Int(0)); + T(writer.Uint(0)); + T(writer.Int64(0)); + T(writer.Uint64(0)); + T(writer.Double(0)); + 
T(writer.StartObject()); + T(writer.StartArray()); +#undef T +} + +TEST(Writer, AssertMultipleRoot) { + StringBuffer buffer; + Writer writer(buffer); + writer.StartObject(); + writer.EndObject(); + ASSERT_THROW(writer.StartObject(), AssertException); +} From 5babae98e5c75f6a34f425be9f1f436ec761d3c7 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 13 Jul 2014 13:21:25 +0800 Subject: [PATCH 02/11] Add Writer::IsComplete() --- include/rapidjson/writer.h | 4 ++++ test/unittest/writertest.cpp | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index c312b74..22630c4 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -51,6 +51,10 @@ public: level_stack_.Clear(); } + bool IsComplete() const { + return hasRoot_ && level_stack_.Empty(); + } + //! Set the number of significant digits for \c double values /*! When writing a \c double value to the \c OutputStream, the number of significant digits is limited to 6 by default. 
diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp index c0e83f5..64dbf24 100644 --- a/test/unittest/writertest.cpp +++ b/test/unittest/writertest.cpp @@ -15,6 +15,7 @@ TEST(Writer, Compact) { reader.Parse<0>(s, writer); EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3]}", buffer.GetString()); EXPECT_EQ(77u, buffer.GetSize()); + EXPECT_TRUE(writer.IsComplete()); } // json -> parse -> writer -> json @@ -26,6 +27,7 @@ TEST(Writer, Compact) { Reader reader; \ reader.Parse<0>(s, writer); \ EXPECT_STREQ(json, buffer.GetString()); \ + EXPECT_TRUE(writer.IsComplete()); \ } TEST(Writer, Int) { @@ -121,6 +123,7 @@ TEST(Writer, Transcode) { Writer, UTF8<> > writer(buffer); GenericReader, UTF16<> > reader; reader.Parse<0>(s, writer); + EXPECT_TRUE(writer.IsComplete()); EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}", buffer.GetString()); } @@ -240,3 +243,31 @@ TEST(Writer, AssertMultipleRoot) { writer.EndObject(); ASSERT_THROW(writer.StartObject(), AssertException); } + +TEST(Writer, RootObjectIsComplete) { + StringBuffer buffer; + Writer writer(buffer); + EXPECT_FALSE(writer.IsComplete()); + writer.StartObject(); + EXPECT_FALSE(writer.IsComplete()); + writer.String("foo"); + EXPECT_FALSE(writer.IsComplete()); + writer.Int(1); + EXPECT_FALSE(writer.IsComplete()); + writer.EndObject(); + EXPECT_TRUE(writer.IsComplete()); +} + +TEST(Writer, RootArrayIsComplete) { + StringBuffer buffer; + Writer writer(buffer); + EXPECT_FALSE(writer.IsComplete()); + writer.StartArray(); + EXPECT_FALSE(writer.IsComplete()); + writer.String("foo"); + EXPECT_FALSE(writer.IsComplete()); + writer.Int(1); + EXPECT_FALSE(writer.IsComplete()); + writer.EndArray(); + EXPECT_TRUE(writer.IsComplete()); +} \ No newline at end of file From 
27101d9cd1b586a955f9edb83504194f9fb3f110 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 13 Jul 2014 13:27:15 +0800 Subject: [PATCH 03/11] Add API doc for Writer::Reset() and Writer::IsComplete() --- include/rapidjson/writer.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index 22630c4..468ad90 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -44,6 +44,24 @@ public: os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)), doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {} + //! Reset the writer with a new stream. + /*! + This function resets the writer with a new stream and default settings, + in order to make a Writer object reusable for outputting multiple JSONs. + + \param os New output stream. + \code + Writer<OutputStream> writer(os1); + writer.StartObject(); + // ... + writer.EndObject(); + + writer.Reset(os2); + writer.StartObject(); + // ... + writer.EndObject(); + \endcode + */ void Reset(OutputStream& os) { os_ = &os; doublePrecision_ = kDefaultDoublePrecision; @@ -51,6 +69,10 @@ public: level_stack_.Clear(); } + //! Checks whether the output is a complete JSON. + /*! + A complete JSON has a complete root object or array. 
+ */ bool IsComplete() const { return hasRoot_ && level_stack_.Empty(); } From b5436f710417675e0b16d0045215d2d68f3d9d31 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 13 Jul 2014 23:51:56 +0800 Subject: [PATCH 04/11] Add ASCII encoding which can fulfill #69 --- include/rapidjson/encodings.h | 68 +++++++++++++++++++++++++++++++++++ include/rapidjson/writer.h | 34 +++++++++++++++++- test/unittest/writertest.cpp | 33 +++++++++++++---- 3 files changed, 128 insertions(+), 7 deletions(-) diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h index dcf21e4..fd8689d 100644 --- a/include/rapidjson/encodings.h +++ b/include/rapidjson/encodings.h @@ -23,6 +23,8 @@ namespace rapidjson { concept Encoding { typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. + enum { supportUnicode = 1 }; // or 0 if not supporting unicode + //! \brief Encode a Unicode codepoint to an output stream. //! \param os Output stream. //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. @@ -78,6 +80,8 @@ template struct UTF8 { typedef CharType Ch; + enum { supportUnicode = 1 }; + template static void Encode(OutputStream& os, unsigned codepoint) { if (codepoint <= 0x7F) @@ -222,6 +226,8 @@ struct UTF16 { typedef CharType Ch; RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); + enum { supportUnicode = 1 }; + template static void Encode(OutputStream& os, unsigned codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); @@ -351,6 +357,8 @@ struct UTF32 { typedef CharType Ch; RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); + enum { supportUnicode = 1 }; + template static void Encode(OutputStream& os, unsigned codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); @@ -447,6 +455,66 @@ struct UTF32BE : UTF32 { } }; +/////////////////////////////////////////////////////////////////////////////// +// ASCII + +//! ASCII encoding. +/*! 
http://en.wikipedia.org/wiki/ASCII + \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. + \note implements Encoding concept +*/ +template +struct ASCII { + typedef CharType Ch; + + enum { supportUnicode = 0 }; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + os.Put(static_cast(codepoint & 0xFF)); + } + + template + static bool Decode(InputStream& is, unsigned* codepoint) { + unsigned char c = static_cast(is.Take()); + *codepoint = c; + return c <= 0X7F; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + unsigned char c = is.Take(); + os.Put(c); + return c <= 0x7F; + } + + template + static CharType TakeBOM(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + Ch c = Take(is); + return c; + } + + template + static Ch Take(InputByteStream& is) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); + return is.Take(); + } + + template + static void PutBOM(OutputByteStream& os) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + (void)os; + } + + template + static void Put(OutputByteStream& os, Ch c) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); + os.Put(static_cast(c)); + } +}; + /////////////////////////////////////////////////////////////////////////////// // AutoUTF diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index f3b7567..73677c1 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -256,7 +256,39 @@ protected: GenericStringStream is(str); while (is.Tell() < length) { const Ch c = is.Peek(); - if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) { + if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) { + // Unicode escaping + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + os_.Put('\\'); + os_.Put('u'); + if (codepoint <= 0xD7FF || 
(codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + os_.Put(hexDigits[(codepoint >> 12) & 15]); + os_.Put(hexDigits[(codepoint >> 8) & 15]); + os_.Put(hexDigits[(codepoint >> 4) & 15]); + os_.Put(hexDigits[(codepoint ) & 15]); + } + else if (codepoint >= 0x010000 && codepoint <= 0x10FFFF) { + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + os_.Put(hexDigits[(lead >> 12) & 15]); + os_.Put(hexDigits[(lead >> 8) & 15]); + os_.Put(hexDigits[(lead >> 4) & 15]); + os_.Put(hexDigits[(lead ) & 15]); + os_.Put('\\'); + os_.Put('u'); + os_.Put(hexDigits[(trail >> 12) & 15]); + os_.Put(hexDigits[(trail >> 8) & 15]); + os_.Put(hexDigits[(trail >> 4) & 15]); + os_.Put(hexDigits[(trail ) & 15]); + } + else + return false; // invalid code point + } + else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) { is.Take(); os_.Put('\\'); os_.Put(escape[(unsigned char)c]); diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp index be3d77b..b9a4891 100644 --- a/test/unittest/writertest.cpp +++ b/test/unittest/writertest.cpp @@ -113,13 +113,34 @@ TEST(Writer,DoublePrecision) { } TEST(Writer, Transcode) { + const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}"; + // UTF8 -> UTF16 -> UTF8 - StringStream s("{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3], \"dollar\":\"\x24\", \"cents\":\"\xC2\xA2\", \"euro\":\"\xE2\x82\xAC\", \"gclef\":\"\xF0\x9D\x84\x9E\" } "); - StringBuffer buffer; - Writer, UTF8<> > writer(buffer); - GenericReader, UTF16<> > reader; - reader.Parse<0>(s, writer); - 
EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}", buffer.GetString()); + { + StringStream s(json); + StringBuffer buffer; + Writer, UTF8<> > writer(buffer); + GenericReader, UTF16<> > reader; + reader.Parse(s, writer); + EXPECT_STREQ(json, buffer.GetString()); + } + + // UTF8 -> UTF8 -> ASCII -> UTF8 -> UTF8 + { + StringStream s(json); + StringBuffer buffer; + Writer, ASCII<> > writer(buffer); + Reader reader; + reader.Parse(s, writer); + + StringBuffer buffer2; + Writer writer2(buffer2); + GenericReader, UTF8<> > reader2; + StringStream s2(buffer.GetString()); + reader2.Parse(s2, writer2); + + EXPECT_STREQ(json, buffer2.GetString()); + } } #include From a37a1881a5cee2fdb1a529a49d87d66d91a0dfdf Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Mon, 14 Jul 2014 00:03:40 +0800 Subject: [PATCH 05/11] Fixes missing supportUnicode in AutoUTF --- include/rapidjson/encodings.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h index fd8689d..8982461 100644 --- a/include/rapidjson/encodings.h +++ b/include/rapidjson/encodings.h @@ -534,6 +534,8 @@ template struct AutoUTF { typedef CharType Ch; + enum { supportUnicode = 1 }; + #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x template From 065e3b16283e92661425f08e295228a7fc3f0369 Mon Sep 17 00:00:00 2001 From: miloyip Date: Mon, 14 Jul 2014 13:54:15 +0800 Subject: [PATCH 06/11] Do not assume little endian when detection fail, add more detections --- include/rapidjson/rapidjson.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index 0177b8c..d0377fe 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -55,20 +55,33 @@ /*! 
GCC provided macro for detecting endianness of the target machine. But other compilers may not have this. User can define RAPIDJSON_ENDIAN to either \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN. + + Implemented with reference to + https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp */ #ifndef RAPIDJSON_ENDIAN -#ifdef __BYTE_ORDER__ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN -#else -#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN -#endif // __BYTE_ORDER__ -#else -#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN // Assumes little endian otherwise. -#endif +# ifdef __BYTE_ORDER__ +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. +# endif // __BYTE_ORDER__ +# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. 
+# endif #endif // RAPIDJSON_ENDIAN - /////////////////////////////////////////////////////////////////////////////// // RAPIDJSON_ALIGNSIZE From d5218804bf32933e5c09b92014f3d68bd5495558 Mon Sep 17 00:00:00 2001 From: miloyip Date: Mon, 14 Jul 2014 14:16:34 +0800 Subject: [PATCH 07/11] More endian detection Since __BYTE_ORDER__ is only available since GCC 4.6.0, add more detection. --- include/rapidjson/rapidjson.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index d0377fe..1608ce8 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -52,15 +52,16 @@ #define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine //! Endianness of the machine. -/*! GCC provided macro for detecting endianness of the target machine. But other +/*! GCC 4.6 provided macro for detecting endianness of the target machine. But other compilers may not have this. User can define RAPIDJSON_ENDIAN to either \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN. Implemented with reference to - https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + https://gcc.gnu.org/onlinedocs/gcc-4.6.0/cpp/Common-Predefined-Macros.html http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp */ #ifndef RAPIDJSON_ENDIAN +// Detect with GCC 4.6's macro # ifdef __BYTE_ORDER__ # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN @@ -69,10 +70,22 @@ # else # error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. # endif // __BYTE_ORDER__ +// Detect with GLIBC's endian.h +# elif defined(__GLIBC__) +# include <endian.h> +# if (__BYTE_ORDER == __LITTLE_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +# elif (__BYTE_ORDER == __BIG_ENDIAN) +# define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +# else +# error Unknown machine endianess detected. User needs to define RAPIDJSON_ENDIAN. 
+# endif // __GLIBC__ +// Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro # elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) # define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN # elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN) # define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +// Detect with architecture macros # elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__) # define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN # elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__) From 649db917bea96a923d1f10172d200784095e5e62 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Wed, 16 Jul 2014 01:09:30 +0800 Subject: [PATCH 08/11] Add encoding documentation --- doc/encoding.md | 137 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 4 deletions(-) diff --git a/doc/encoding.md b/doc/encoding.md index 0f04787..df3a2c8 100644 --- a/doc/encoding.md +++ b/doc/encoding.md @@ -1,9 +1,138 @@ # Encoding -## Unicode +According to [ECMA-404](http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf), -### Character Type +> (in Introduction) JSON text is a sequence of Unicode code points. -### UTF +The earlier [RFC4627](http://www.ietf.org/rfc/rfc4627.txt) stated that, -## Validation & Transcoding +> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is + UTF-8. + +> (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible. When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used. + +RapidJSON supports various encodings. 
It can also validate the encodings of JSON, and transcode JSON among encodings. All these features are implemented internally, without the need for external libraries (e.g. [ICU](http://site.icu-project.org/)). + +[TOC] + +# Unicode {Unicode} +From [Unicode's official website](http://www.unicode.org/standard/WhatIsUnicode.html): +> Unicode provides a unique number for every character, +> no matter what the platform, +> no matter what the program, +> no matter what the language. + +Those unique numbers are called code points, which is in the range `0x0` to `0x10FFFF`. + +## Unicode Transformation Format {UTF} + +There are various encodings for storing Unicode code points. These are called Unicode Transformation Format (UTF). RapidJSON supports the most commonly used UTFs, including + +* UTF-8: 8-bit variable-width encoding. It maps a code point to 1-4 bytes. +* UTF-16: 16-bit variable-width encoding. It maps a code point to 1-2 16-bit code units (i.e., 2-4 bytes). +* UTF-32: 32-bit fixed-width encoding. It directly maps a code point to 1 32-bit code unit (i.e. 4 bytes). + +For UTF-16 and UTF-32, the byte order (endianness) does matter. Within computer memory, they are often stored in the computer's endianness. However, when they are stored in a file or transferred over a network, we need to state the byte order of the byte sequence, either little-endian (LE) or big-endian (BE). + +RapidJSON provides these encodings via the structs in `rapidjson/encodings.h`: + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename CharType = char> +struct UTF8; + +template<typename CharType = wchar_t> +struct UTF16; + +template<typename CharType = wchar_t> +struct UTF16LE; + +template<typename CharType = wchar_t> +struct UTF16BE; + +template<typename CharType = unsigned> +struct UTF32; + +template<typename CharType = unsigned> +struct UTF32LE; + +template<typename CharType = unsigned> +struct UTF32BE; + +} // namespace rapidjson +~~~~~~~~~~ + +For processing text in memory, we normally use `UTF8`, `UTF16` or `UTF32`. For processing text via I/O, we may use `UTF8`, `UTF16LE`, `UTF16BE`, `UTF32LE` or `UTF32BE`. 
+ +When using the DOM-style API, the `Encoding` template parameter in `GenericValue` and `GenericDocument` indicates the encoding to be used to represent JSON string in memory. So normally we will use `UTF8`, `UTF16` or `UTF32` for this template parameter. The choice depends on operating systems and other libraries that the application is using. For example, Windows API represents Unicode characters in UTF-16, while most Linux distributions and applications prefer UTF-8. + +Example of UTF-16 DOM declaration: + +~~~~~~~~~~cpp +typedef GenericDocument > WDocument; +typedef GenericValue > WValue; +~~~~~~~~~~ + +For a detail example, please check the example in [DOM's Encoding](doc/stream.md#Encoding) section. + +## Character Type {CharacterType} + +As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 1-4 code units for UTF-8. + +For `UTF16(LE|BE)`, `UTF32(LE|BE)`, the `CharType` must be integer type of at least 2 and 4 bytes respectively. + +Note that C++11 introduces `char16_t` and `char32_t`, which can be used for `UTF16` and `UTF32` respectively. + +## AutoUTF {AutoUTF} + +Previous encodings are statically bound in compile-time. In other words, user must know exactly which encodings will be used in the memory or streams. However, sometimes we may need to read/write files of different encodings. The encoding needed to be decided in runtime. + +`AutoUTF` is an encoding designed for this purpose. It chooses which encoding to be used according to the input or output stream. Currently, it should be used with `EncodedInputStream` and `EncodedOutputStream`. + +## ASCII {ASCII} + +Although the JSON standards did not mention about [ASCII](http://en.wikipedia.org/wiki/ASCII), sometimes we would like to write 7-bit ASCII JSON for applications that cannot handle UTF-8. 
Since any JSON can represent unicode characters in escaped sequence `\uXXXX`, JSON can always be encoded in ASCII. + +Here is an example for writing a UTF-8 DOM into ASCII: + +~~~~~~~~~~cpp +using namespace rapidjson; +Document d; // UTF8<> +// ... +StringBuffer buffer; +Writer<StringBuffer, Document::EncodingType, ASCII<> > writer(buffer); +d.Accept(writer); +std::cout << buffer.GetString(); +~~~~~~~~~~ + +ASCII can be used in input stream. If the input stream contains bytes with values above 127, it will cause `kParseErrorStringInvalidEncoding` error. + +ASCII *cannot* be used in memory (encoding of `Document` or target encoding of `Reader`), as it cannot represent Unicode code points. + +# Validation & Transcoding {ValidationTranscoding} + +When RapidJSON parses a JSON, it can validate the input JSON, whether it is a valid sequence of a specified encoding. This option can be turned on by adding `kParseValidateEncodingFlag` in `parseFlags` template parameter. + +If the input encoding and output encoding is different, `Reader` and `Writer` will automatically transcode (convert) the text. In this case, `kParseValidateEncodingFlag` is not necessary, as it must decode the input sequence. And if the sequence was unable to be decoded, it must be invalid. + +## Transcoder {Transcoder} + +Although the encoding functions in RapidJSON are designed for JSON parsing/generation, user may abuse them for transcoding of non-JSON strings. + +Here is an example for transcoding a string from UTF-8 to UTF-16: + +~~~~~~~~~~cpp +#include "rapidjson/encodings.h" + +using namespace rapidjson; + +const char* s = "..."; // UTF-8 string +StringStream source(s); +GenericStringBuffer > target; + +Transcoder::Transcode, UTF16<> >(source, target) +const wchar_t* t = target.GetString(); +~~~~~~~~~~ + +You may also use `AutoUTF` and the associated streams for setting source/target encoding in runtime. 
From e590e0757ec8cb6f602e4668defdae2c2593b042 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Wed, 16 Jul 2014 01:21:51 +0800 Subject: [PATCH 09/11] Add missing hash tags --- doc/encoding.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/encoding.md b/doc/encoding.md index df3a2c8..cc764c2 100644 --- a/doc/encoding.md +++ b/doc/encoding.md @@ -15,7 +15,7 @@ RapidJSON supports various encodings. It can also validate the encodings of JSON [TOC] -# Unicode {Unicode} +# Unicode {#Unicode} From [Unicode's official website](http://www.unicode.org/standard/WhatIsUnicode.html): > Unicode provides a unique number for every character, > no matter what the platform, @@ -24,7 +24,7 @@ From [Unicode's official website](http://www.unicode.org/standard/WhatIsUnicode. Those unique numbers are called code points, which is in the range `0x0` to `0x10FFFF`. -## Unicode Transformation Format {UTF} +## Unicode Transformation Format {#UTF} There are various encodings for storing Unicode code points. These are called Unicode Transformation Format (UTF). RapidJSON supports the most commonly used UTFs, including @@ -76,7 +76,7 @@ typedef GenericValue > WValue; For a detail example, please check the example in [DOM's Encoding](doc/stream.md#Encoding) section. -## Character Type {CharacterType} +## Character Type {#CharacterType} As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 1-4 code units for UTF-8. @@ -84,13 +84,13 @@ For `UTF16(LE|BE)`, `UTF32(LE|BE)`, the `CharType` must be integer type of at le Note that C++11 introduces `char16_t` and `char32_t`, which can be used for `UTF16` and `UTF32` respectively. -## AutoUTF {AutoUTF} +## AutoUTF {#AutoUTF} Previous encodings are statically bound in compile-time. 
In other words, user must know exactly which encodings will be used in the memory or streams. However, sometimes we may need to read/write files of different encodings. The encoding needed to be decided in runtime. `AutoUTF` is an encoding designed for this purpose. It chooses which encoding to be used according to the input or output stream. Currently, it should be used with `EncodedInputStream` and `EncodedOutputStream`. -## ASCII {ASCII} +## ASCII {#ASCII} Although the JSON standards did not mention about [ASCII](http://en.wikipedia.org/wiki/ASCII), sometimes we would like to write 7-bit ASCII JSON for applications that cannot handle UTF-8. Since any JSON can represent unicode characters in escaped sequence `\uXXXX`, JSON can always be encoded in ASCII. @@ -110,13 +110,13 @@ ASCII can be used in input stream. If the input stream contains bytes with value ASCII *cannot* be used in memory (encoding of `Document` or target encoding of `Reader`), as it cannot represent Unicode code points. -# Validation & Transcoding {ValidationTranscoding} +# Validation & Transcoding {#ValidationTranscoding} When RapidJSON parses a JSON, it can validate the input JSON, whether it is a valid sequence of a specified encoding. This option can be turned on by adding `kParseValidateEncodingFlag` in `parseFlags` template parameter. If the input encoding and output encoding is different, `Reader` and `Writer` will automatically transcode (convert) the text. In this case, `kParseValidateEncodingFlag` is not necessary, as it must decode the input sequence. And if the sequence was unable to be decoded, it must be invalid. -## Transcoder {Transcoder} +## Transcoder {#Transcoder} Although the encoding functions in RapidJSON are designed for JSON parsing/generation, user may abuse them for transcoding of non-JSON strings. 
From 7cfe718d3d1abbb15676b8f83e001b00eb2f1473 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Wed, 16 Jul 2014 01:56:11 +0800 Subject: [PATCH 10/11] Minor update to encoding documentation --- doc/encoding.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/encoding.md b/doc/encoding.md index cc764c2..bc5c178 100644 --- a/doc/encoding.md +++ b/doc/encoding.md @@ -6,8 +6,7 @@ According to [ECMA-404](http://www.ecma-international.org/publications/files/ECM The earlier [RFC4627](http://www.ietf.org/rfc/rfc4627.txt) stated that, -> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is - UTF-8. +> (in §3) JSON text SHALL be encoded in Unicode. The default encoding is UTF-8. > (in §6) JSON may be represented using UTF-8, UTF-16, or UTF-32. When JSON is written in UTF-8, JSON is 8bit compatible. When JSON is written in UTF-16 or UTF-32, the binary content-transfer-encoding must be used. @@ -28,9 +27,9 @@ Those unique numbers are called code points, which is in the range `0x0` to `0x1 There are various encodings for storing Unicode code points. These are called Unicode Transformation Format (UTF). RapidJSON supports the most commonly used UTFs, including -* UTF-8: 8-bit variable-width encoding. It maps a code point to 1-4 bytes. -* UTF-16: 16-bit variable-width encoding. It maps a code point to 1-2 16-bit code units (i.e., 2-4 bytes). -* UTF-32: 32-bit fixed-width encoding. It directly maps a code point to 1 32-bit code unit (i.e. 4 bytes). +* UTF-8: 8-bit variable-width encoding. It maps a code point to 1–4 bytes. +* UTF-16: 16-bit variable-width encoding. It maps a code point to 1–2 16-bit code units (i.e., 2–4 bytes). +* UTF-32: 32-bit fixed-width encoding. It directly maps a code point to a single 32-bit code unit (i.e. 4 bytes). For UTF-16 and UTF-32, the byte order (endianness) does matter. Within computer memory, they are often stored in the computer's endianness. 
However, when it is stored in file or transferred over network, we need to state the byte order of the byte sequence, either little-endian (LE) or big-endian (BE). @@ -78,7 +77,7 @@ For a detail example, please check the example in [DOM's Encoding](doc/stream.md ## Character Type {#CharacterType} -As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 1-4 code units for UTF-8. +As shown in the declaration, each encoding has a `CharType` template parameter. Actually, it may be a little bit confusing, but each `CharType` stores a code unit, not a character (code point). As mentioned in previous section, a code point may be encoded to 1–4 code units for UTF-8. For `UTF16(LE|BE)`, `UTF32(LE|BE)`, the `CharType` must be integer type of at least 2 and 4 bytes respectively. From 9eda05c2862c600fd65d15e24b50a2bede70644e Mon Sep 17 00:00:00 2001 From: miloyip Date: Wed, 16 Jul 2014 09:13:06 +0800 Subject: [PATCH 11/11] Fixes example code in encoding --- doc/encoding.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/encoding.md b/doc/encoding.md index bc5c178..4d7ca7b 100644 --- a/doc/encoding.md +++ b/doc/encoding.md @@ -130,8 +130,17 @@ const char* s = "..."; // UTF-8 string StringStream source(s); GenericStringBuffer > target; -Transcoder::Transcode, UTF16<> >(source, target) -const wchar_t* t = target.GetString(); +bool hasError = false; +while (source.Peek() != '\0') + if (!Transcoder<UTF8<>, UTF16<> >::Transcode(source, target)) { + hasError = true; + break; + } + +if (!hasError) { + const wchar_t* t = target.GetString(); + // ... +} ~~~~~~~~~~ You may also use `AutoUTF` and the associated streams for setting source/target encoding in runtime.