From bf8fcd19c189df01012b316b5dd7b550f513eab4 Mon Sep 17 00:00:00 2001 From: "miloyip@gmail.com" Date: Sat, 3 Dec 2011 11:14:39 +0000 Subject: [PATCH] Added prettyauto example, which can handle UTF-8/UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE Fixed a bug for using Reader with AutoUTFInputStream git-svn-id: https://rapidjson.googlecode.com/svn/trunk@50 c5894555-1306-4e8d-425f-1f6f381ee07c --- build/premake4.lua | 5 +++ example/pretty/pretty.cpp | 1 + example/prettyauto/prettyauto.cpp | 55 +++++++++++++++++++++++++++++++ include/rapidjson/encodedstream.h | 36 ++++++++++---------- 4 files changed, 79 insertions(+), 18 deletions(-) create mode 100644 example/prettyauto/prettyauto.cpp diff --git a/build/premake4.lua b/build/premake4.lua index 7bb41fe..74064fb 100644 --- a/build/premake4.lua +++ b/build/premake4.lua @@ -157,6 +157,11 @@ solution "example" files "../example/pretty/*" setTargetObjDir("../bin") + project "prettyauto" + kind "ConsoleApp" + files "../example/prettyauto/*" + setTargetObjDir("../bin") + project "tutorial" kind "ConsoleApp" files "../example/tutorial/*" diff --git a/example/pretty/pretty.cpp b/example/pretty/pretty.cpp index a48f24f..950d636 100644 --- a/example/pretty/pretty.cpp +++ b/example/pretty/pretty.cpp @@ -1,4 +1,5 @@ // JSON pretty formatting example +// This example can only handle UTF-8. For handling other encodings, see prettyauto example. #include "rapidjson/reader.h" #include "rapidjson/prettywriter.h" diff --git a/example/prettyauto/prettyauto.cpp b/example/prettyauto/prettyauto.cpp new file mode 100644 index 0000000..bb970cd --- /dev/null +++ b/example/prettyauto/prettyauto.cpp @@ -0,0 +1,55 @@ +// JSON pretty formatting example +// This example can handle UTF-8/UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE. +// The input is first converted to UTF-8, and then written back in the original encoding with pretty formatting.
+ +#include "rapidjson/reader.h" +#include "rapidjson/prettywriter.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/encodedstream.h" // NEW +#ifdef _WIN32 +#include <fcntl.h> +#include <io.h> +#endif + +using namespace rapidjson; + +int main(int argc, char* argv[]) { +#ifdef _WIN32 + // Prevent Windows converting between CR+LF and LF + _setmode(_fileno(stdin), _O_BINARY); // NEW + _setmode(_fileno(stdout), _O_BINARY); // NEW +#endif + + // Prepare reader and input stream. + //Reader reader; + GenericReader<AutoUTF<unsigned>, UTF8<> > reader; // CHANGED + char readBuffer[65536]; + FileReadStream is(stdin, readBuffer, sizeof(readBuffer)); + AutoUTFInputStream<unsigned, FileReadStream> eis(is); // NEW + + // Prepare writer and output stream. + char writeBuffer[65536]; + FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer)); + +#if 1 + // Use the same Encoding of the input. Also use BOM according to input. + typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream; // NEW + OutputStream eos(os, eis.GetType(), eis.HasBOM()); // NEW + PrettyWriter<OutputStream, UTF8<>, AutoUTF<unsigned> > writer(eos); // CHANGED +#else + // You may also use static bound encoding type, such as output to UTF-16LE with BOM + typedef EncodedOutputStream<UTF16LE<>,FileWriteStream> OutputStream; // NEW + OutputStream eos(os, true); // NEW + PrettyWriter<OutputStream, UTF8<>, UTF16LE<> > writer(eos); // CHANGED +#endif + + // JSON reader parse from the input stream and let writer generate the output. + //if (!reader.Parse<0>(is, writer)) { + if (!reader.Parse<0>(eis, writer)) { // CHANGED + fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), reader.GetParseError()); + return 1; + } + + return 0; +} diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h index efa0acf..40f61de 100644 --- a/include/rapidjson/encodedstream.h +++ b/include/rapidjson/encodedstream.h @@ -83,19 +83,19 @@ public: \param is input stream to be wrapped. \param type UTF encoding type if it is not detected from the stream.
*/ - AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(is), type_(type), hasBOM_(false) { - DetectType(is); + AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { + DetectType(); static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; takeFunc_ = f[type_]; - current_ = takeFunc_(is_); + current_ = takeFunc_(*is_); } UTFType GetType() const { return type_; } bool HasBOM() const { return hasBOM_; } Ch Peek() const { return current_; } - Ch Take() { Ch c = current_; current_ = takeFunc_(is_); return c; } - size_t Tell() const { is_.Tell(); } + Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } + size_t Tell() const { return is_->Tell(); } // Not implemented void Put(Ch c) { RAPIDJSON_ASSERT(false); } @@ -105,7 +105,7 @@ public: private: // Detect encoding type with BOM or RFC 4627 - void DetectType(InputByteStream& is) { + void DetectType() { // BOM (Byte Order Mark): // 00 00 FE FF UTF-32BE // FF FE 00 00 UTF-32LE @@ -113,17 +113,17 @@ private: // FF FE UTF-16LE // EF BB BF UTF-8 - const unsigned char* c = (const unsigned char *)is.Peek4(); + const unsigned char* c = (const unsigned char *)is_->Peek4(); if (!c) return; unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24); hasBOM_ = false; - if (bom == 0xFFFE0000) { type_ = kUTF32BE; is.Take(); is.Take(); is.Take(); is.Take(); hasBOM_ = true; } - else if (bom == 0x0000FEFF) { type_ = kUTF32LE; is.Take(); is.Take(); is.Take(); is.Take(); hasBOM_ = true; } - else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; is.Take(); is.Take(); hasBOM_ = true; } - else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; is.Take(); is.Take(); hasBOM_ = true; } - else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; is.Take(); is.Take(); is.Take(); hasBOM_ = true; } + if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if (bom == 0x0000FEFF) { type_ = kUTF32LE; 
hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } // RFC 4627: Section 3 // "Since the first two characters of a JSON text will always be ASCII @@ -161,7 +161,7 @@ private: } typedef Ch (*TakeFunc)(InputByteStream& is); - InputByteStream& is_; + InputByteStream* is_; UTFType type_; Ch current_; TakeFunc takeFunc_; @@ -185,7 +185,7 @@ public: \param type UTF encoding type. \param putBOM Whether to write BOM at the beginning of the stream. */ - AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(os), type_(type) { + AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { // RUntime check whether the size of character type is sufficient. It only perform checks with assertion. switch (type_) { case kUTF16LE: @@ -207,8 +207,8 @@ public: UTFType GetType() const { return type_; } - void Put(Ch c) { putFunc_(os_, c); } - void Flush() { os_.Flush(); } + void Put(Ch c) { putFunc_(*os_, c); } + void Flush() { os_->Flush(); } // Not implemented Ch Peek() const { RAPIDJSON_ASSERT(false); } @@ -221,12 +221,12 @@ private: void PutBOM() { typedef void (*PutBOMFunc)(OutputByteStream&); static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; - f[type_](os_); + f[type_](*os_); } typedef void (*PutFunc)(OutputByteStream&, Ch); - OutputByteStream& os_; + OutputByteStream* os_; UTFType type_; PutFunc putFunc_; };