Added prettyauto example, which can handle UTF-8/UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE

Fixed a bug when using Reader with AutoUTFInputStream

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@50 c5894555-1306-4e8d-425f-1f6f381ee07c
2011-12-03 11:14:39 +00:00 · 2011-12-03 11:14:39 +00:00 · bf8fcd19c1
commit bf8fcd19c1
parent 7c914a9d4c
4 changed files with 79 additions and 18 deletions
--- a/build/premake4.lua
+++ b/build/premake4.lua
@ -157,6 +157,11 @@ solution "example"
 		files "../example/pretty/*"
 		setTargetObjDir("../bin")

+	project "prettyauto"
+		kind "ConsoleApp"
+		files "../example/prettyauto/*"
+		setTargetObjDir("../bin")
+
 	project "tutorial"
 		kind "ConsoleApp"
 		files "../example/tutorial/*"
--- a/example/pretty/pretty.cpp
+++ b/example/pretty/pretty.cpp
@ -1,4 +1,5 @@
 // JSON pretty formatting example
+// This example can only handle UTF-8. For handling other encodings, see prettyauto example.

 #include "rapidjson/reader.h"
 #include "rapidjson/prettywriter.h"
--- a/example/prettyauto/prettyauto.cpp
+++ b/example/prettyauto/prettyauto.cpp
@ -0,0 +1,55 @@
+// JSON pretty formatting example
+// This example can handle UTF-8/UTF-16LE/UTF-16BE/UTF-32LE/UTF-32BE.
+// The input is first converted to UTF-8, and then written back in its original encoding with pretty formatting.
+
+#include "rapidjson/reader.h"
+#include "rapidjson/prettywriter.h"
+#include "rapidjson/filereadstream.h"
+#include "rapidjson/filewritestream.h"
+#include "rapidjson/encodedstream.h"	// NEW
+#ifdef _WIN32
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+using namespace rapidjson;
+
+int main(int argc, char* argv[]) {
+#ifdef _WIN32
+	// Prevent Windows converting between CR+LF and LF
+	_setmode(_fileno(stdin), _O_BINARY);	// NEW
+	_setmode(_fileno(stdout), _O_BINARY);	// NEW
+#endif
+
+	// Prepare reader and input stream.
+	//Reader reader;
+	GenericReader<AutoUTF<unsigned>, UTF8<> > reader;		// CHANGED
+	char readBuffer[65536];
+	FileReadStream is(stdin, readBuffer, sizeof(readBuffer));
+	AutoUTFInputStream<unsigned, FileReadStream> eis(is);	// NEW
+
+	// Prepare writer and output stream.
+	char writeBuffer[65536];
+	FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer));
+
+#if 1
+	// Use the same encoding as the input. Also write a BOM only if the input had one.
+	typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream;	// NEW
+	OutputStream eos(os, eis.GetType(), eis.HasBOM());						// NEW
+	PrettyWriter<OutputStream, UTF8<>, AutoUTF<unsigned> > writer(eos);		// CHANGED
+#else
+	// You may also use a statically bound encoding type, e.g. output UTF-16LE with a BOM.
+	typedef EncodedOutputStream<UTF16LE<>,FileWriteStream> OutputStream;	// NEW
+	OutputStream eos(os, true);												// NEW
+	PrettyWriter<OutputStream, UTF8<>, UTF16LE<> > writer(eos);				// CHANGED
+#endif
+
+	// The JSON reader parses from the input stream and lets the writer generate the output.
+	//if (!reader.Parse<kParseValidateEncodingFlag>(is, writer)) {
+	if (!reader.Parse<kParseValidateEncodingFlag>(eis, writer)) {	// CHANGED
+		fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), reader.GetParseError());
+		return 1;
+	}
+
+	return 0;
+}
--- a/include/rapidjson/encodedstream.h
+++ b/include/rapidjson/encodedstream.h
@ -83,19 +83,19 @@ public:
 		\param is input stream to be wrapped.
 		\param type UTF encoding type if it is not detected from the stream.
 	*/
-	AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(is), type_(type), hasBOM_(false) {
-		DetectType(is);
+	AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
+		DetectType();
 		static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
 		takeFunc_ = f[type_];
-		current_ = takeFunc_(is_);
+		current_ = takeFunc_(*is_);
 	}

 	UTFType GetType() const { return type_; }
 	bool HasBOM() const { return hasBOM_; }

 	Ch Peek() const { return current_; }
-	Ch Take() { Ch c = current_; current_ = takeFunc_(is_); return c; }
-	size_t Tell() const { is_.Tell(); }
+	Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
+	size_t Tell() const { return is_->Tell(); }

 	// Not implemented
 	void Put(Ch c) { RAPIDJSON_ASSERT(false); }
@ -105,7 +105,7 @@ public:

 private:
 	// Detect encoding type with BOM or RFC 4627
-	void DetectType(InputByteStream& is) {
+	void DetectType() {
 		// BOM (Byte Order Mark):
 		// 00 00 FE FF  UTF-32BE
 		// FF FE 00 00  UTF-32LE
@ -113,17 +113,17 @@ private:
 		// FF FE		UTF-16LE
 		// EF BB BF		UTF-8

-		const unsigned char* c = (const unsigned char *)is.Peek4();
+		const unsigned char* c = (const unsigned char *)is_->Peek4();
 		if (!c)
 			return;

 		unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
 		hasBOM_ = false;
-		if (bom == 0xFFFE0000)					{ type_ = kUTF32BE; is.Take(); is.Take(); is.Take(); is.Take(); hasBOM_ = true; }
-		else if (bom == 0x0000FEFF)				{ type_ = kUTF32LE;	is.Take(); is.Take(); is.Take(); is.Take();	hasBOM_ = true;	}
-		else if ((bom & 0xFFFF) == 0xFFFE)		{ type_ = kUTF16BE; is.Take(); is.Take();						hasBOM_ = true; }
-		else if ((bom & 0xFFFF) == 0xFEFF)		{ type_ = kUTF16LE; is.Take(); is.Take();						hasBOM_ = true; }
-		else if ((bom & 0xFFFFFF) == 0xBFBBEF)	{ type_ = kUTF8;	is.Take(); is.Take(); is.Take();			hasBOM_ = true; }
+		if (bom == 0xFFFE0000)					{ type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
+		else if (bom == 0x0000FEFF)				{ type_ = kUTF32LE;	hasBOM_ = true;	is_->Take(); is_->Take(); is_->Take(); is_->Take();	}
+		else if ((bom & 0xFFFF) == 0xFFFE)		{ type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();							}
+		else if ((bom & 0xFFFF) == 0xFEFF)		{ type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();							}
+		else if ((bom & 0xFFFFFF) == 0xBFBBEF)	{ type_ = kUTF8;	hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();				}

 		// RFC 4627: Section 3
 		// "Since the first two characters of a JSON text will always be ASCII
@ -161,7 +161,7 @@ private:
 	}

 	typedef Ch (*TakeFunc)(InputByteStream& is);
-	InputByteStream& is_;
+	InputByteStream* is_;
 	UTFType type_;
 	Ch current_;
 	TakeFunc takeFunc_;
@ -185,7 +185,7 @@ public:
 		\param type UTF encoding type.
 		\param putBOM Whether to write BOM at the beginning of the stream.
 	*/
-	AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(os), type_(type) {
+	AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
 		// RUntime check whether the size of character type is sufficient. It only perform checks with assertion.
 		switch (type_) {
 		case kUTF16LE:
@ -207,8 +207,8 @@ public:

 	UTFType GetType() const { return type_; }

-	void Put(Ch c) { putFunc_(os_, c); }
-	void Flush() { os_.Flush(); } 
+	void Put(Ch c) { putFunc_(*os_, c); }
+	void Flush() { os_->Flush(); } 

 	// Not implemented
 	Ch Peek() const { RAPIDJSON_ASSERT(false); }
@ -221,12 +221,12 @@ private:
 	void PutBOM() { 
 		typedef void (*PutBOMFunc)(OutputByteStream&);
 		static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
-		f[type_](os_);
+		f[type_](*os_);
 	}

 	typedef void (*PutFunc)(OutputByteStream&, Ch);

-	OutputByteStream& os_;
+	OutputByteStream* os_;
 	UTFType type_;
 	PutFunc putFunc_;
 };