From bce34fbea8400a54356d71e200e67593033cfd39 Mon Sep 17 00:00:00 2001 From: "miloyip@gmail.com" Date: Mon, 21 Nov 2011 06:56:47 +0000 Subject: [PATCH] Proved FileStream is very slow due to fgetc(). Added a new FileReadStream which uses fread() with buffer. git-svn-id: https://rapidjson.googlecode.com/svn/trunk@22 c5894555-1306-4e8d-425f-1f6f381ee07c --- include/rapidjson/filereadstream.h | 67 +++++++++++++++++++++++++++++ include/rapidjson/filestream.h | 8 +++- test/perftest/perftest.h | 7 +-- test/perftest/rapidjsontest.cpp | 56 ++++++++++++++++++++++++ test/unittest/filestreamtest.cpp | 69 ++++++++++++++++++++++++++++++ 5 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 include/rapidjson/filereadstream.h create mode 100644 test/unittest/filestreamtest.cpp diff --git a/include/rapidjson/filereadstream.h b/include/rapidjson/filereadstream.h new file mode 100644 index 0000000..a926fde --- /dev/null +++ b/include/rapidjson/filereadstream.h @@ -0,0 +1,67 @@ +#ifndef RAPIDJSON_FILEREADSTREAM_H_ +#define RAPIDJSON_FILEREADSTREAM_H_ + +#include "rapidjson.h" +#include + +namespace rapidjson { + +//! Wrapper of C file stream for input using fread(). +/*! + \implements Stream +*/ +class FileReadStream { +public: + typedef char Ch; //!< Character type. Only support char. 
+ + FileReadStream(FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { + RAPIDJSON_ASSERT(fp_ != 0); + Read(); + } + + char Peek() const { return *current_; } + char Take() { char c = *current_; Read(); return c; } + size_t Tell() const { return count_ + (current_ - buffer_); } + + // Not implemented + void Put(char c) { RAPIDJSON_ASSERT(false); } + char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + void Read() { + if (current_ < bufferLast_) + ++current_; + else + FillBuffer(); + } + + void FillBuffer() { + if (!eof_) { + count_ += readCount_; + readCount_ = fread(buffer_, 1, bufferSize_, fp_); + bufferLast_ = buffer_ + readCount_ - 1; + current_ = buffer_; + + if (readCount_ < bufferSize_) { + buffer_[readCount_] = '\0'; + ++bufferLast_; + ++count_; + eof_ = true; + } + } + } + + FILE* fp_; + char *buffer_; + size_t bufferSize_; + char *bufferLast_; + char *current_; + size_t readCount_; + size_t count_; //!< Number of characters read + bool eof_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_FILEREADSTREAM_H_ diff --git a/include/rapidjson/filestream.h b/include/rapidjson/filestream.h index 24573aa..e3201c4 100644 --- a/include/rapidjson/filestream.h +++ b/include/rapidjson/filestream.h @@ -1,14 +1,16 @@ #ifndef RAPIDJSON_FILESTREAM_H_ #define RAPIDJSON_FILESTREAM_H_ +#include "rapidjson.h" #include namespace rapidjson { -//! Wrapper of C file stream for input or output. +//! (Deprecated) Wrapper of C file stream for input or output. /*! This simple wrapper does not check the validity of the stream. \implements Stream + \deprecated { This was only for basic testing in version 0.1, it is found that the performance is very low by using fgetc(). Use FileReadStream instead. 
} */ class FileStream { public: @@ -32,8 +34,10 @@ private: current_ = (char)c; count_++; } - else + else if (current_ != '\0') { current_ = '\0'; + count_++; + } } FILE* fp_; diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h index 1790a7a..2c358ff 100644 --- a/test/perftest/perftest.h +++ b/test/perftest/perftest.h @@ -33,9 +33,9 @@ class PerfTest : public ::testing::Test { public: virtual void SetUp() { - FILE *fp = fopen("data/sample.json", "rb"); - if (!fp) - fp = fopen("../../bin/data/sample.json", "rb"); + FILE *fp = fopen(filename_ = "data/sample.json", "rb"); + if (!fp) + fp = fopen(filename_ = "../../bin/data/sample.json", "rb"); ASSERT_TRUE(fp != 0); fseek(fp, 0, SEEK_END); @@ -69,6 +69,7 @@ public: } protected: + const char* filename_; char *json_; size_t length_; char *whitespace_; diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index 66eec9c..fbf22f7 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -7,6 +7,7 @@ #include "rapidjson/prettywriter.h" #include "rapidjson/stringbuffer.h" #include "rapidjson/filestream.h" +#include "rapidjson/filereadstream.h" #include #ifdef RAPIDJSON_SSE2 @@ -58,6 +59,15 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_NullHandler)) { } } +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler)) { + for (int i = 0; i < kTrialCount; i++) { + StringStream s(json_); + BaseReaderHandler<> h; + Reader reader; + reader.Parse<0>(s, h); + } +} + TEST_F(RapidJson, SIMD_SUFFIX(DoucmentParseInsitu_MemoryPoolAllocator)) { //const size_t userBufferSize = 128 * 1024; //char* userBuffer = (char*)malloc(userBufferSize); @@ -231,4 +241,50 @@ TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) { } } + +TEST_F(RapidJson, fread) { + for (int i = 0; i < kTrialCount; i++) { + FILE *fp = fopen(filename_, "rb"); + fread(temp_, 1, length_, fp); + temp_[length_] = '\0'; + for (char *p = temp_; *p; ++p) + ; + fclose(fp); + } +} + +// Deprecated. 
+//TEST_F(RapidJson, FileStream_Read) { +// for (int i = 0; i < kTrialCount; i++) { +// FILE *fp = fopen(filename_, "rb"); +// FileStream s(fp); +// while (s.Take() != '\0') +// ; +// fclose(fp); +// } +//} + +TEST_F(RapidJson, FileReadStream) { + for (int i = 0; i < kTrialCount; i++) { + FILE *fp = fopen(filename_, "rb"); + char buffer[65536]; + FileReadStream s(fp, buffer, sizeof(buffer)); + while (s.Take() != '\0') + ; + fclose(fp); + } +} + +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler_FileReadStream)) { + for (int i = 0; i < kTrialCount; i++) { + FILE *fp = fopen(filename_, "rb"); + char buffer[65536]; + FileReadStream s(fp, buffer, sizeof(buffer)); + BaseReaderHandler<> h; + Reader reader; + reader.Parse<0>(s, h); + fclose(fp); + } +} + #endif // TEST_RAPIDJSON diff --git a/test/unittest/filestreamtest.cpp b/test/unittest/filestreamtest.cpp new file mode 100644 index 0000000..cc19798 --- /dev/null +++ b/test/unittest/filestreamtest.cpp @@ -0,0 +1,69 @@ +#include "unittest.h" +#include "rapidjson/filestream.h" +#include "rapidjson/filereadstream.h" + +using namespace rapidjson; + +class FileStreamTest : public ::testing::Test { + virtual void SetUp() { + FILE *fp = fopen(filename_ = "data/sample.json", "rb"); + if (!fp) + fp = fopen(filename_ = "../../bin/data/sample.json", "rb"); + ASSERT_TRUE(fp != 0); + + fseek(fp, 0, SEEK_END); + length_ = (size_t)ftell(fp); + fseek(fp, 0, SEEK_SET); + json_ = (char*)malloc(length_ + 1); + fread(json_, 1, length_, fp); + json_[length_] = '\0'; + length_++; // include the null terminator + fclose(fp); + } + + virtual void TearDown() { + free(json_); + } + +protected: + const char* filename_; + char *json_; + size_t length_; +}; + +TEST_F(FileStreamTest, Read) { + FILE *fp = fopen(filename_, "rb"); + ASSERT_TRUE(fp != 0); + FileStream s(fp); + + for (size_t i = 0; i < length_; i++) { + EXPECT_EQ(json_[i], s.Peek()); + EXPECT_EQ(json_[i], s.Peek()); // 2nd time should be the same + EXPECT_EQ(json_[i], s.Take()); + } 
+ + EXPECT_EQ(length_, s.Tell()); + EXPECT_EQ('\0', s.Peek()); + + fclose(fp); +} + +TEST_F(FileStreamTest, BufferedRead) { + FILE *fp = fopen(filename_, "rb"); + ASSERT_TRUE(fp != 0); + char buffer[65536]; + FileReadStream s(fp, buffer, sizeof(buffer)); + + for (size_t i = 0; i < length_; i++) { + if (json_[i] != s.Peek()) + __asm int 3; + ASSERT_EQ(json_[i], s.Peek()); + ASSERT_EQ(json_[i], s.Peek()); // 2nd time should be the same + ASSERT_EQ(json_[i], s.Take()); + } + + EXPECT_EQ(length_, s.Tell()); + EXPECT_EQ('\0', s.Peek()); + + fclose(fp); +}