Added reference performance test for UTF-8 validation
git-svn-id: https://rapidjson.googlecode.com/svn/trunk@36 c5894555-1306-4e8d-425f-1f6f381ee07c
This commit is contained in:
parent
a45bcbba7b
commit
66754fc5b0
61
test/perftest/misctest.cpp
Normal file
61
test/perftest/misctest.cpp
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#include "perftest.h"
|
||||||
|
|
||||||
|
#if TEST_MISC
|
||||||
|
|
||||||
|
// Test fixture for the miscellaneous benchmarks in this file.
// It adds no members of its own; everything it offers is inherited from
// PerfTest (presumably declared in perftest.h -- confirm there).
class Misc : public PerfTest {};
|
||||||
|
|
||||||
|
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
|
||||||
|
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
|
||||||
|
|
||||||
|
// DFA states are stored pre-multiplied by the number of character classes
// (12), so a state value can be added directly to a class index.
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

static const unsigned char utf8d[] = {
    // Part 1 (indices 0..255): maps every byte value to a character class.
    // Working with classes instead of raw bytes keeps the transition table
    // small, and the class number doubles as a shift count for bit masks.
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x90 */ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
    /* 0xA0 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    /* 0xB0 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    /* 0xC0 */ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xD0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xE0 */ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3,
    /* 0xF0 */ 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

    // Part 2 (indices 256..363): the transition table. Each row holds the
    // next state for character classes 0..11; a row starts at 256 + state.
    /* state  0 (accept) */  0,12,24,36,60,96,84,12,12,12,48,72,
    /* state 12 (reject) */ 12,12,12,12,12,12,12,12,12,12,12,12,
    /* state 24          */ 12, 0,12,12,12,12,12, 0,12, 0,12,12,
    /* state 36          */ 12,24,12,12,12,12,12,24,12,24,12,12,
    /* state 48          */ 12,12,12,12,12,12,12,24,12,12,12,12,
    /* state 60          */ 12,24,12,12,12,12,12,12,12,24,12,12,
    /* state 72          */ 12,12,12,12,12,12,12,36,12,36,12,12,
    /* state 84          */ 12,36,12,12,12,12,12,36,12,36,12,12,
    /* state 96          */ 12,36,12,12,12,12,12,12,12,12,12,12,
};
|
||||||
|
|
||||||
|
// Feeds one input byte to the UTF-8 DFA.
//
//   state  in/out: current DFA state; updated to the state after `byte`.
//   codep  in/out: code point accumulator. In the accept state it is restarted
//          from the payload bits of `byte`; in any other state the low six
//          bits of `byte` are shifted in.
//   byte   the next input byte; used as an index into the first 256 entries
//          of utf8d, so it must be in the range 0..255.
//
// Returns the new state (the same value that is stored in *state).
static inline unsigned decode(unsigned* state, unsigned* codep, unsigned byte) {
    const unsigned cls = utf8d[byte];

    if (*state == UTF8_ACCEPT) {
        // Start of a sequence: the class number tells how many leading
        // marker bits to mask off the first byte.
        *codep = (0xff >> cls) & (byte);
    } else {
        // Inside a sequence: append the six low bits of this byte.
        *codep = (byte & 0x3fu) | (*codep << 6);
    }

    // States are stored pre-multiplied, so state + class indexes the row.
    *state = utf8d[256 + *state + cls];
    return *state;
}
|
||||||
|
|
||||||
|
static bool IsUTF8(unsigned char* s) {
|
||||||
|
unsigned codepoint, state = 0;
|
||||||
|
|
||||||
|
while (*s)
|
||||||
|
decode(&state, &codepoint, *s++);
|
||||||
|
|
||||||
|
return state == UTF8_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reference benchmark: validates the fixture's JSON text with the
// Hoehrmann DFA kTrialCount times, expecting success on every pass.
TEST_F(Misc, Hoehrmann_IsUTF8) {
    int trial = 0;
    while (trial < kTrialCount) {
        EXPECT_TRUE(IsUTF8((unsigned char*)json_));
        ++trial;
    }
}
|
||||||
|
|
||||||
|
#endif // TEST_MISC
|
@ -6,6 +6,7 @@
|
|||||||
#define TEST_YAJL 0
|
#define TEST_YAJL 0
|
||||||
#define TEST_ULTRAJSON 0
|
#define TEST_ULTRAJSON 0
|
||||||
#define TEST_PLATFORM 0
|
#define TEST_PLATFORM 0
|
||||||
|
#define TEST_MISC 1
|
||||||
|
|
||||||
#if TEST_RAPIDJSON
|
#if TEST_RAPIDJSON
|
||||||
//#define RAPIDJSON_SSE2
|
//#define RAPIDJSON_SSE2
|
||||||
|
@ -233,14 +233,14 @@ TEST_F(RapidJson, SIMD_SUFFIX(Whitespace)) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(RapidJson, UTF8_Validate) {
|
TEST_F(RapidJson, UTF8_Validate) {
|
||||||
StringBuffer os(0, length_ + 1);
|
NullStream os;
|
||||||
|
|
||||||
for (int i = 0; i < kTrialCount; i++) {
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
StringStream is(json_);
|
StringStream is(json_);
|
||||||
os.Clear();
|
bool result = true;
|
||||||
while (is.Peek() != '\0')
|
while (is.Peek() != '\0')
|
||||||
UTF8<>::Validate(is, os);
|
result &= UTF8<>::Validate(is, os);
|
||||||
EXPECT_EQ(length_, os.GetSize());
|
EXPECT_TRUE(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user