commit
0cf064422a
@ -9,7 +9,9 @@ set(LIB_PATCH_VERSION "2")
|
||||
set(LIB_VERSION_STRING "${LIB_MAJOR_VERSION}.${LIB_MINOR_VERSION}.${LIB_PATCH_VERSION}")
|
||||
|
||||
# Compile in release-with-debug-info mode by default.
#
# The default is applied only when the user has not chosen a build type and
# the generator is single-config. Multi-config generators (Visual Studio,
# Xcode) populate CMAKE_CONFIGURATION_TYPES and ignore CMAKE_BUILD_TYPE, so
# nothing is forced on them. FORCE is needed here because the cache entry
# already exists (as an empty string) once the project has been declared; a
# plain set(... CACHE ...) would silently leave it empty.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Build all binaries in a separate directory
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
|
||||
|
150
bin/draft-04/schema
Normal file
150
bin/draft-04/schema
Normal file
@ -0,0 +1,150 @@
|
||||
{
|
||||
"id": "http://json-schema.org/draft-04/schema#",
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"description": "Core schema meta-schema",
|
||||
"definitions": {
|
||||
"schemaArray": {
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"items": { "$ref": "#" }
|
||||
},
|
||||
"positiveInteger": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
},
|
||||
"positiveIntegerDefault0": {
|
||||
"allOf": [ { "$ref": "#/definitions/positiveInteger" }, { "default": 0 } ]
|
||||
},
|
||||
"simpleTypes": {
|
||||
"enum": [ "array", "boolean", "integer", "null", "number", "object", "string" ]
|
||||
},
|
||||
"stringArray": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"minItems": 1,
|
||||
"uniqueItems": true
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"format": "uri"
|
||||
},
|
||||
"$schema": {
|
||||
"type": "string",
|
||||
"format": "uri"
|
||||
},
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": {},
|
||||
"multipleOf": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"exclusiveMinimum": true
|
||||
},
|
||||
"maximum": {
|
||||
"type": "number"
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
},
|
||||
"minimum": {
|
||||
"type": "number"
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
},
|
||||
"maxLength": { "$ref": "#/definitions/positiveInteger" },
|
||||
"minLength": { "$ref": "#/definitions/positiveIntegerDefault0" },
|
||||
"pattern": {
|
||||
"type": "string",
|
||||
"format": "regex"
|
||||
},
|
||||
"additionalItems": {
|
||||
"anyOf": [
|
||||
{ "type": "boolean" },
|
||||
{ "$ref": "#" }
|
||||
],
|
||||
"default": {}
|
||||
},
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{ "$ref": "#" },
|
||||
{ "$ref": "#/definitions/schemaArray" }
|
||||
],
|
||||
"default": {}
|
||||
},
|
||||
"maxItems": { "$ref": "#/definitions/positiveInteger" },
|
||||
"minItems": { "$ref": "#/definitions/positiveIntegerDefault0" },
|
||||
"uniqueItems": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
},
|
||||
"maxProperties": { "$ref": "#/definitions/positiveInteger" },
|
||||
"minProperties": { "$ref": "#/definitions/positiveIntegerDefault0" },
|
||||
"required": { "$ref": "#/definitions/stringArray" },
|
||||
"additionalProperties": {
|
||||
"anyOf": [
|
||||
{ "type": "boolean" },
|
||||
{ "$ref": "#" }
|
||||
],
|
||||
"default": {}
|
||||
},
|
||||
"definitions": {
|
||||
"type": "object",
|
||||
"additionalProperties": { "$ref": "#" },
|
||||
"default": {}
|
||||
},
|
||||
"properties": {
|
||||
"type": "object",
|
||||
"additionalProperties": { "$ref": "#" },
|
||||
"default": {}
|
||||
},
|
||||
"patternProperties": {
|
||||
"type": "object",
|
||||
"additionalProperties": { "$ref": "#" },
|
||||
"default": {}
|
||||
},
|
||||
"dependencies": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"anyOf": [
|
||||
{ "$ref": "#" },
|
||||
{ "$ref": "#/definitions/stringArray" }
|
||||
]
|
||||
}
|
||||
},
|
||||
"enum": {
|
||||
"type": "array",
|
||||
"minItems": 1,
|
||||
"uniqueItems": true
|
||||
},
|
||||
"type": {
|
||||
"anyOf": [
|
||||
{ "$ref": "#/definitions/simpleTypes" },
|
||||
{
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/definitions/simpleTypes" },
|
||||
"minItems": 1,
|
||||
"uniqueItems": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"allOf": { "$ref": "#/definitions/schemaArray" },
|
||||
"anyOf": { "$ref": "#/definitions/schemaArray" },
|
||||
"oneOf": { "$ref": "#/definitions/schemaArray" },
|
||||
"not": { "$ref": "#" }
|
||||
},
|
||||
"dependencies": {
|
||||
"exclusiveMaximum": [ "maximum" ],
|
||||
"exclusiveMinimum": [ "minimum" ]
|
||||
},
|
||||
"default": {}
|
||||
}
|
1
bin/jsonschema/.gitignore
vendored
Normal file
1
bin/jsonschema/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
TODO
|
4
bin/jsonschema/.travis.yml
Normal file
4
bin/jsonschema/.travis.yml
Normal file
@ -0,0 +1,4 @@
|
||||
language: python
|
||||
python: "2.7"
|
||||
install: pip install jsonschema
|
||||
script: bin/jsonschema_suite check
|
19
bin/jsonschema/LICENSE
Normal file
19
bin/jsonschema/LICENSE
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2012 Julian Berman
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
148
bin/jsonschema/README.md
Normal file
148
bin/jsonschema/README.md
Normal file
@ -0,0 +1,148 @@
|
||||
JSON Schema Test Suite [Build Status](https://travis-ci.org/json-schema/JSON-Schema-Test-Suite)
|
||||
======================
|
||||
|
||||
This repository contains a set of JSON objects that implementors of JSON Schema
|
||||
validation libraries can use to test their validators.
|
||||
|
||||
It is meant to be language agnostic and should require only a JSON parser.
|
||||
|
||||
The conversion of the JSON objects into tests within your test framework of
|
||||
choice is still the job of the validator implementor.
|
||||
|
||||
Structure of a Test
|
||||
-------------------
|
||||
|
||||
If you're going to use this suite, you need to know how tests are laid out. The
|
||||
tests are contained in the `tests` directory at the root of this repository.
|
||||
|
||||
Inside that directory is a subdirectory for each draft or version of the
|
||||
schema. We'll use `draft3` as an example.
|
||||
|
||||
If you look inside the draft directory, there are a number of `.json` files,
|
||||
which logically group a set of test cases together. Often the grouping is by
|
||||
property under test, but not always, especially within optional test files
|
||||
(discussed below).
|
||||
|
||||
Inside each `.json` file is a single array containing objects. It's easiest to
|
||||
illustrate the structure of these with an example:
|
||||
|
||||
```json
|
||||
{
|
||||
"description": "the description of the test case",
|
||||
"schema": {"the schema that should" : "be validated against"},
|
||||
"tests": [
|
||||
{
|
||||
"description": "a specific test of a valid instance",
|
||||
"data": "the instance",
|
||||
"valid": true
|
||||
},
|
||||
{
|
||||
"description": "another specific test this time, invalid",
|
||||
"data": 15,
|
||||
"valid": false
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
So a description, a schema, and some tests, where tests is an array containing
|
||||
one or more objects with descriptions, data, and a boolean indicating whether
|
||||
they should be valid or invalid.
|
||||
|
||||
Coverage
|
||||
--------
|
||||
|
||||
Draft 3 and 4 should have full coverage. If you see anything missing or think
|
||||
there is a useful test missing, please send a pull request or open an issue.
|
||||
|
||||
Who Uses the Test Suite
|
||||
-----------------------
|
||||
|
||||
This suite is being used by:
|
||||
|
||||
### Coffeescript ###
|
||||
|
||||
* [jsck](https://github.com/pandastrike/jsck)
|
||||
|
||||
### Dart ###
|
||||
|
||||
* [json_schema](https://github.com/patefacio/json_schema)
|
||||
|
||||
### Erlang ###
|
||||
|
||||
* [jesse](https://github.com/klarna/jesse)
|
||||
|
||||
### Go ###
|
||||
|
||||
* [gojsonschema](https://github.com/sigu-399/gojsonschema)
|
||||
* [validate-json](https://github.com/cesanta/validate-json)
|
||||
|
||||
### Haskell ###
|
||||
|
||||
* [aeson-schema](https://github.com/timjb/aeson-schema)
|
||||
* [hjsonschema](https://github.com/seagreen/hjsonschema)
|
||||
|
||||
### Java ###
|
||||
|
||||
* [json-schema-validator](https://github.com/fge/json-schema-validator)
|
||||
|
||||
### JavaScript ###
|
||||
|
||||
* [json-schema-benchmark](https://github.com/Muscula/json-schema-benchmark)
|
||||
* [direct-schema](https://github.com/IreneKnapp/direct-schema)
|
||||
* [is-my-json-valid](https://github.com/mafintosh/is-my-json-valid)
|
||||
* [jassi](https://github.com/iclanzan/jassi)
|
||||
* [JaySchema](https://github.com/natesilva/jayschema)
|
||||
* [json-schema-valid](https://github.com/ericgj/json-schema-valid)
|
||||
* [Jsonary](https://github.com/jsonary-js/jsonary)
|
||||
* [jsonschema](https://github.com/tdegrunt/jsonschema)
|
||||
* [request-validator](https://github.com/bugventure/request-validator)
|
||||
* [skeemas](https://github.com/Prestaul/skeemas)
|
||||
* [tv4](https://github.com/geraintluff/tv4)
|
||||
* [z-schema](https://github.com/zaggino/z-schema)
|
||||
* [jsen](https://github.com/bugventure/jsen)
|
||||
* [ajv](https://github.com/epoberezkin/ajv)
|
||||
|
||||
### Node.js ###
|
||||
|
||||
The JSON Schema Test Suite is also available as an
|
||||
[npm](https://www.npmjs.com/package/json-schema-test-suite) package.
|
||||
Node-specific support is maintained on the [node branch](https://github.com/json-schema/JSON-Schema-Test-Suite/tree/node).
|
||||
See [NODE-README.md](https://github.com/json-schema/JSON-Schema-Test-Suite/blob/node/NODE-README.md)
|
||||
for more information.
|
||||
|
||||
### .NET ###
|
||||
|
||||
* [Newtonsoft.Json.Schema](https://github.com/JamesNK/Newtonsoft.Json.Schema)
|
||||
|
||||
### PHP ###
|
||||
|
||||
* [json-schema](https://github.com/justinrainbow/json-schema)
|
||||
|
||||
### Python ###
|
||||
|
||||
* [jsonschema](https://github.com/Julian/jsonschema)
|
||||
|
||||
### Ruby ###
|
||||
|
||||
* [json-schema](https://github.com/hoxworth/json-schema)
|
||||
|
||||
### Rust ###
|
||||
|
||||
* [valico](https://github.com/rustless/valico)
|
||||
|
||||
### Swift ###
|
||||
|
||||
* [JSONSchema](https://github.com/kylef/JSONSchema.swift)
|
||||
|
||||
If you use it as well, please fork and send a pull request adding yourself to
|
||||
the list :).
|
||||
|
||||
Contributing
|
||||
------------
|
||||
|
||||
If you see something missing or incorrect, a pull request is most welcome!
|
||||
|
||||
There are some sanity checks in place for testing the test suite. You can run
|
||||
them with `bin/jsonschema_suite check` or `tox`. They will be run automatically by
|
||||
[Travis CI](https://travis-ci.org/) as well.
|
283
bin/jsonschema/bin/jsonschema_suite
Executable file
283
bin/jsonschema/bin/jsonschema_suite
Executable file
@ -0,0 +1,283 @@
|
||||
#! /usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
try:
|
||||
import argparse
|
||||
except ImportError:
|
||||
print(textwrap.dedent("""
|
||||
The argparse library could not be imported. jsonschema_suite requires
|
||||
either Python 2.7 or for you to install argparse. You can do so by
|
||||
running `pip install argparse`, `easy_install argparse` or by
|
||||
downloading argparse and running `python2.6 setup.py install`.
|
||||
|
||||
See https://pypi.python.org/pypi/argparse for details.
|
||||
""".strip("\n")))
|
||||
sys.exit(1)
|
||||
|
||||
import errno
|
||||
import fnmatch
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
if getattr(unittest, "skipIf", None) is None:
|
||||
unittest.skipIf = lambda cond, msg : lambda fn : fn
|
||||
|
||||
try:
|
||||
import jsonschema
|
||||
except ImportError:
|
||||
jsonschema = None
|
||||
else:
|
||||
validators = getattr(
|
||||
jsonschema.validators, "validators", jsonschema.validators
|
||||
)
|
||||
|
||||
|
||||
ROOT_DIR = os.path.join(
|
||||
os.path.dirname(__file__), os.pardir).rstrip("__pycache__")
|
||||
SUITE_ROOT_DIR = os.path.join(ROOT_DIR, "tests")
|
||||
|
||||
REMOTES = {
|
||||
"integer.json": {"type": "integer"},
|
||||
"subSchemas.json": {
|
||||
"integer": {"type": "integer"},
|
||||
"refToInteger": {"$ref": "#/integer"},
|
||||
},
|
||||
"folder/folderInteger.json": {"type": "integer"}
|
||||
}
|
||||
REMOTES_DIR = os.path.join(ROOT_DIR, "remotes")
|
||||
|
||||
TESTSUITE_SCHEMA = {
|
||||
"$schema": "http://json-schema.org/draft-03/schema#",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {"type": "string", "required": True},
|
||||
"schema": {"required": True},
|
||||
"tests": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {"type": "string", "required": True},
|
||||
"data": {"required": True},
|
||||
"valid": {"type": "boolean", "required": True}
|
||||
},
|
||||
"additionalProperties": False
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"additionalProperties": False,
|
||||
"minItems": 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def files(paths):
    """Yield the parsed JSON document stored in each file named by ``paths``.

    Each file is fully parsed and closed *before* its document is yielded, so
    no file handle stays open while the consumer works on the result, or if
    the consumer abandons the generator part-way through.

    Raises whatever ``open`` or ``json.load`` raise for an unreadable file or
    for malformed JSON.
    """
    for path in paths:
        with open(path) as test_file:
            document = json.load(test_file)
        # Yield outside the ``with`` block: the handle is already closed here.
        yield document
|
||||
|
||||
|
||||
def groups(paths):
    """Yield each test group from every JSON document named by ``paths``.

    Every document produced by ``files`` is iterated directly, so each one is
    expected to be an iterable of groups.
    """
    for document in files(paths):
        for entry in document:
            yield entry
|
||||
|
||||
|
||||
def cases(paths):
    """Yield every individual test from every group found in ``paths``.

    Each yielded test dict is annotated in place with a ``"schema"`` key that
    holds the schema of the group it came from.
    """
    for group in groups(paths):
        for single_test in group["tests"]:
            # Attach the owning group's schema so each test is self-contained.
            single_test["schema"] = group["schema"]
            yield single_test
|
||||
|
||||
|
||||
def collect(root_dir):
    """Yield the path of every ``*.json`` file found beneath ``root_dir``.

    Directories and file names are visited in sorted order, so the sequence of
    yielded paths is the same on every platform and filesystem (``os.walk``
    otherwise reports entries in arbitrary order).  Yields nothing when
    ``root_dir`` does not exist or holds no matching files.
    """
    for root, dirs, filenames in os.walk(root_dir):
        # Sorting in place steers the order in which os.walk descends.
        dirs.sort()
        for filename in sorted(fnmatch.filter(filenames, "*.json")):
            yield os.path.join(root, filename)
|
||||
|
||||
|
||||
class SanityTests(unittest.TestCase):
    # Sanity checks that the suite runs against its own JSON files.
    #
    # The test methods are described with comments rather than docstrings on
    # purpose: in verbose mode unittest prints the first docstring line in
    # place of the test name, which would change the runner's output.

    @classmethod
    def setUpClass(cls):
        # Discover the suite's JSON files once and share them with every test.
        print("Looking for tests in %s" % SUITE_ROOT_DIR)
        cls.test_files = list(collect(SUITE_ROOT_DIR))
        print("Found %s test files" % len(cls.test_files))
        assert cls.test_files, "Didn't find the test files!"

    def test_all_files_are_valid_json(self):
        # Every collected file must parse as JSON.
        for path in self.test_files:
            with open(path) as handle:
                try:
                    json.load(handle)
                except ValueError as exc:
                    self.fail("%s contains invalid JSON (%s)" % (path, exc))

    def test_all_descriptions_have_reasonable_length(self):
        # Test descriptions must stay under 60 characters.
        for case in cases(self.test_files):
            text = case["description"]
            message = "%r is too long! (keep it to less than 60 chars)" % (text,)
            self.assertLess(len(text), 60, message)

    def test_all_descriptions_are_unique(self):
        # Within one group no two tests may share a description.
        for group in groups(self.test_files):
            distinct = set(entry["description"] for entry in group["tests"])
            self.assertEqual(
                len(distinct),
                len(group["tests"]),
                "%r contains a duplicate description" % (group,)
            )

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_all_schemas_are_valid(self):
        # Each directory under the suite root is looked up as a validator name;
        # directories without a matching validator only produce a warning.
        for version in os.listdir(SUITE_ROOT_DIR):
            checker = validators.get(version)
            if checker is None:
                warnings.warn("No schema validator for %s" % version)
                continue

            version_dir = os.path.join(SUITE_ROOT_DIR, version)
            for case in cases(collect(version_dir)):
                try:
                    checker.check_schema(case["schema"])
                except jsonschema.SchemaError as exc:
                    self.fail("%s contains an invalid schema (%s)" %
                              (case, exc))

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_suites_are_valid(self):
        # Every test file must itself conform to TESTSUITE_SCHEMA.
        suite_validator = jsonschema.Draft3Validator(TESTSUITE_SCHEMA)
        for document in files(self.test_files):
            try:
                suite_validator.validate(document)
            except jsonschema.ValidationError as exc:
                self.fail(str(exc))

    def test_remote_schemas_are_updated(self):
        # The files under REMOTES_DIR must match the REMOTES table exactly.
        for url, expected in REMOTES.items():
            with open(os.path.join(REMOTES_DIR, url)) as handle:
                self.assertEqual(json.load(handle), expected)
|
||||
|
||||
|
||||
def main(arguments):
    """Run the sub-command selected on the command line.

    ``arguments`` is the parsed argparse namespace; ``arguments.command``
    selects one of:

    * ``check`` -- run ``SanityTests`` and exit with status 0 on success,
      1 on failure.
    * ``flatten`` -- write every test case found under ``arguments.version``
      to stdout as one JSON array (shuffled when ``arguments.randomize``).
    * ``remotes`` -- write the ``REMOTES`` table to stdout as JSON.
    * ``dump_remotes`` -- write each ``REMOTES`` entry to a file below
      ``arguments.out_dir``; exits with status 1 if that directory already
      exists, unless ``arguments.update`` is set, in which case it is removed
      and recreated.
    * ``serve`` -- serve the ``REMOTES`` table over HTTP on port 1234
      (requires Flask; exits with status 1 if it cannot be imported).
    """
    if arguments.command == "check":
        suite = unittest.TestLoader().loadTestsFromTestCase(SanityTests)
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        sys.exit(not result.wasSuccessful())
    elif arguments.command == "flatten":
        selected_cases = [case for case in cases(collect(arguments.version))]

        if arguments.randomize:
            random.shuffle(selected_cases)

        json.dump(selected_cases, sys.stdout, indent=4, sort_keys=True)
    elif arguments.command == "remotes":
        json.dump(REMOTES, sys.stdout, indent=4, sort_keys=True)
    elif arguments.command == "dump_remotes":
        if arguments.update:
            shutil.rmtree(arguments.out_dir, ignore_errors=True)

        try:
            os.makedirs(arguments.out_dir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                print("%s already exists. Aborting." % arguments.out_dir)
                sys.exit(1)
            raise

        for url, schema in REMOTES.items():
            filepath = os.path.join(arguments.out_dir, url)

            # Remote URLs may contain sub-folders; an existing one is fine.
            try:
                os.makedirs(os.path.dirname(filepath))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

            # Text mode: json.dump writes str, so a binary-mode handle
            # raises TypeError on Python 3.
            with open(filepath, "w") as out_file:
                json.dump(schema, out_file, indent=4, sort_keys=True)
    elif arguments.command == "serve":
        # Flask is imported lazily so the other commands work without it.
        try:
            from flask import Flask, jsonify
        except ImportError:
            print(textwrap.dedent("""
                The Flask library is required to serve the remote schemas.

                You can install it by running `pip install Flask`.

                Alternatively, see the `jsonschema_suite remotes` or
                `jsonschema_suite dump_remotes` commands to create static files
                that can be served with your own web server.
            """.strip("\n")))
            sys.exit(1)

        app = Flask(__name__)

        @app.route("/<path:path>")
        def serve_path(path):
            if path in REMOTES:
                return jsonify(REMOTES[path])
            return "Document does not exist.", 404

        app.run(port=1234)
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="JSON Schema Test Suite utilities",
|
||||
)
|
||||
subparsers = parser.add_subparsers(help="utility commands", dest="command")
|
||||
|
||||
check = subparsers.add_parser("check", help="Sanity check the test suite.")
|
||||
|
||||
flatten = subparsers.add_parser(
|
||||
"flatten",
|
||||
help="Output a flattened file containing a selected version's test cases."
|
||||
)
|
||||
flatten.add_argument(
|
||||
"--randomize",
|
||||
action="store_true",
|
||||
help="Randomize the order of the outputted cases.",
|
||||
)
|
||||
flatten.add_argument(
|
||||
"version", help="The directory containing the version to output",
|
||||
)
|
||||
|
||||
remotes = subparsers.add_parser(
|
||||
"remotes",
|
||||
help="Output the expected URLs and their associated schemas for remote "
|
||||
"ref tests as a JSON object."
|
||||
)
|
||||
|
||||
dump_remotes = subparsers.add_parser(
|
||||
"dump_remotes", help="Dump the remote ref schemas into a file tree",
|
||||
)
|
||||
dump_remotes.add_argument(
|
||||
"--update",
|
||||
action="store_true",
|
||||
help="Update the remotes in an existing directory.",
|
||||
)
|
||||
dump_remotes.add_argument(
|
||||
"--out-dir",
|
||||
default=REMOTES_DIR,
|
||||
type=os.path.abspath,
|
||||
help="The output directory to create as the root of the file tree",
|
||||
)
|
||||
|
||||
serve = subparsers.add_parser(
|
||||
"serve",
|
||||
help="Start a webserver to serve schemas used by remote ref tests."
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(parser.parse_args())
|
BIN
bin/jsonschema/remotes/.DS_Store
vendored
Normal file
BIN
bin/jsonschema/remotes/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
bin/jsonschema/remotes/folder/folderInteger.json
Normal file
BIN
bin/jsonschema/remotes/folder/folderInteger.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/remotes/integer.json
Normal file
BIN
bin/jsonschema/remotes/integer.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/remotes/subSchemas.json
Normal file
BIN
bin/jsonschema/remotes/subSchemas.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/.DS_Store
vendored
Normal file
BIN
bin/jsonschema/tests/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/additionalItems.json
Normal file
BIN
bin/jsonschema/tests/draft3/additionalItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/additionalProperties.json
Normal file
BIN
bin/jsonschema/tests/draft3/additionalProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/default.json
Normal file
BIN
bin/jsonschema/tests/draft3/default.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/dependencies.json
Normal file
BIN
bin/jsonschema/tests/draft3/dependencies.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/disallow.json
Normal file
BIN
bin/jsonschema/tests/draft3/disallow.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/divisibleBy.json
Normal file
BIN
bin/jsonschema/tests/draft3/divisibleBy.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/enum.json
Normal file
BIN
bin/jsonschema/tests/draft3/enum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/extends.json
Normal file
BIN
bin/jsonschema/tests/draft3/extends.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/items.json
Normal file
BIN
bin/jsonschema/tests/draft3/items.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/maxItems.json
Normal file
BIN
bin/jsonschema/tests/draft3/maxItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/maxLength.json
Normal file
BIN
bin/jsonschema/tests/draft3/maxLength.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/maximum.json
Normal file
BIN
bin/jsonschema/tests/draft3/maximum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/minItems.json
Normal file
BIN
bin/jsonschema/tests/draft3/minItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/minLength.json
Normal file
BIN
bin/jsonschema/tests/draft3/minLength.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/minimum.json
Normal file
BIN
bin/jsonschema/tests/draft3/minimum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/optional/bignum.json
Normal file
BIN
bin/jsonschema/tests/draft3/optional/bignum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/optional/format.json
Normal file
BIN
bin/jsonschema/tests/draft3/optional/format.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/optional/jsregex.json
Normal file
BIN
bin/jsonschema/tests/draft3/optional/jsregex.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/optional/zeroTerminatedFloats.json
Normal file
BIN
bin/jsonschema/tests/draft3/optional/zeroTerminatedFloats.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/pattern.json
Normal file
BIN
bin/jsonschema/tests/draft3/pattern.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/patternProperties.json
Normal file
BIN
bin/jsonschema/tests/draft3/patternProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/properties.json
Normal file
BIN
bin/jsonschema/tests/draft3/properties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/ref.json
Normal file
BIN
bin/jsonschema/tests/draft3/ref.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/refRemote.json
Normal file
BIN
bin/jsonschema/tests/draft3/refRemote.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/required.json
Normal file
BIN
bin/jsonschema/tests/draft3/required.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/type.json
Normal file
BIN
bin/jsonschema/tests/draft3/type.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft3/uniqueItems.json
Normal file
BIN
bin/jsonschema/tests/draft3/uniqueItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/.DS_Store
vendored
Normal file
BIN
bin/jsonschema/tests/draft4/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/additionalItems.json
Normal file
BIN
bin/jsonschema/tests/draft4/additionalItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/additionalProperties.json
Normal file
BIN
bin/jsonschema/tests/draft4/additionalProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/allOf.json
Normal file
BIN
bin/jsonschema/tests/draft4/allOf.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/anyOf.json
Normal file
BIN
bin/jsonschema/tests/draft4/anyOf.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/default.json
Normal file
BIN
bin/jsonschema/tests/draft4/default.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/definitions.json
Normal file
BIN
bin/jsonschema/tests/draft4/definitions.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/dependencies.json
Normal file
BIN
bin/jsonschema/tests/draft4/dependencies.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/enum.json
Normal file
BIN
bin/jsonschema/tests/draft4/enum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/items.json
Normal file
BIN
bin/jsonschema/tests/draft4/items.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/maxItems.json
Normal file
BIN
bin/jsonschema/tests/draft4/maxItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/maxLength.json
Normal file
BIN
bin/jsonschema/tests/draft4/maxLength.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/maxProperties.json
Normal file
BIN
bin/jsonschema/tests/draft4/maxProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/maximum.json
Normal file
BIN
bin/jsonschema/tests/draft4/maximum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/minItems.json
Normal file
BIN
bin/jsonschema/tests/draft4/minItems.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/minLength.json
Normal file
BIN
bin/jsonschema/tests/draft4/minLength.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/minProperties.json
Normal file
BIN
bin/jsonschema/tests/draft4/minProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/minimum.json
Normal file
BIN
bin/jsonschema/tests/draft4/minimum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/multipleOf.json
Normal file
BIN
bin/jsonschema/tests/draft4/multipleOf.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/not.json
Normal file
BIN
bin/jsonschema/tests/draft4/not.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/oneOf.json
Normal file
BIN
bin/jsonschema/tests/draft4/oneOf.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/optional/bignum.json
Normal file
BIN
bin/jsonschema/tests/draft4/optional/bignum.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/optional/format.json
Normal file
BIN
bin/jsonschema/tests/draft4/optional/format.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/optional/zeroTerminatedFloats.json
Normal file
BIN
bin/jsonschema/tests/draft4/optional/zeroTerminatedFloats.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/pattern.json
Normal file
BIN
bin/jsonschema/tests/draft4/pattern.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/patternProperties.json
Normal file
BIN
bin/jsonschema/tests/draft4/patternProperties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/properties.json
Normal file
BIN
bin/jsonschema/tests/draft4/properties.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/ref.json
Normal file
BIN
bin/jsonschema/tests/draft4/ref.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/refRemote.json
Normal file
BIN
bin/jsonschema/tests/draft4/refRemote.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/required.json
Normal file
BIN
bin/jsonschema/tests/draft4/required.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/type.json
Normal file
BIN
bin/jsonschema/tests/draft4/type.json
Normal file
Binary file not shown.
BIN
bin/jsonschema/tests/draft4/uniqueItems.json
Normal file
BIN
bin/jsonschema/tests/draft4/uniqueItems.json
Normal file
Binary file not shown.
8
bin/jsonschema/tox.ini
Normal file
8
bin/jsonschema/tox.ini
Normal file
@ -0,0 +1,8 @@
|
||||
[tox]
|
||||
minversion = 1.6
|
||||
envlist = py27
|
||||
skipsdist = True
|
||||
|
||||
[testenv]
|
||||
deps = jsonschema
|
||||
commands = {envpython} bin/jsonschema_suite check
|
@ -774,6 +774,7 @@ INPUT = readme.md \
|
||||
doc/encoding.md \
|
||||
doc/dom.md \
|
||||
doc/sax.md \
|
||||
doc/schema.md \
|
||||
doc/performance.md \
|
||||
doc/internals.md \
|
||||
doc/faq.md
|
||||
|
@ -774,6 +774,7 @@ INPUT = readme.zh-cn.md \
|
||||
doc/encoding.zh-cn.md \
|
||||
doc/dom.zh-cn.md \
|
||||
doc/sax.zh-cn.md \
|
||||
doc/schema.zh-cn.md \
|
||||
doc/performance.zh-cn.md \
|
||||
doc/internals.md \
|
||||
doc/faq.zh-cn.md
|
||||
|
237
doc/schema.md
Normal file
237
doc/schema.md
Normal file
@ -0,0 +1,237 @@
|
||||
# Schema
|
||||
|
||||
## Status: experimental, shall be included in v1.1
|
||||
|
||||
JSON Schema is a draft standard for describing the format of JSON data. The schema itself is also JSON. By validating a JSON document against a JSON Schema, your code can safely access the DOM without manually checking types, whether a key exists, etc. It can also ensure that the serialized JSON conforms to a specified schema.
|
||||
|
||||
RapidJSON implements a JSON Schema validator for [JSON Schema Draft v4](http://json-schema.org/documentation.html). If you are not familiar with JSON Schema, you may refer to [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/).
|
||||
|
||||
[TOC]
|
||||
|
||||
## Basic Usage
|
||||
|
||||
First of all, you need to parse a JSON Schema into `Document`, and then compile the `Document` into `SchemaDocument`.
|
||||
|
||||
Secondly, construct a `SchemaValidator` with the `SchemaDocument`. It is similar to a `Writer` in the sense that it handles SAX events. So, you can use `document.Accept(validator)` to validate a document, and then check the validity.
|
||||
|
||||
~~~cpp
|
||||
#include "rapidjson/schema.h"
|
||||
|
||||
// ...
|
||||
|
||||
Document sd;
|
||||
if (!sd.Parse(schemaJson)) {
|
||||
// the schema is not a valid JSON.
|
||||
// ...
|
||||
}
|
||||
SchemaDocument schema(sd); // Compile a Document to SchemaDocument
|
||||
// sd is no longer needed here.
|
||||
|
||||
Document d;
|
||||
if (!d.Parse(inputJson)) {
|
||||
// the input is not a valid JSON.
|
||||
// ...
|
||||
}
|
||||
|
||||
SchemaValidator validator(schema);
|
||||
if (!d.Accept(validator)) {
|
||||
// Input JSON is invalid according to the schema
|
||||
// Output diagnostic information
|
||||
StringBuffer sb;
|
||||
validator.GetInvalidSchemaPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid schema: %s\n", sb.GetString());
|
||||
printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword());
|
||||
sb.Clear();
|
||||
validator.GetInvalidDocumentPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid document: %s\n", sb.GetString());
|
||||
}
|
||||
~~~
|
||||
|
||||
Some notes:
|
||||
|
||||
* One `SchemaDocument` can be referenced by multiple `SchemaValidator`s. It will not be modified by `SchemaValidator`s.
|
||||
* A `SchemaValidator` may be reused to validate multiple documents. To run it for other documents, call `validator.Reset()` first.
|
||||
|
||||
## Validation during parsing/serialization
|
||||
|
||||
Unlike most JSON Schema validator implementations, RapidJSON provides a SAX-based schema validator. Therefore, you can parse a JSON from a stream while validating it on the fly. If the validator encounters a JSON value that invalidates the supplied schema, the parsing will be terminated immediately. This design is especially useful for parsing large JSON files.
|
||||
|
||||
### DOM parsing
|
||||
|
||||
When using DOM parsing, `Document` needs some preparation and finalizing tasks in addition to receiving SAX events, so some extra work is needed to route the reader, validator and the document. `SchemaValidatingReader` is a helper class that does such work.
|
||||
|
||||
~~~cpp
|
||||
#include "rapidjson/filereadstream.h"
|
||||
|
||||
// ...
|
||||
SchemaDocument schema(sd); // Compile a Document to SchemaDocument
|
||||
|
||||
// Use reader to parse the JSON
|
||||
FILE* fp = fopen("big.json", "r");
|
||||
FileReadStream is(fp, buffer, sizeof(buffer));
|
||||
|
||||
// Parse JSON from reader, validate the SAX events, and store in d.
|
||||
Document d;
|
||||
SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema);
|
||||
d.Populate(reader);
|
||||
|
||||
if (!reader.GetParseResult()) {
|
||||
// Not a valid JSON
|
||||
// When reader.GetParseResult().Code() == kParseErrorTermination,
|
||||
// it may be terminated by:
|
||||
// (1) the validator found that the JSON is invalid according to schema; or
|
||||
// (2) the input stream has I/O error.
|
||||
|
||||
// Check the validation result
|
||||
if (!reader.IsValid()) {
|
||||
// Input JSON is invalid according to the schema
|
||||
// Output diagnostic information
|
||||
StringBuffer sb;
|
||||
reader.GetInvalidSchemaPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid schema: %s\n", sb.GetString());
|
||||
printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword());
|
||||
sb.Clear();
|
||||
reader.GetInvalidDocumentPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid document: %s\n", sb.GetString());
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
### SAX parsing
|
||||
|
||||
Using SAX parsing is much simpler. If you only need to validate the JSON without further processing, it is simply:
|
||||
|
||||
~~~
|
||||
SchemaValidator validator(schema);
|
||||
Reader reader;
|
||||
if (!reader.Parse(stream, validator)) {
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
This is exactly the method used in [schemavalidator](example/schemavalidator/schemavalidator.cpp) example. The distinct advantage is low memory usage, no matter how big the JSON was (the memory usage depends on the complexity of the schema).
|
||||
|
||||
If you need to handle the SAX events further, then you need to use the template class `GenericSchemaValidator` to set the output handler of the validator:
|
||||
|
||||
~~~
|
||||
MyHandler handler;
|
||||
GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler);
|
||||
Reader reader;
|
||||
if (!reader.Parse(ss, validator)) {
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
### Serialization
|
||||
|
||||
It is also possible to do validation during serializing. This can ensure the result JSON is valid according to the JSON schema.
|
||||
|
||||
~~~
|
||||
StringBuffer sb;
|
||||
Writer<StringBuffer> writer(sb);
|
||||
GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer);
|
||||
if (!d.Accept(validator)) {
|
||||
// Some problem during Accept(), it may be validation or encoding issues.
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
Of course, if your application only needs SAX-style serialization, it can simply send SAX events to `SchemaValidator` instead of `Writer`.
|
||||
|
||||
## Remote Schema
|
||||
|
||||
JSON Schema supports the [`$ref` keyword](http://spacetelescope.github.io/understanding-json-schema/structuring.html), which is a [JSON pointer](pointer.md) referencing a local or remote schema. A local pointer is prefixed with `#`, while a remote pointer is a relative or absolute URI. For example:
|
||||
|
||||
~~~js
|
||||
{ "$ref": "definitions.json#/address" }
|
||||
~~~
|
||||
|
||||
As `SchemaValidator` does not know how to resolve such URI, it needs a user-provided `IRemoteSchemaDocumentProvider` instance to do so.
|
||||
|
||||
~~~
|
||||
class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider {
|
||||
public:
|
||||
virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeType length) {
|
||||
// Resolve the uri and returns a pointer to that schema.
|
||||
}
|
||||
};
|
||||
|
||||
// ...
|
||||
|
||||
MyRemoteSchemaDocumentProvider provider;
|
||||
SchemaValidator validator(schema, &provider);
|
||||
~~~
|
||||
|
||||
## Conformance
|
||||
|
||||
RapidJSON passed 262 out of 263 tests in [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (Json Schema draft 4).
|
||||
|
||||
The failed test is "changed scope ref invalid" of "change resolution scope" in `refRemote.json`. This is because the `id` schema keyword and the URI combining function are not implemented.
|
||||
|
||||
Besides, the `format` schema keyword for string values is ignored, since it is not required by the specification.
|
||||
|
||||
### Regular Expression
|
||||
|
||||
The schema keywords `pattern` and `patternProperties` use regular expressions to match the required pattern.
|
||||
|
||||
RapidJSON implemented a simple NFA regular expression engine, which is used by default. It supports the following syntax.
|
||||
|
||||
|Syntax|Description|
|
||||
|------|-----------|
|
||||
|`ab` | Concatenation
|
||||
|`a\|b` | Alternation
|
||||
|`a?` | Zero or one
|
||||
|`a*` | Zero or more
|
||||
|`a+` | One or more
|
||||
|`a{3}` | Exactly 3 times
|
||||
|`a{3,}` | At least 3 times
|
||||
|`a{3,5}`| 3 to 5 times
|
||||
|`(ab)` | Grouping
|
||||
|`^a` | At the beginning
|
||||
|`a$` | At the end
|
||||
|`.` | Any character
|
||||
|`[abc]` | Character classes
|
||||
|`[a-c]` | Character class range
|
||||
|`[a-z0-9_]` | Character class combination
|
||||
|`[^abc]` | Negated character classes
|
||||
|`[^a-c]` | Negated character class range
|
||||
|`[\b]` | Backspace (U+0008)
|
||||
|`\|`, `\\`, ... | Escape characters
|
||||
|`\f` | Form feed (U+000C)
|
||||
|`\n` | Line feed (U+000A)
|
||||
|`\r` | Carriage return (U+000D)
|
||||
|`\t` | Tab (U+0009)
|
||||
|`\v` | Vertical tab (U+000B)
|
||||
|
||||
With a C++11 compiler, it is also possible to use `std::regex` by defining `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` and `RAPIDJSON_SCHEMA_USE_STDREGEX=1`. If your schemas do not need `pattern` and `patternProperties`, you can set both macros to zero to disable this feature, which will reduce some code size.
|
||||
|
||||
## Performance
|
||||
|
||||
Most C++ JSON libraries do not yet support JSON Schema. So we tried to evaluate the performance of RapidJSON's JSON Schema validator according to [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark), which tests 11 JavaScript libraries running on Node.js.
|
||||
|
||||
That benchmark runs validations on [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite), in which some test suites and tests are excluded. We implemented the same benchmarking procedure in [`schematest.cpp`](test/perftest/schematest.cpp).
|
||||
|
||||
On a MacBook Pro (2.8 GHz Intel Core i7), the following results are collected.
|
||||
|
||||
|Validator|Relative speed|Number of test runs per second|
|
||||
|---------|:------------:|:----------------------------:|
|
||||
|RapidJSON|36521%|7220217|
|
||||
|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)|
|
||||
|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)|
|
||||
|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)|
|
||||
|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)|
|
||||
|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)|
|
||||
|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)|
|
||||
|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)|
|
||||
|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)|
|
||||
|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)|
|
||||
|tv4|0.5%|93 (± 0.94%)|
|
||||
|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)|
|
||||
|
||||
That is, RapidJSON is about 365 times faster than the fastest JavaScript library (ajv), and about 344 thousand times faster than the slowest one.
|
237
doc/schema.zh-cn.md
Normal file
237
doc/schema.zh-cn.md
Normal file
@ -0,0 +1,237 @@
|
||||
# Schema
|
||||
|
||||
## 状态: 实验性,应该会合进 v1.1
|
||||
|
||||
JSON Schema 是描述 JSON 格式的一个标准草案。一个 schema 本身也是一个 JSON。使用 JSON Schema 去校验 JSON,可以让你的代码安全地访问 DOM,而无须检查类型或键值是否存在等。这也能确保输出的 JSON 是符合指定的 schema。
|
||||
|
||||
RapidJSON 实现了一个 [JSON Schema Draft v4](http://json-schema.org/documentation.html) 的校验器。若你不熟悉 JSON Schema,可以参考 [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/)。
|
||||
|
||||
[TOC]
|
||||
|
||||
## 基本用法
|
||||
|
||||
首先,你要把 JSON Schema 解析成 `Document`,再把它编译成一个 `SchemaDocument`。
|
||||
|
||||
然后,利用该 `SchemaDocument` 创建一个 `SchemaValidator`。它与 `Writer` 相似,都是能够处理 SAX 事件的。因此,你可以用 `document.Accept(validator)` 去校验一个 JSON,然后再获取校验结果。
|
||||
|
||||
~~~cpp
|
||||
#include "rapidjson/schema.h"
|
||||
|
||||
// ...
|
||||
|
||||
Document sd;
|
||||
if (!sd.Parse(schemaJson)) {
|
||||
// the schema is not a valid JSON.
|
||||
// ...
|
||||
}
|
||||
SchemaDocument schema(sd); // Compile a Document to SchemaDocument
|
||||
// sd is no longer needed here.
|
||||
|
||||
Document d;
|
||||
if (!d.Parse(inputJson)) {
|
||||
// the input is not a valid JSON.
|
||||
// ...
|
||||
}
|
||||
|
||||
SchemaValidator validator(schema);
|
||||
if (!d.Accept(validator)) {
|
||||
// Input JSON is invalid according to the schema
|
||||
// Output diagnostic information
|
||||
StringBuffer sb;
|
||||
validator.GetInvalidSchemaPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid schema: %s\n", sb.GetString());
|
||||
printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword());
|
||||
sb.Clear();
|
||||
validator.GetInvalidDocumentPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid document: %s\n", sb.GetString());
|
||||
}
|
||||
~~~
|
||||
|
||||
一些注意点:
|
||||
|
||||
* 一个 `SchemaDocument` 能被多个 `SchemaValidator` 引用。它不会被 `SchemaValidator` 修改。
|
||||
* 一个 `SchemaValidator` 可以重复使用来校验多个文件。在校验其他文件前,先调用 `validator.Reset()`。
|
||||
|
||||
## 在解析/生成时进行校验
|
||||
|
||||
与大部分 JSON Schema 校验器有所不同,RapidJSON 提供了一个基于 SAX 的 schema 校验器实现。因此,你可以在输入流解析 JSON 的同时进行校验。若校验器遇到一个与 schema 不符的值,就会立即终止解析。这设计对于解析大型 JSON 文件时特别有用。
|
||||
|
||||
### DOM 解析
|
||||
|
||||
在使用 DOM 进行解析时,`Document` 除了接收 SAX 事件外,还需做一些准备及结束工作,因此,为了连接 `Reader`、`SchemaValidator` 和 `Document` 要做多一点事情。`SchemaValidatingReader` 是一个辅助类去做那些工作。
|
||||
|
||||
~~~cpp
|
||||
#include "rapidjson/filereadstream.h"
|
||||
|
||||
// ...
|
||||
SchemaDocument schema(sd); // Compile a Document to SchemaDocument
|
||||
|
||||
// Use reader to parse the JSON
|
||||
FILE* fp = fopen("big.json", "r");
|
||||
FileReadStream is(fp, buffer, sizeof(buffer));
|
||||
|
||||
// Parse JSON from reader, validate the SAX events, and store in d.
|
||||
Document d;
|
||||
SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema);
|
||||
d.Populate(reader);
|
||||
|
||||
if (!reader.GetParseResult()) {
|
||||
// Not a valid JSON
|
||||
// When reader.GetParseResult().Code() == kParseErrorTermination,
|
||||
// it may be terminated by:
|
||||
// (1) the validator found that the JSON is invalid according to schema; or
|
||||
// (2) the input stream has I/O error.
|
||||
|
||||
// Check the validation result
|
||||
if (!reader.IsValid()) {
|
||||
// Input JSON is invalid according to the schema
|
||||
// Output diagnostic information
|
||||
StringBuffer sb;
|
||||
reader.GetInvalidSchemaPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid schema: %s\n", sb.GetString());
|
||||
printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword());
|
||||
sb.Clear();
|
||||
reader.GetInvalidDocumentPointer().StringifyUriFragment(sb);
|
||||
printf("Invalid document: %s\n", sb.GetString());
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
### SAX 解析
|
||||
|
||||
使用 SAX 解析时,情况就简单得多。若只需要校验 JSON 而无需进一步处理,那么仅需要:
|
||||
|
||||
~~~
|
||||
SchemaValidator validator(schema);
|
||||
Reader reader;
|
||||
if (!reader.Parse(stream, validator)) {
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
这种方式和 [schemavalidator](example/schemavalidator/schemavalidator.cpp) 例子完全相同。这带来的独特优势是,无论 JSON 多巨大,永远维持低内存用量(内存用量只与 Schema 的复杂度相关)。
|
||||
|
||||
若你需要进一步处理 SAX 事件,便可使用模板类 `GenericSchemaValidator` 去设置校验器的输出 `Handler`:
|
||||
|
||||
~~~
|
||||
MyHandler handler;
|
||||
GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler);
|
||||
Reader reader;
|
||||
if (!reader.Parse(ss, validator)) {
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
### 生成
|
||||
|
||||
我们也可以在生成(serialization)的时候进行校验。这能确保输出的 JSON 符合一个 JSON Schema。
|
||||
|
||||
~~~
|
||||
StringBuffer sb;
|
||||
Writer<StringBuffer> writer(sb);
|
||||
GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer);
|
||||
if (!d.Accept(validator)) {
|
||||
// Some problem during Accept(), it may be validation or encoding issues.
|
||||
if (!validator.IsValid()) {
|
||||
// ...
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
当然,如果你的应用仅需要 SAX 风格的生成,那么只需要把 SAX 事件由原来发送到 `Writer`,改为发送到 `SchemaValidator`。
|
||||
|
||||
## 远程 Schema
|
||||
|
||||
JSON Schema 支持 [`$ref` 关键字](http://spacetelescope.github.io/understanding-json-schema/structuring.html),它是一个[JSON pointer](pointer.md) 引用至一个本地(local)或远程(remote) schema。本地指针的首字符是 `#`,而远程指针是一个相对或绝对 URI。例如:
|
||||
|
||||
~~~js
|
||||
{ "$ref": "definitions.json#/address" }
|
||||
~~~
|
||||
|
||||
由于 `SchemaValidator` 并不知道如何处理那些 URI,它需要使用者提供一个 `IRemoteSchemaDocumentProvider` 的实例去处理。
|
||||
|
||||
~~~
|
||||
class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider {
|
||||
public:
|
||||
virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeType length) {
|
||||
// Resolve the uri and returns a pointer to that schema.
|
||||
}
|
||||
};
|
||||
|
||||
// ...
|
||||
|
||||
MyRemoteSchemaDocumentProvider provider;
|
||||
SchemaValidator validator(schema, &provider);
|
||||
~~~
|
||||
|
||||
## 标准的符合程度
|
||||
|
||||
RapidJSON 通过了 [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (Json Schema draft 4) 中 263 个测试的 262 个。
|
||||
|
||||
没通过的测试是 `refRemote.json` 中的 "change resolution scope" - "changed scope ref invalid"。这是由于未实现 `id` schema 关键字及 URI 合并功能。
|
||||
|
||||
除此以外,关于字符串类型的 `format` schema 关键字也会被忽略,因为标准中并没需求必须实现。
|
||||
|
||||
### 正则表达式
|
||||
|
||||
`pattern` 及 `patternProperties` 这两个 schema 关键字使用了正则表达式去匹配所需的模式。
|
||||
|
||||
RapidJSON 实现了一个简单的 NFA 正则表达式引擎,并预设使用。它支持以下语法。
|
||||
|
||||
|语法|描述|
|
||||
|------|-----------|
|
||||
|`ab` | 串联
|
||||
|`a\|b` | 交替
|
||||
|`a?` | 零或一次
|
||||
|`a*` | 零或多次
|
||||
|`a+` | 一或多次
|
||||
|`a{3}` | 刚好 3 次
|
||||
|`a{3,}` | 至少 3 次
|
||||
|`a{3,5}`| 3 至 5 次
|
||||
|`(ab)` | 分组
|
||||
|`^a` | 在开始处
|
||||
|`a$` | 在结束处
|
||||
|`.` | 任何字符
|
||||
|`[abc]` | 字符组
|
||||
|`[a-c]` | 字符组范围
|
||||
|`[a-z0-9_]` | 字符组组合
|
||||
|`[^abc]` | 字符组取反
|
||||
|`[^a-c]` | 字符组范围取反
|
||||
|`[\b]` | 退格符 (U+0008)
|
||||
|`\|`, `\\`, ... | 转义字符
|
||||
|`\f` | 馈页 (U+000C)
|
||||
|`\n` | 馈行 (U+000A)
|
||||
|`\r` | 回车 (U+000D)
|
||||
|`\t` | 制表 (U+0009)
|
||||
|`\v` | 垂直制表 (U+000B)
|
||||
|
||||
对于使用 C++11 编译器的使用者,也可使用 `std::regex`,只需定义 `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` 及 `RAPIDJSON_SCHEMA_USE_STDREGEX=1`。若你的 schema 无需使用 `pattern` 或 `patternProperties`,可以把两个宏都设为零,以禁用此功能,这样做可节省一些代码体积。
|
||||
|
||||
## 性能
|
||||
|
||||
大部分 C++ JSON 库都未支持 JSON Schema。因此我们尝试按照 [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark) 去评估 RapidJSON 的 JSON Schema 校验器。该评测测试了 11 个运行在 node.js 上的 JavaScript 库。
|
||||
|
||||
该评测校验 [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) 中的测试,当中排除了一些测试套件及个别测试。我们在 [`schematest.cpp`](test/perftest/schematest.cpp) 实现了相同的评测。
|
||||
|
||||
在 MacBook Pro (2.8 GHz Intel Core i7) 上收集到以下结果。
|
||||
|
||||
|校验器|相对速度|每秒执行的测试数目|
|
||||
|---------|:------------:|:----------------------------:|
|
||||
|RapidJSON|36521%|7220217|
|
||||
|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)|
|
||||
|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)|
|
||||
|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)|
|
||||
|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)|
|
||||
|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)|
|
||||
|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)|
|
||||
|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)|
|
||||
|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)|
|
||||
|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)|
|
||||
|tv4|0.5%|93 (± 0.94%)|
|
||||
|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)|
|
||||
|
||||
换言之,RapidJSON 比最快的 JavaScript 库(ajv)快约 365 倍。比最慢的快 34 万倍。
|
@ -9,6 +9,7 @@ set(EXAMPLES
|
||||
messagereader
|
||||
pretty
|
||||
prettyauto
|
||||
schemavalidator
|
||||
serialize
|
||||
simpledom
|
||||
simplereader
|
||||
|
72
example/schemavalidator/schemavalidator.cpp
Normal file
72
example/schemavalidator/schemavalidator.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
// Schema Validator example
|
||||
|
||||
// The example validates JSON text from stdin with a JSON schema specified in the argument.
|
||||
|
||||
#include "rapidjson/error/en.h"
|
||||
#include "rapidjson/filereadstream.h"
|
||||
#include "rapidjson/schema.h"
|
||||
#include "rapidjson/stringbuffer.h"
|
||||
|
||||
using namespace rapidjson;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: schemavalidator schema.json < input.json\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// Read a JSON schema from file into Document
|
||||
Document d;
|
||||
char buffer[4096];
|
||||
|
||||
{
|
||||
FILE *fp = fopen(argv[1], "r");
|
||||
if (!fp) {
|
||||
printf("Schema file '%s' not found\n", argv[1]);
|
||||
return -1;
|
||||
}
|
||||
FileReadStream fs(fp, buffer, sizeof(buffer));
|
||||
d.ParseStream(fs);
|
||||
if (d.HasParseError()) {
|
||||
fprintf(stderr, "Schema file '%s' is not a valid JSON\n", argv[1]);
|
||||
fprintf(stderr, "Error(offset %u): %s\n",
|
||||
static_cast<unsigned>(d.GetErrorOffset()),
|
||||
GetParseError_En(d.GetParseError()));
|
||||
fclose(fp);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
// Then convert the Document into SchemaDocument
|
||||
SchemaDocument sd(d);
|
||||
|
||||
// Use reader to parse the JSON in stdin, and forward SAX events to validator
|
||||
SchemaValidator validator(sd);
|
||||
Reader reader;
|
||||
FileReadStream is(stdin, buffer, sizeof(buffer));
|
||||
if (!reader.Parse(is, validator) && reader.GetParseErrorCode() != kParseErrorTermination) {
|
||||
// Schema validator error would cause kParseErrorTermination, which will handle it in next step.
|
||||
fprintf(stderr, "Input is not a valid JSON\n");
|
||||
fprintf(stderr, "Error(offset %u): %s\n",
|
||||
static_cast<unsigned>(reader.GetErrorOffset()),
|
||||
GetParseError_En(reader.GetParseErrorCode()));
|
||||
}
|
||||
|
||||
// Check the validation result
|
||||
if (validator.IsValid()) {
|
||||
printf("Input JSON is valid.\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
else {
|
||||
printf("Input JSON is invalid.\n");
|
||||
StringBuffer sb;
|
||||
validator.GetInvalidSchemaPointer().StringifyUriFragment(sb);
|
||||
fprintf(stderr, "Invalid schema: %s\n", sb.GetString());
|
||||
fprintf(stderr, "Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword());
|
||||
sb.Clear();
|
||||
validator.GetInvalidDocumentPointer().StringifyUriFragment(sb);
|
||||
fprintf(stderr, "Invalid document: %s\n", sb.GetString());
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
@ -1535,22 +1535,22 @@ public:
|
||||
case kTrueType: return handler.Bool(true);
|
||||
|
||||
case kObjectType:
|
||||
if (!handler.StartObject())
|
||||
if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
|
||||
return false;
|
||||
for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) {
|
||||
RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator.
|
||||
if (!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.flags_ & kCopyFlag) != 0))
|
||||
if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.flags_ & kCopyFlag) != 0)))
|
||||
return false;
|
||||
if (!m->value.Accept(handler))
|
||||
if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler)))
|
||||
return false;
|
||||
}
|
||||
return handler.EndObject(data_.o.size);
|
||||
|
||||
case kArrayType:
|
||||
if (!handler.StartArray())
|
||||
if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
|
||||
return false;
|
||||
for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v)
|
||||
if (!v->Accept(handler))
|
||||
for (const GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v)
|
||||
if (RAPIDJSON_UNLIKELY(!v->Accept(handler)))
|
||||
return false;
|
||||
return handler.EndArray(data_.a.size);
|
||||
|
||||
@ -1559,11 +1559,11 @@ public:
|
||||
|
||||
default:
|
||||
RAPIDJSON_ASSERT(GetType() == kNumberType);
|
||||
if (IsInt()) return handler.Int(data_.n.i.i);
|
||||
if (IsDouble()) return handler.Double(data_.n.d);
|
||||
else if (IsInt()) return handler.Int(data_.n.i.i);
|
||||
else if (IsUint()) return handler.Uint(data_.n.u.u);
|
||||
else if (IsInt64()) return handler.Int64(data_.n.i64);
|
||||
else if (IsUint64()) return handler.Uint64(data_.n.u64);
|
||||
else return handler.Double(data_.n.d);
|
||||
else return handler.Uint64(data_.n.u64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1869,6 +1869,21 @@ public:
|
||||
*/
|
||||
friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
|
||||
|
||||
//! Populate this document by a generator which produces SAX events.
|
||||
/*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype.
|
||||
\param g Generator functor which sends SAX events to the parameter.
|
||||
\return The document itself for fluent API.
|
||||
*/
|
||||
template <typename Generator>
|
||||
GenericDocument& Populate(Generator& g) {
|
||||
ClearStackOnExit scope(*this);
|
||||
if (g(*this)) {
|
||||
RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
|
||||
ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
//!@name Parse from stream
|
||||
//!@{
|
||||
|
||||
@ -2017,9 +2032,10 @@ private:
|
||||
};
|
||||
|
||||
// callers of the following private Handler functions
|
||||
template <typename,typename,typename> friend class GenericReader; // for parsing
|
||||
// template <typename,typename,typename> friend class GenericReader; // for parsing
|
||||
template <typename, typename> friend class GenericValue; // for deep copying
|
||||
|
||||
public:
|
||||
// Implementation of Handler
|
||||
bool Null() { new (stack_.template Push<ValueType>()) ValueType(); return true; }
|
||||
bool Bool(bool b) { new (stack_.template Push<ValueType>()) ValueType(b); return true; }
|
||||
|
677
include/rapidjson/internal/regex.h
Normal file
677
include/rapidjson/internal/regex.h
Normal file
@ -0,0 +1,677 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_INTERNAL_REGEX_H_
|
||||
#define RAPIDJSON_INTERNAL_REGEX_H_
|
||||
|
||||
#include "../rapidjson.h"
|
||||
#include "stack.h"
|
||||
|
||||
#ifdef __clang__
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(padded)
|
||||
RAPIDJSON_DIAG_OFF(switch-enum)
|
||||
RAPIDJSON_DIAG_OFF(implicit-fallthrough)
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(effc++)
|
||||
#endif
|
||||
|
||||
#ifndef RAPIDJSON_REGEX_VERBOSE
|
||||
#define RAPIDJSON_REGEX_VERBOSE 0
|
||||
#endif
|
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN
|
||||
namespace internal {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// GenericRegex
|
||||
|
||||
static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
|
||||
static const SizeType kRegexInvalidRange = ~SizeType(0);
|
||||
|
||||
//! Regular expression engine with subset of ECMAscript grammar.
|
||||
/*!
|
||||
Supported regular expression syntax:
|
||||
- \c ab Concatenation
|
||||
- \c a|b Alternation
|
||||
- \c a? Zero or one
|
||||
- \c a* Zero or more
|
||||
- \c a+ One or more
|
||||
- \c a{3} Exactly 3 times
|
||||
- \c a{3,} At least 3 times
|
||||
- \c a{3,5} 3 to 5 times
|
||||
- \c (ab) Grouping
|
||||
- \c ^a At the beginning
|
||||
- \c a$ At the end
|
||||
- \c . Any character
|
||||
- \c [abc] Character classes
|
||||
- \c [a-c] Character class range
|
||||
- \c [a-z0-9_] Character class combination
|
||||
- \c [^abc] Negated character classes
|
||||
- \c [^a-c] Negated character class range
|
||||
- \c [\b] Backspace (U+0008)
|
||||
- \c \\| \\\\ ... Escape characters
|
||||
- \c \\f Form feed (U+000C)
|
||||
- \c \\n Line feed (U+000A)
|
||||
- \c \\r Carriage return (U+000D)
|
||||
- \c \\t Tab (U+0009)
|
||||
- \c \\v Vertical tab (U+000B)
|
||||
|
||||
\note This is a Thompson NFA engine, implemented with reference to
|
||||
Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
|
||||
https://swtch.com/~rsc/regexp/regexp1.html
|
||||
*/
|
||||
template <typename Encoding, typename Allocator = CrtAllocator>
|
||||
class GenericRegex {
|
||||
public:
|
||||
typedef typename Encoding::Ch Ch;
|
||||
|
||||
GenericRegex(const Ch* source, Allocator* allocator = 0) :
|
||||
states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
|
||||
stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_()
|
||||
{
|
||||
GenericStringStream<Encoding> ss(source);
|
||||
DecodedStream<GenericStringStream<Encoding> > ds(ss);
|
||||
Parse(ds);
|
||||
}
|
||||
|
||||
~GenericRegex() {
|
||||
Allocator::Free(stateSet_);
|
||||
}
|
||||
|
||||
bool IsValid() const {
|
||||
return root_ != kRegexInvalidState;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool Match(InputStream& is) const {
|
||||
return SearchWithAnchoring(is, true, true);
|
||||
}
|
||||
|
||||
bool Match(const Ch* s) const {
|
||||
GenericStringStream<Encoding> is(s);
|
||||
return Match(is);
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool Search(InputStream& is) const {
|
||||
return SearchWithAnchoring(is, anchorBegin_, anchorEnd_);
|
||||
}
|
||||
|
||||
bool Search(const Ch* s) const {
|
||||
GenericStringStream<Encoding> is(s);
|
||||
return Search(is);
|
||||
}
|
||||
|
||||
private:
|
||||
enum Operator {
|
||||
kZeroOrOne,
|
||||
kZeroOrMore,
|
||||
kOneOrMore,
|
||||
kConcatenation,
|
||||
kAlternation,
|
||||
kLeftParenthesis
|
||||
};
|
||||
|
||||
static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
|
||||
static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
|
||||
static const unsigned kRangeNegationFlag = 0x80000000;
|
||||
|
||||
struct Range {
|
||||
unsigned start; //
|
||||
unsigned end;
|
||||
SizeType next;
|
||||
};
|
||||
|
||||
struct State {
|
||||
SizeType out; //!< Equals to kInvalid for matching state
|
||||
SizeType out1; //!< Equals to non-kInvalid for split
|
||||
SizeType rangeStart;
|
||||
unsigned codepoint;
|
||||
};
|
||||
|
||||
struct Frag {
|
||||
Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
|
||||
SizeType start;
|
||||
SizeType out; //!< link-list of all output states
|
||||
SizeType minIndex;
|
||||
};
|
||||
|
||||
template <typename SourceStream>
|
||||
class DecodedStream {
|
||||
public:
|
||||
DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
|
||||
unsigned Peek() { return codepoint_; }
|
||||
unsigned Take() {
|
||||
unsigned c = codepoint_;
|
||||
if (c) // No further decoding when '\0'
|
||||
Decode();
|
||||
return c;
|
||||
}
|
||||
|
||||
private:
|
||||
void Decode() {
|
||||
if (!Encoding::Decode(ss_, &codepoint_))
|
||||
codepoint_ = 0;
|
||||
}
|
||||
|
||||
SourceStream& ss_;
|
||||
unsigned codepoint_;
|
||||
};
|
||||
|
||||
State& GetState(SizeType index) {
|
||||
RAPIDJSON_ASSERT(index < stateCount_);
|
||||
return states_.template Bottom<State>()[index];
|
||||
}
|
||||
|
||||
const State& GetState(SizeType index) const {
|
||||
RAPIDJSON_ASSERT(index < stateCount_);
|
||||
return states_.template Bottom<State>()[index];
|
||||
}
|
||||
|
||||
Range& GetRange(SizeType index) {
|
||||
RAPIDJSON_ASSERT(index < rangeCount_);
|
||||
return ranges_.template Bottom<Range>()[index];
|
||||
}
|
||||
|
||||
const Range& GetRange(SizeType index) const {
|
||||
RAPIDJSON_ASSERT(index < rangeCount_);
|
||||
return ranges_.template Bottom<Range>()[index];
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
void Parse(DecodedStream<InputStream>& ds) {
|
||||
Allocator allocator;
|
||||
Stack<Allocator> operandStack(&allocator, 256); // Frag
|
||||
Stack<Allocator> operatorStack(&allocator, 256); // Operator
|
||||
Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
|
||||
|
||||
*atomCountStack.template Push<unsigned>() = 0;
|
||||
|
||||
unsigned codepoint;
|
||||
while (ds.Peek() != 0) {
|
||||
switch (codepoint = ds.Take()) {
|
||||
case '^':
|
||||
anchorBegin_ = true;
|
||||
break;
|
||||
|
||||
case '$':
|
||||
anchorEnd_ = true;
|
||||
break;
|
||||
|
||||
case '|':
|
||||
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
*operatorStack.template Push<Operator>() = kAlternation;
|
||||
*atomCountStack.template Top<unsigned>() = 0;
|
||||
break;
|
||||
|
||||
case '(':
|
||||
*operatorStack.template Push<Operator>() = kLeftParenthesis;
|
||||
*atomCountStack.template Push<unsigned>() = 0;
|
||||
break;
|
||||
|
||||
case ')':
|
||||
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
if (operatorStack.Empty())
|
||||
return;
|
||||
operatorStack.template Pop<Operator>(1);
|
||||
atomCountStack.template Pop<unsigned>(1);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
if (!Eval(operandStack, kZeroOrOne))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '*':
|
||||
if (!Eval(operandStack, kZeroOrMore))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '+':
|
||||
if (!Eval(operandStack, kOneOrMore))
|
||||
return;
|
||||
break;
|
||||
|
||||
case '{':
|
||||
{
|
||||
unsigned n, m;
|
||||
if (!ParseUnsigned(ds, &n) || n == 0)
|
||||
return;
|
||||
|
||||
if (ds.Peek() == ',') {
|
||||
ds.Take();
|
||||
if (ds.Peek() == '}')
|
||||
m = 0;
|
||||
else if (!ParseUnsigned(ds, &m) || m < n)
|
||||
return;
|
||||
}
|
||||
else
|
||||
m = n;
|
||||
|
||||
if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
|
||||
return;
|
||||
ds.Take();
|
||||
}
|
||||
break;
|
||||
|
||||
case '.':
|
||||
PushOperand(operandStack, kAnyCharacterClass);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '[':
|
||||
{
|
||||
SizeType range;
|
||||
if (!ParseRange(ds, &range))
|
||||
return;
|
||||
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
|
||||
GetState(s).rangeStart = range;
|
||||
*operandStack.template Push<Frag>() = Frag(s, s, s);
|
||||
}
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
break;
|
||||
|
||||
case '\\': // Escape character
|
||||
if (!CharacterEscape(ds, &codepoint))
|
||||
return; // Unsupported escape character
|
||||
// fall through to default
|
||||
|
||||
default: // Pattern character
|
||||
PushOperand(operandStack, codepoint);
|
||||
ImplicitConcatenation(atomCountStack, operatorStack);
|
||||
}
|
||||
}
|
||||
|
||||
while (!operatorStack.Empty())
|
||||
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
||||
return;
|
||||
|
||||
// Link the operand to matching state.
|
||||
if (operandStack.GetSize() == sizeof(Frag)) {
|
||||
Frag* e = operandStack.template Pop<Frag>(1);
|
||||
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
||||
root_ = e->start;
|
||||
|
||||
#if RAPIDJSON_REGEX_VERBOSE
|
||||
printf("root: %d\n", root_);
|
||||
for (SizeType i = 0; i < stateCount_ ; i++) {
|
||||
State& s = GetState(i);
|
||||
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Preallocate buffer for SearchWithAnchoring()
|
||||
RAPIDJSON_ASSERT(stateSet_ == 0);
|
||||
if (stateCount_ > 0) {
|
||||
stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize()));
|
||||
state0_.template Reserve<SizeType>(stateCount_);
|
||||
state1_.template Reserve<SizeType>(stateCount_);
|
||||
}
|
||||
}
|
||||
|
||||
//! Appends a state to states_ and returns its index.
/*! The new state has no character class attached (rangeStart = kRegexInvalidRange). */
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
    const SizeType newIndex = stateCount_;
    State* created = states_.template Push<State>();
    created->codepoint = codepoint;
    created->rangeStart = kRegexInvalidRange;
    created->out1 = out1;
    created->out = out;
    ++stateCount_;
    return newIndex;
}
|
||||
|
||||
//! Creates a single-state fragment that consumes `codepoint` and pushes it onto the operand stack.
void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
    const SizeType only = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
    Frag* slot = operandStack.template Push<Frag>();
    *slot = Frag(only, only, only); // start, dangling output and minIndex are all the new state
}
|
||||
|
||||
//! Registers one more atom in the current group, inserting a concatenation operator when it is not the first.
void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
    unsigned& atomsInGroup = *atomCountStack.template Top<unsigned>();
    if (atomsInGroup != 0)
        *operatorStack.template Push<Operator>() = kConcatenation;
    ++atomsInGroup;
}
|
||||
|
||||
//! Appends the output list `l2` to the end of the output list `l1` and returns the head of the merged list.
SizeType Append(SizeType l1, SizeType l2) {
    SizeType tail = l1;
    // Walk the `out` chain until the last state of the first list.
    for (SizeType following = GetState(tail).out; following != kRegexInvalidState; following = GetState(tail).out)
        tail = following;
    GetState(tail).out = l2;
    return l1;
}
|
||||
|
||||
//! Points every dangling output in the list starting at `l` to state `s`.
void Patch(SizeType l, SizeType s) {
    while (l != kRegexInvalidState) {
        State& dangling = GetState(l);
        const SizeType following = dangling.out; // Read the link before it is overwritten
        dangling.out = s;
        l = following;
    }
}
|
||||
|
||||
//! Applies operator `op` to the fragment(s) on top of the operand stack.
/*!
    \return false when the stack holds too few operands or `op` is not an
            evaluable operator (e.g. a left parenthesis); true otherwise.
*/
bool Eval(Stack<Allocator>& operandStack, Operator op) {
    const size_t available = operandStack.GetSize();

    if (op == kConcatenation) {
        if (available < sizeof(Frag) * 2)
            return false;
        const Frag right = *operandStack.template Pop<Frag>(1);
        const Frag left = *operandStack.template Pop<Frag>(1);
        Patch(left.out, right.start); // left's dangling outputs now enter right
        *operandStack.template Push<Frag>() = Frag(left.start, right.out, Min(left.minIndex, right.minIndex));
        return true;
    }

    if (op == kAlternation) {
        if (available < sizeof(Frag) * 2)
            return false;
        const Frag right = *operandStack.template Pop<Frag>(1);
        const Frag left = *operandStack.template Pop<Frag>(1);
        const SizeType split = NewState(left.start, right.start, 0);
        const SizeType outputs = Append(left.out, right.out);
        *operandStack.template Push<Frag>() = Frag(split, outputs, Min(left.minIndex, right.minIndex));
        return true;
    }

    if (op == kZeroOrOne) {
        if (available < sizeof(Frag))
            return false;
        const Frag operand = *operandStack.template Pop<Frag>(1);
        const SizeType split = NewState(kRegexInvalidState, operand.start, 0);
        const SizeType outputs = Append(operand.out, split); // Both the operand and the bypass stay dangling
        *operandStack.template Push<Frag>() = Frag(split, outputs, operand.minIndex);
        return true;
    }

    if (op == kZeroOrMore) {
        if (available < sizeof(Frag))
            return false;
        const Frag operand = *operandStack.template Pop<Frag>(1);
        const SizeType split = NewState(kRegexInvalidState, operand.start, 0);
        Patch(operand.out, split); // Loop back to the split after each repetition
        *operandStack.template Push<Frag>() = Frag(split, split, operand.minIndex);
        return true;
    }

    if (op == kOneOrMore) {
        if (available < sizeof(Frag))
            return false;
        const Frag operand = *operandStack.template Pop<Frag>(1);
        const SizeType split = NewState(kRegexInvalidState, operand.start, 0);
        Patch(operand.out, split);
        *operandStack.template Push<Frag>() = Frag(operand.start, split, operand.minIndex); // Entry stays the operand itself
        return true;
    }

    return false;
}
|
||||
|
||||
//! Expands a counted quantifier {n}, {n,} or {n,m} applied to the top operand.
/*!
    \param n Minimum repetition count, must be > 0.
    \param m Maximum repetition count; 0 means "unbounded", otherwise m >= n.
    \return false if there is no operand to quantify, true otherwise.
*/
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
    RAPIDJSON_ASSERT(n > 0);
    RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
    if (operandStack.GetSize() < sizeof(Frag))
        return false;

    const unsigned extraCopies = n - 1;

    // a{3} -> a a a
    for (unsigned k = 0; k < extraCopies; k++)
        CloneTopOperand(operandStack);

    if (m == 0) {
        // a{3,} -> a a a+
        Eval(operandStack, kOneOrMore);
    }
    else if (m > n) {
        CloneTopOperand(operandStack);          // a{3,5} -> a a a a
        Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
        for (unsigned k = n; k < m - 1; k++)
            CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
        for (unsigned k = n; k < m; k++)
            Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
    }

    // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?
    for (unsigned k = 0; k < extraCopies; k++)
        Eval(operandStack, kConcatenation);

    return true;
}
|
||||
|
||||
//! Returns the smaller of two indices (`b` when they are equal).
static SizeType Min(SizeType a, SizeType b) {
    if (a < b)
        return a;
    return b;
}
|
||||
|
||||
//! Duplicates the fragment on top of the operand stack, including all of its states.
/*!
    The states in [minIndex, stateCount_) are copied to the end of states_ and
    every valid out/out1 link inside the copy is shifted by the number of copied
    states, so the clone is an independent fragment with the same shape.
*/
void CloneTopOperand(Stack<Allocator>& operandStack) {
    // Copy the fragment by value: pushing onto operandStack below may reallocate
    // its buffer, which would leave a pointer into the stack dangling.
    const Frag src = *operandStack.template Top<Frag>();
    SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src.minIndex, stateCount_)
    State* s = states_.template Push<State>(count);
    // GetState() is evaluated after the Push above, so it reads from the current buffer.
    memcpy(s, &GetState(src.minIndex), count * sizeof(State));
    for (SizeType j = 0; j < count; j++) {
        if (s[j].out != kRegexInvalidState)
            s[j].out += count;
        if (s[j].out1 != kRegexInvalidState)
            s[j].out1 += count;
    }
    *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
    stateCount_ += count;
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
|
||||
unsigned r = 0;
|
||||
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
|
||||
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
|
||||
return false; // overflow
|
||||
r = r * 10 + (ds.Take() - '0');
|
||||
}
|
||||
*u = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool ParseRange(DecodedStream<InputStream>& ds, SizeType* range) {
|
||||
bool isBegin = true;
|
||||
bool negate = false;
|
||||
int step = 0;
|
||||
SizeType start = kRegexInvalidRange;
|
||||
SizeType current = kRegexInvalidRange;
|
||||
unsigned codepoint;
|
||||
while ((codepoint = ds.Take()) != 0) {
|
||||
if (isBegin) {
|
||||
isBegin = false;
|
||||
if (codepoint == '^') {
|
||||
negate = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
switch (codepoint) {
|
||||
case ']':
|
||||
if (start == kRegexInvalidRange)
|
||||
return false; // Error: nothing inside []
|
||||
if (step == 2) { // Add trailing '-'
|
||||
SizeType r = NewRange('-');
|
||||
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
||||
GetRange(current).next = r;
|
||||
}
|
||||
if (negate)
|
||||
GetRange(start).start |= kRangeNegationFlag;
|
||||
*range = start;
|
||||
return true;
|
||||
|
||||
case '\\':
|
||||
if (ds.Peek() == 'b') {
|
||||
ds.Take();
|
||||
codepoint = 0x0008; // Escape backspace character
|
||||
}
|
||||
else if (!CharacterEscape(ds, &codepoint))
|
||||
return false;
|
||||
// fall through to default
|
||||
|
||||
default:
|
||||
switch (step) {
|
||||
case 1:
|
||||
if (codepoint == '-') {
|
||||
step++;
|
||||
break;
|
||||
}
|
||||
// fall through to step 0 for other characters
|
||||
|
||||
case 0:
|
||||
{
|
||||
SizeType r = NewRange(codepoint);
|
||||
if (current != kRegexInvalidRange)
|
||||
GetRange(current).next = r;
|
||||
if (start == kRegexInvalidRange)
|
||||
start = r;
|
||||
current = r;
|
||||
}
|
||||
step = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
RAPIDJSON_ASSERT(step == 2);
|
||||
GetRange(current).end = codepoint;
|
||||
step = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Appends an unlinked interval covering only `codepoint` to ranges_ and returns its index.
SizeType NewRange(unsigned codepoint) {
    const SizeType newIndex = rangeCount_;
    Range* created = ranges_.template Push<Range>();
    created->end = codepoint;
    created->start = codepoint;
    created->next = kRegexInvalidRange;
    ++rangeCount_;
    return newIndex;
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool CharacterEscape(DecodedStream<InputStream>& ds, unsigned* escapedCodepoint) {
|
||||
unsigned codepoint;
|
||||
switch (codepoint = ds.Take()) {
|
||||
case '^':
|
||||
case '$':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '.':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '\\':
|
||||
*escapedCodepoint = codepoint; return true;
|
||||
case 'f': *escapedCodepoint = 0x000C; return true;
|
||||
case 'n': *escapedCodepoint = 0x000A; return true;
|
||||
case 'r': *escapedCodepoint = 0x000D; return true;
|
||||
case 't': *escapedCodepoint = 0x0009; return true;
|
||||
case 'v': *escapedCodepoint = 0x000B; return true;
|
||||
default:
|
||||
return false; // Unsupported escape character
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InputStream>
|
||||
bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const {
|
||||
RAPIDJSON_ASSERT(IsValid());
|
||||
DecodedStream<InputStream> ds(is);
|
||||
|
||||
state0_.Clear();
|
||||
Stack<Allocator> *current = &state0_, *next = &state1_;
|
||||
const size_t stateSetSize = GetStateSetSize();
|
||||
std::memset(stateSet_, 0, stateSetSize);
|
||||
|
||||
bool matched = AddState(*current, root_);
|
||||
unsigned codepoint;
|
||||
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
|
||||
std::memset(stateSet_, 0, stateSetSize);
|
||||
next->Clear();
|
||||
matched = false;
|
||||
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
||||
const State& sr = GetState(*s);
|
||||
if (sr.codepoint == codepoint ||
|
||||
sr.codepoint == kAnyCharacterClass ||
|
||||
(sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
|
||||
{
|
||||
matched = AddState(*next, sr.out) || matched;
|
||||
if (!anchorEnd && matched)
|
||||
return true;
|
||||
}
|
||||
if (!anchorBegin)
|
||||
AddState(*next, root_);
|
||||
}
|
||||
internal::Swap(current, next);
|
||||
}
|
||||
|
||||
return matched;
|
||||
}
|
||||
|
||||
size_t GetStateSetSize() const {
|
||||
return (stateCount_ + 31) / 32 * 4;
|
||||
}
|
||||
|
||||
// Return whether the added states is a match state
|
||||
bool AddState(Stack<Allocator>& l, SizeType index) const {
|
||||
if (index == kRegexInvalidState)
|
||||
return true;
|
||||
|
||||
const State& s = GetState(index);
|
||||
if (s.out1 != kRegexInvalidState) { // Split
|
||||
bool matched = AddState(l, s.out);
|
||||
return AddState(l, s.out1) || matched;
|
||||
}
|
||||
else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
|
||||
stateSet_[index >> 5] |= (1 << (index & 31));
|
||||
*l.template PushUnsafe<SizeType>() = index;
|
||||
}
|
||||
return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
|
||||
}
|
||||
|
||||
//! Tests `codepoint` against the character class whose first interval is `rangeIndex`.
/*!
    For a plain class the result is true when some interval contains the code
    point; for a negated class (kRangeNegationFlag set on the first interval)
    the result is inverted.
*/
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
    const bool positiveClass = (GetRange(rangeIndex).start & kRangeNegationFlag) == 0;
    for (SizeType i = rangeIndex; i != kRegexInvalidRange; i = GetRange(i).next) {
        const Range& interval = GetRange(i);
        const unsigned low = interval.start & ~kRangeNegationFlag; // Strip the flag bit
        if (low <= codepoint && codepoint <= interval.end)
            return positiveClass;
    }
    return !positiveClass;
}
|
||||
|
||||
Stack<Allocator> states_;
|
||||
Stack<Allocator> ranges_;
|
||||
SizeType root_;
|
||||
SizeType stateCount_;
|
||||
SizeType rangeCount_;
|
||||
|
||||
// For SearchWithAnchoring()
|
||||
uint32_t* stateSet_; // allocated by states_.GetAllocator()
|
||||
mutable Stack<Allocator> state0_;
|
||||
mutable Stack<Allocator> state1_;
|
||||
bool anchorBegin_;
|
||||
bool anchorEnd_;
|
||||
};
|
||||
|
||||
typedef GenericRegex<UTF8<> > Regex;
|
||||
|
||||
} // namespace internal
|
||||
RAPIDJSON_NAMESPACE_END
|
||||
|
||||
#ifdef __clang__
|
||||
RAPIDJSON_DIAG_POP
|
||||
#endif
|
||||
|
||||
#endif // RAPIDJSON_INTERNAL_REGEX_H_
|
@ -38,7 +38,6 @@ public:
|
||||
// Optimization note: Do not allocate memory for stack_ in constructor.
|
||||
// Do it lazily when first Push() -> Expand() -> Resize().
|
||||
Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
|
||||
RAPIDJSON_ASSERT(stackCapacity > 0);
|
||||
}
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
|
||||
@ -146,9 +145,24 @@ public:
|
||||
return reinterpret_cast<T*>(stackTop_ - sizeof(T));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
const T* Top() const {
|
||||
RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
|
||||
return reinterpret_cast<T*>(stackTop_ - sizeof(T));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T* End() { return reinterpret_cast<T*>(stackTop_); }
|
||||
|
||||
template<typename T>
|
||||
const T* End() const { return reinterpret_cast<T*>(stackTop_); }
|
||||
|
||||
template<typename T>
|
||||
T* Bottom() { return reinterpret_cast<T*>(stack_); }
|
||||
|
||||
template<typename T>
|
||||
const T* Bottom() const { return reinterpret_cast<T*>(stack_); }
|
||||
|
||||
bool HasAllocator() const {
|
||||
return allocator_ != 0;
|
||||
}
|
||||
@ -157,6 +171,7 @@ public:
|
||||
RAPIDJSON_ASSERT(allocator_);
|
||||
return *allocator_;
|
||||
}
|
||||
|
||||
//! True when nothing is stored on the stack.
bool Empty() const {
    return stack_ == stackTop_;
}
|
||||
//! Number of bytes currently in use.
size_t GetSize() const {
    return static_cast<size_t>(stackTop_ - stack_);
}
|
||||
//! Number of bytes the current buffer can hold.
size_t GetCapacity() const {
    return static_cast<size_t>(stackEnd_ - stack_);
}
|
||||
|
@ -33,6 +33,22 @@ inline SizeType StrLen(const Ch* s) {
|
||||
return SizeType(p - s);
|
||||
}
|
||||
|
||||
//! Returns number of code points in a encoded string.
|
||||
template<typename Encoding>
|
||||
bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
|
||||
GenericStringStream<Encoding> is(s);
|
||||
const typename Encoding::Ch* end = s + length;
|
||||
SizeType count = 0;
|
||||
while (is.src_ < end) {
|
||||
unsigned codepoint;
|
||||
if (!Encoding::Decode(is, &codepoint))
|
||||
return false;
|
||||
count++;
|
||||
}
|
||||
*outCount = count;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
RAPIDJSON_NAMESPACE_END
|
||||
|
||||
|
@ -101,7 +101,7 @@ public:
|
||||
//@{
|
||||
|
||||
//! Default constructor.
|
||||
GenericPointer() : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
|
||||
GenericPointer(Allocator* allocator = 0)
    : allocator_(allocator),
      ownAllocator_(),
      nameBuffer_(),
      tokens_(),
      tokenCount_(),
      parseErrorOffset_(),
      parseErrorCode_(kPointerParseErrorNone)
{
    // An empty pointer: no tokens and no parse error.
}
|
||||
|
||||
//! Constructor that parses a string or URI fragment representation.
|
||||
/*!
|
||||
@ -160,7 +160,7 @@ public:
|
||||
GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
|
||||
|
||||
//! Copy constructor.
|
||||
GenericPointer(const GenericPointer& rhs) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
|
||||
GenericPointer(const GenericPointer& rhs, Allocator* allocator = 0)
    : allocator_(allocator),
      ownAllocator_(),
      nameBuffer_(),
      tokens_(),
      tokenCount_(),
      parseErrorOffset_(),
      parseErrorCode_(kPointerParseErrorNone)
{
    // Start from an empty pointer, then let the assignment operator take over the content of rhs.
    *this = rhs;
}
|
||||
|
||||
@ -305,6 +305,9 @@ public:
|
||||
|
||||
//@}
|
||||
|
||||
//! Get the allocator of this pointer.
|
||||
Allocator& GetAllocator() { return *allocator_; }
|
||||
|
||||
//!@name Tokens
|
||||
//@{
|
||||
|
||||
|
1979
include/rapidjson/schema.h
Normal file
1979
include/rapidjson/schema.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,8 @@ set(PERFTEST_SOURCES
|
||||
misctest.cpp
|
||||
perftest.cpp
|
||||
platformtest.cpp
|
||||
rapidjsontest.cpp)
|
||||
rapidjsontest.cpp
|
||||
schematest.cpp)
|
||||
|
||||
# Build the perftest executable from the source files collected in PERFTEST_SOURCES.
add_executable(perftest ${PERFTEST_SOURCES})
|
||||
# Link perftest against the libraries listed in TEST_LIBRARIES (set outside this excerpt).
target_link_libraries(perftest ${TEST_LIBRARIES})
|
||||
|
213
test/perftest/schematest.cpp
Normal file
213
test/perftest/schematest.cpp
Normal file
@ -0,0 +1,213 @@
|
||||
#include "perftest.h"
|
||||
|
||||
#if TEST_RAPIDJSON
|
||||
|
||||
#include "rapidjson/schema.h"
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Number of elements of a true array (not a pointer). Fully parenthesized so
// that the macro can be used safely inside larger expressions.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
using namespace rapidjson;
|
||||
|
||||
//! Reads a whole file into a null-terminated buffer obtained from `allocator`.
/*!
    The file is looked up relative to the current directory and to several
    "bin/" directories above it; the first one that can be opened is used.

    \param filename  Path of the file relative to one of the search directories.
    \param allocator Allocator providing Malloc(); it keeps ownership of the buffer.
    \return The file content followed by '\0', or 0 if the file cannot be
            opened, its size cannot be determined, or the allocation fails.
*/
template <typename Allocator>
static char* ReadFile(const char* filename, Allocator& allocator) {
    const char *paths[] = {
        "",
        "bin/",
        "../bin/",
        "../../bin/",
        "../../../bin/"
    };
    FILE *fp = 0;
    for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) {
        // Build the candidate path in a std::string so that a long filename
        // cannot overflow a fixed-size buffer.
        const std::string candidate = std::string(paths[i]) + filename;
        fp = fopen(candidate.c_str(), "rb");
        if (fp)
            break;
    }

    if (!fp)
        return 0;

    char* json = 0;
    if (fseek(fp, 0, SEEK_END) == 0) {
        const long fileSize = ftell(fp); // -1 on failure; must not be turned into a huge size_t
        if (fileSize >= 0 && fseek(fp, 0, SEEK_SET) == 0) {
            const size_t length = static_cast<size_t>(fileSize);
            json = reinterpret_cast<char*>(allocator.Malloc(length + 1));
            if (json) {
                const size_t readLength = fread(json, 1, length, fp);
                json[readLength] = '\0'; // Terminate after what was actually read
            }
        }
    }
    fclose(fp); // Closed on every path once the file has been opened
    return json;
}
|
||||
|
||||
class Schema : public PerfTest {
|
||||
public:
|
||||
Schema() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
PerfTest::SetUp();
|
||||
|
||||
const char* filenames[] = {
|
||||
"additionalItems.json",
|
||||
"additionalProperties.json",
|
||||
"allOf.json",
|
||||
"anyOf.json",
|
||||
"default.json",
|
||||
"definitions.json",
|
||||
"dependencies.json",
|
||||
"enum.json",
|
||||
"items.json",
|
||||
"maximum.json",
|
||||
"maxItems.json",
|
||||
"maxLength.json",
|
||||
"maxProperties.json",
|
||||
"minimum.json",
|
||||
"minItems.json",
|
||||
"minLength.json",
|
||||
"minProperties.json",
|
||||
"multipleOf.json",
|
||||
"not.json",
|
||||
"oneOf.json",
|
||||
"pattern.json",
|
||||
"patternProperties.json",
|
||||
"properties.json",
|
||||
"ref.json",
|
||||
"refRemote.json",
|
||||
"required.json",
|
||||
"type.json",
|
||||
"uniqueItems.json"
|
||||
};
|
||||
|
||||
char jsonBuffer[65536];
|
||||
MemoryPoolAllocator<> jsonAllocator(jsonBuffer, sizeof(jsonBuffer));
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(filenames); i++) {
|
||||
char filename[FILENAME_MAX];
|
||||
sprintf(filename, "jsonschema/tests/draft4/%s", filenames[i]);
|
||||
char* json = ReadFile(filename, jsonAllocator);
|
||||
if (!json) {
|
||||
printf("json test suite file %s not found", filename);
|
||||
return;
|
||||
}
|
||||
|
||||
Document d;
|
||||
d.Parse(json);
|
||||
if (d.HasParseError()) {
|
||||
printf("json test suite file %s has parse error", filename);
|
||||
return;
|
||||
}
|
||||
|
||||
for (Value::ConstValueIterator schemaItr = d.Begin(); schemaItr != d.End(); ++schemaItr) {
|
||||
if (IsExcludeTestSuite((*schemaItr)["description"].GetString()))
|
||||
continue;
|
||||
|
||||
TestSuite* ts = new TestSuite;
|
||||
ts->schema = new SchemaDocument((*schemaItr)["schema"]);
|
||||
|
||||
const Value& tests = (*schemaItr)["tests"];
|
||||
for (Value::ConstValueIterator testItr = tests.Begin(); testItr != tests.End(); ++testItr) {
|
||||
if (IsExcludeTest((*testItr)["description"].GetString()))
|
||||
continue;
|
||||
|
||||
Document* d2 = new Document;
|
||||
d2->CopyFrom((*testItr)["data"], d2->GetAllocator());
|
||||
ts->tests.push_back(d2);
|
||||
}
|
||||
testSuites.push_back(ts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
PerfTest::TearDown();
|
||||
for (TestSuiteList::const_iterator itr = testSuites.begin(); itr != testSuites.end(); ++itr)
|
||||
delete *itr;
|
||||
testSuites.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
// Using the same exclusion in https://github.com/json-schema/JSON-Schema-Test-Suite
|
||||
static bool IsExcludeTestSuite(const std::string& description) {
|
||||
const char* excludeTestSuites[] = {
|
||||
//lost failing these tests
|
||||
"remote ref",
|
||||
"remote ref, containing refs itself",
|
||||
"fragment within remote ref",
|
||||
"ref within remote ref",
|
||||
"change resolution scope",
|
||||
// these below were added to get jsck in the benchmarks)
|
||||
"uniqueItems validation",
|
||||
"valid definition",
|
||||
"invalid definition"
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(excludeTestSuites); i++)
|
||||
if (excludeTestSuites[i] == description)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Using the same exclusion in https://github.com/json-schema/JSON-Schema-Test-Suite
|
||||
static bool IsExcludeTest(const std::string& description) {
|
||||
const char* excludeTests[] = {
|
||||
//lots of validators fail these
|
||||
"invalid definition, invalid definition schema",
|
||||
"maxLength validation, two supplementary Unicode code points is long enough",
|
||||
"minLength validation, one supplementary Unicode code point is not long enough",
|
||||
//this is to get tv4 in the benchmarks
|
||||
"heterogeneous enum validation, something else is invalid"
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(excludeTests); i++)
|
||||
if (excludeTests[i] == description)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
Schema(const Schema&);
|
||||
Schema& operator=(const Schema&);
|
||||
|
||||
protected:
|
||||
typedef std::vector<Document*> DocumentList;
|
||||
|
||||
struct TestSuite {
|
||||
TestSuite() : schema() {}
|
||||
~TestSuite() {
|
||||
delete schema;
|
||||
for (DocumentList::iterator itr = tests.begin(); itr != tests.end(); ++itr)
|
||||
delete *itr;
|
||||
}
|
||||
SchemaDocument* schema;
|
||||
DocumentList tests;
|
||||
};
|
||||
|
||||
typedef std::vector<TestSuite* > TestSuiteList;
|
||||
TestSuiteList testSuites;
|
||||
};
|
||||
|
||||
// Validates every prepared test document against its schema 10000 times and
// reports the throughput.
TEST_F(Schema, TestSuite) {
    typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<> >, MemoryPoolAllocator<> > Validator;

    char validatorBuffer[65536];
    MemoryPoolAllocator<> validatorAllocator(validatorBuffer, sizeof(validatorBuffer));

    const int kPasses = 10000;
    int testCount = 0;
    const clock_t start = clock();
    for (int pass = 0; pass < kPasses; pass++) {
        for (TestSuiteList::const_iterator suiteItr = testSuites.begin(); suiteItr != testSuites.end(); ++suiteItr) {
            const TestSuite& ts = **suiteItr;
            const DocumentList& documents = ts.tests;
            Validator validator(*ts.schema, &validatorAllocator);
            for (DocumentList::const_iterator docItr = documents.begin(); docItr != documents.end(); ++docItr) {
                validator.Reset();
                (*docItr)->Accept(validator);
                testCount++;
            }
            // Recycle the validator memory before moving on to the next suite.
            validatorAllocator.Clear();
        }
    }
    const clock_t end = clock();
    const double duration = double(end - start) / CLOCKS_PER_SEC;
    printf("%d tests in %f s -> %f tests per sec\n", testCount, duration, testCount / duration);
}
|
||||
|
||||
#endif
|
@ -11,7 +11,10 @@ set(UNITTEST_SOURCES
|
||||
pointertest.cpp
|
||||
prettywritertest.cpp
|
||||
readertest.cpp
|
||||
regextest.cpp
|
||||
schematest.cpp
|
||||
simdtest.cpp
|
||||
strfunctest.cpp
|
||||
stringbuffertest.cpp
|
||||
strtodtest.cpp
|
||||
unittest.cpp
|
||||
|
@ -32,6 +32,12 @@ static const char kJson[] = "{\n"
|
||||
" \"m~n\" : 8\n"
|
||||
"}";
|
||||
|
||||
// A default-constructed pointer is valid and has no tokens.
TEST(Pointer, DefaultConstructor) {
    Pointer emptyPointer;
    EXPECT_TRUE(emptyPointer.IsValid());
    EXPECT_EQ(0u, emptyPointer.GetTokenCount());
}
|
||||
|
||||
TEST(Pointer, Parse) {
|
||||
{
|
||||
Pointer p("");
|
||||
|
533
test/unittest/regextest.cpp
Normal file
533
test/unittest/regextest.cpp
Normal file
@ -0,0 +1,533 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "unittest.h"
|
||||
#include "rapidjson/internal/regex.h"
|
||||
|
||||
using namespace rapidjson::internal;
|
||||
|
||||
// Literal concatenation "abc": Match() accepts only the exact string.
TEST(Regex, Concatenation) {
    Regex pattern("abc");
    ASSERT_TRUE(pattern.IsValid());

    // Accepted input.
    EXPECT_TRUE(pattern.Match("abc"));

    // Rejected: empty, partial, and over-long inputs.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("abcd"));
}
|
||||
|
||||
// Alternation between two multi-character branches: "abab" or "abbb".
TEST(Regex, Alternation1) {
    Regex pattern("abab|abbb");
    ASSERT_TRUE(pattern.IsValid());

    // Either branch matches in full.
    EXPECT_TRUE(pattern.Match("abab"));
    EXPECT_TRUE(pattern.Match("abbb"));

    // Prefixes and extensions of a branch do not match.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("ababa"));
    EXPECT_FALSE(pattern.Match("abb"));
    EXPECT_FALSE(pattern.Match("abbbb"));
}
|
||||
|
||||
// Three-way alternation of single characters.
TEST(Regex, Alternation2) {
    Regex pattern("a|b|c");
    ASSERT_TRUE(pattern.IsValid());

    // Each alternative matches on its own.
    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("c"));

    // Empty input and two-character inputs are rejected.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("aa"));
    EXPECT_FALSE(pattern.Match("ab"));
}
|
||||
|
||||
// A leading group followed by a literal: "(ab)c" is checked against the same inputs as "abc".
TEST(Regex, Parenthesis1) {
    Regex pattern("(ab)c");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("abc"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("abcd"));
}
|
||||
|
||||
// A literal followed by a trailing group: "a(bc)" is checked against the same inputs as "abc".
TEST(Regex, Parenthesis2) {
    Regex pattern("a(bc)");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("abc"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("abcd"));
}
|
||||
|
||||
// Two grouped alternations in sequence: one of {a,b} followed by one of {c,d}.
TEST(Regex, Parenthesis3) {
    Regex pattern("(a|b)(c|d)");
    ASSERT_TRUE(pattern.IsValid());

    // All four combinations are accepted.
    EXPECT_TRUE(pattern.Match("ac"));
    EXPECT_TRUE(pattern.Match("ad"));
    EXPECT_TRUE(pattern.Match("bc"));
    EXPECT_TRUE(pattern.Match("bd"));

    // Empty input and pairs drawn from a single group are rejected.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("cd"));
}
|
||||
|
||||
// '?' on a single character: zero or one 'a'.
TEST(Regex, ZeroOrOne1) {
    Regex pattern("a?");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match(""));
    EXPECT_TRUE(pattern.Match("a"));

    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// Optional 'a' followed by a mandatory 'b'.
TEST(Regex, ZeroOrOne2) {
    Regex pattern("a?b");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("ab"));

    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("aa"));
    EXPECT_FALSE(pattern.Match("bb"));
    EXPECT_FALSE(pattern.Match("ba"));
}
|
||||
|
||||
// Mandatory 'a' followed by an optional 'b'.
TEST(Regex, ZeroOrOne3) {
    Regex pattern("ab?");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("ab"));

    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("aa"));
    EXPECT_FALSE(pattern.Match("bb"));
    EXPECT_FALSE(pattern.Match("ba"));
}
|
||||
|
||||
// Two consecutive optional characters: optional 'a' then optional 'b'.
TEST(Regex, ZeroOrOne4) {
    Regex pattern("a?b?");
    ASSERT_TRUE(pattern.IsValid());

    // Every in-order subset of "ab" is accepted.
    EXPECT_TRUE(pattern.Match(""));
    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("ab"));

    // Repeats, wrong order, and trailing characters are rejected.
    EXPECT_FALSE(pattern.Match("aa"));
    EXPECT_FALSE(pattern.Match("bb"));
    EXPECT_FALSE(pattern.Match("ba"));
    EXPECT_FALSE(pattern.Match("abc"));
}
|
||||
|
||||
// '?' applied to a parenthesized group sitting between two literals.
TEST(Regex, ZeroOrOne5) {
    Regex pattern("a(ab)?b");
    ASSERT_TRUE(pattern.IsValid());

    // Group absent, then group present once.
    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aabb"));

    // Only half of the group present.
    EXPECT_FALSE(pattern.Match("aab"));
    EXPECT_FALSE(pattern.Match("abb"));
}
|
||||
|
||||
// '*' on a single character: any number of 'a', including none.
TEST(Regex, ZeroOrMore1) {
    Regex pattern("a*");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match(""));
    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("aa"));

    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
}
|
||||
|
||||
// Any number of 'a' followed by exactly one 'b'.
TEST(Regex, ZeroOrMore2) {
    Regex pattern("a*b");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aab"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("bb"));
}
|
||||
|
||||
// A run of 'a' (possibly empty) followed by a run of 'b' (possibly empty).
TEST(Regex, ZeroOrMore3) {
    Regex pattern("a*b*");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match(""));
    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("aa"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("bb"));
    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aabb"));

    // 'b' before 'a' breaks the required order.
    EXPECT_FALSE(pattern.Match("ba"));
}
|
||||
|
||||
// '*' applied to a parenthesized group sitting between two literals.
TEST(Regex, ZeroOrMore4) {
    Regex pattern("a(ab)*b");
    ASSERT_TRUE(pattern.IsValid());

    // Group repeated zero, one, and two times.
    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aabb"));
    EXPECT_TRUE(pattern.Match("aababb"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// '+' on a single character: at least one 'a'.
TEST(Regex, OneOrMore1) {
    Regex pattern("a+");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("aa"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
}
|
||||
|
||||
// At least one 'a' followed by exactly one 'b'.
TEST(Regex, OneOrMore2) {
    Regex pattern("a+b");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aab"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("b"));
}
|
||||
|
||||
// A non-empty run of 'a' followed by a non-empty run of 'b'.
TEST(Regex, OneOrMore3) {
    Regex pattern("a+b+");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("ab"));
    EXPECT_TRUE(pattern.Match("aab"));
    EXPECT_TRUE(pattern.Match("abb"));
    EXPECT_TRUE(pattern.Match("aabb"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ba"));
}
|
||||
|
||||
// '+' applied to a parenthesized group sitting between two literals.
TEST(Regex, OneOrMore4) {
    Regex pattern("a(ab)+b");
    ASSERT_TRUE(pattern.IsValid());

    // Group repeated once and twice.
    EXPECT_TRUE(pattern.Match("aabb"));
    EXPECT_TRUE(pattern.Match("aababb"));

    // The group may not be omitted.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("ab"));
}
|
||||
|
||||
// Exact-count quantifier {3} on a single character.
TEST(Regex, QuantifierExact1) {
    Regex pattern("ab{3}c");
    ASSERT_TRUE(pattern.IsValid());

    // Exactly three 'b'.
    EXPECT_TRUE(pattern.Match("abbbc"));

    // Zero, one, two, and four 'b'.
    EXPECT_FALSE(pattern.Match("ac"));
    EXPECT_FALSE(pattern.Match("abc"));
    EXPECT_FALSE(pattern.Match("abbc"));
    EXPECT_FALSE(pattern.Match("abbbbc"));
}
|
||||
|
||||
// Exact-count quantifier {3} on a parenthesized group.
TEST(Regex, QuantifierExact2) {
    Regex pattern("a(bc){3}d");
    ASSERT_TRUE(pattern.IsValid());

    // Exactly three "bc".
    EXPECT_TRUE(pattern.Match("abcbcbcd"));

    // Zero, one, two, and four "bc".
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abcd"));
    EXPECT_FALSE(pattern.Match("abcbcd"));
    EXPECT_FALSE(pattern.Match("abcbcbcbcd"));
}
|
||||
|
||||
// Exact-count quantifier {3} on a grouped alternation.
TEST(Regex, QuantifierExact3) {
    Regex pattern("a(b|c){3}d");
    ASSERT_TRUE(pattern.IsValid());

    // Three characters, each 'b' or 'c', between 'a' and 'd'.
    EXPECT_TRUE(pattern.Match("abbbd"));
    EXPECT_TRUE(pattern.Match("acccd"));
    EXPECT_TRUE(pattern.Match("abcbd"));

    // Too few or too many repetitions.
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abbd"));
    EXPECT_FALSE(pattern.Match("accccd"));
    EXPECT_FALSE(pattern.Match("abbbbd"));
}
|
||||
|
||||
// Open-ended quantifier {3,} on a single character.
TEST(Regex, QuantifierMin1) {
    Regex pattern("ab{3,}c");
    ASSERT_TRUE(pattern.IsValid());

    // Three, four, and five 'b'.
    EXPECT_TRUE(pattern.Match("abbbc"));
    EXPECT_TRUE(pattern.Match("abbbbc"));
    EXPECT_TRUE(pattern.Match("abbbbbc"));

    // Fewer than three 'b'.
    EXPECT_FALSE(pattern.Match("ac"));
    EXPECT_FALSE(pattern.Match("abc"));
    EXPECT_FALSE(pattern.Match("abbc"));
}
|
||||
|
||||
// Open-ended quantifier {3,} on a parenthesized group.
TEST(Regex, QuantifierMin2) {
    Regex pattern("a(bc){3,}d");
    ASSERT_TRUE(pattern.IsValid());

    // Three and four "bc".
    EXPECT_TRUE(pattern.Match("abcbcbcd"));
    EXPECT_TRUE(pattern.Match("abcbcbcbcd"));

    // Fewer than three "bc".
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abcd"));
    EXPECT_FALSE(pattern.Match("abcbcd"));
}
|
||||
|
||||
// Open-ended quantifier {3,} on a grouped alternation.
TEST(Regex, QuantifierMin3) {
    Regex pattern("a(b|c){3,}d");
    ASSERT_TRUE(pattern.IsValid());

    // Three or more characters, each 'b' or 'c', between 'a' and 'd'.
    EXPECT_TRUE(pattern.Match("abbbd"));
    EXPECT_TRUE(pattern.Match("acccd"));
    EXPECT_TRUE(pattern.Match("abcbd"));
    EXPECT_TRUE(pattern.Match("accccd"));
    EXPECT_TRUE(pattern.Match("abbbbd"));

    // Fewer than three repetitions.
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abbd"));
}
|
||||
|
||||
// Bounded quantifier {3,5} on a single character.
TEST(Regex, QuantifierMinMax1) {
    Regex pattern("ab{3,5}c");
    ASSERT_TRUE(pattern.IsValid());

    // Three, four, and five 'b'.
    EXPECT_TRUE(pattern.Match("abbbc"));
    EXPECT_TRUE(pattern.Match("abbbbc"));
    EXPECT_TRUE(pattern.Match("abbbbbc"));

    // Below the minimum, then above the maximum.
    EXPECT_FALSE(pattern.Match("ac"));
    EXPECT_FALSE(pattern.Match("abc"));
    EXPECT_FALSE(pattern.Match("abbc"));
    EXPECT_FALSE(pattern.Match("abbbbbbc"));
}
|
||||
|
||||
// Bounded quantifier {3,5} on a parenthesized group.
TEST(Regex, QuantifierMinMax2) {
    Regex pattern("a(bc){3,5}d");
    ASSERT_TRUE(pattern.IsValid());

    // Three, four, and five "bc".
    EXPECT_TRUE(pattern.Match("abcbcbcd"));
    EXPECT_TRUE(pattern.Match("abcbcbcbcd"));
    EXPECT_TRUE(pattern.Match("abcbcbcbcbcd"));

    // Below the minimum, then above the maximum.
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abcd"));
    EXPECT_FALSE(pattern.Match("abcbcd"));
    EXPECT_FALSE(pattern.Match("abcbcbcbcbcbcd"));
}
|
||||
|
||||
// Bounded quantifier {3,5} on a grouped alternation.
TEST(Regex, QuantifierMinMax3) {
    Regex pattern("a(b|c){3,5}d");
    ASSERT_TRUE(pattern.IsValid());

    // Three to five characters, each 'b' or 'c', between 'a' and 'd'.
    EXPECT_TRUE(pattern.Match("abbbd"));
    EXPECT_TRUE(pattern.Match("acccd"));
    EXPECT_TRUE(pattern.Match("abcbd"));
    EXPECT_TRUE(pattern.Match("accccd"));
    EXPECT_TRUE(pattern.Match("abbbbd"));
    EXPECT_TRUE(pattern.Match("acccccd"));
    EXPECT_TRUE(pattern.Match("abbbbbd"));

    // Below the minimum, then above the maximum.
    EXPECT_FALSE(pattern.Match("ad"));
    EXPECT_FALSE(pattern.Match("abbd"));
    EXPECT_FALSE(pattern.Match("accccccd"));
    EXPECT_FALSE(pattern.Match("abbbbbbd"));
}
|
||||
|
||||
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
|
||||
|
||||
// '+' applied to a multi-byte UTF-8 character (EURO) placed between 'a' and 'b'.
TEST(Regex, Unicode) {
    Regex pattern("a" EURO "+b");
    ASSERT_TRUE(pattern.IsValid());

    // One and two Euro signs.
    EXPECT_TRUE(pattern.Match("a" EURO "b"));
    EXPECT_TRUE(pattern.Match("a" EURO EURO "b"));

    EXPECT_FALSE(pattern.Match("a?b"));
    // A stray 0xAC byte after the Euro sign: an implementation unaware of UTF-8 would match this.
    EXPECT_FALSE(pattern.Match("a" EURO "\xAC" "b"));
}
|
||||
|
||||
// '.' matches exactly one character, including the multi-byte EURO sequence.
TEST(Regex, AnyCharacter) {
    Regex pattern(".");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match(EURO));

    // Zero characters and two characters are rejected.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// Character class listing its members explicitly.
TEST(Regex, CharacterRange1) {
    Regex pattern("[abc]");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("c"));

    // Empty input, the neighbours '`' and 'd', and a two-character input.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("`"));
    EXPECT_FALSE(pattern.Match("d"));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// Negated character class listing its members explicitly.
TEST(Regex, CharacterRange2) {
    Regex pattern("[^abc]");
    ASSERT_TRUE(pattern.IsValid());

    // Characters outside the listed set.
    EXPECT_TRUE(pattern.Match("`"));
    EXPECT_TRUE(pattern.Match("d"));

    // Listed characters, empty input, and a two-character input.
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("c"));
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// Character class written as a range.
TEST(Regex, CharacterRange3) {
    Regex pattern("[a-c]");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("b"));
    EXPECT_TRUE(pattern.Match("c"));

    // Empty input, the neighbours '`' and 'd', and a two-character input.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("`"));
    EXPECT_FALSE(pattern.Match("d"));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// Negated character class written as a range.
TEST(Regex, CharacterRange4) {
    Regex pattern("[^a-c]");
    ASSERT_TRUE(pattern.IsValid());

    // Characters outside the range.
    EXPECT_TRUE(pattern.Match("`"));
    EXPECT_TRUE(pattern.Match("d"));

    // Characters inside the range, empty input, and a two-character input.
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("c"));
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("aa"));
}
|
||||
|
||||
// A class containing only '-' matches the hyphen itself.
TEST(Regex, CharacterRange5) {
    Regex pattern("[-]");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("-"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("a"));
}
|
||||
|
||||
// A trailing '-' inside a class is a literal hyphen, not a range operator.
TEST(Regex, CharacterRange6) {
    Regex pattern("[a-]");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("-"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("`"));
    EXPECT_FALSE(pattern.Match("b"));
}
|
||||
|
||||
// A leading '-' inside a class is a literal hyphen, not a range operator.
TEST(Regex, CharacterRange7) {
    Regex pattern("[-a]");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("a"));
    EXPECT_TRUE(pattern.Match("-"));

    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("`"));
    EXPECT_FALSE(pattern.Match("b"));
}
|
||||
|
||||
// Several ranges in one class, repeated with '*': alphanumeric strings.
TEST(Regex, CharacterRange8) {
    Regex pattern("[a-zA-Z0-9]*");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Match("Milo"));
    EXPECT_TRUE(pattern.Match("MT19937"));
    EXPECT_TRUE(pattern.Match("43"));

    // Inputs containing a non-alphanumeric character.
    EXPECT_FALSE(pattern.Match("a_b"));
    EXPECT_FALSE(pattern.Match("!"));
}
|
||||
|
||||
// Unanchored Search(): "abc" is found anywhere inside the input.
TEST(Regex, Search) {
    Regex pattern("abc");
    ASSERT_TRUE(pattern.IsValid());

    // Needle at the start, middle, and end, and repeated.
    EXPECT_TRUE(pattern.Search("abc"));
    EXPECT_TRUE(pattern.Search("_abc"));
    EXPECT_TRUE(pattern.Search("abc_"));
    EXPECT_TRUE(pattern.Search("_abc_"));
    EXPECT_TRUE(pattern.Search("__abc__"));
    EXPECT_TRUE(pattern.Search("abcabc"));

    // Inputs that never contain the full needle.
    EXPECT_FALSE(pattern.Search("a"));
    EXPECT_FALSE(pattern.Search("ab"));
    EXPECT_FALSE(pattern.Search("bc"));
    EXPECT_FALSE(pattern.Search("cba"));
}
|
||||
|
||||
// Search() with a leading '^': the needle must sit at the start of the input.
TEST(Regex, Search_BeginAnchor) {
    Regex pattern("^abc");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Search("abc"));
    EXPECT_TRUE(pattern.Search("abc_"));
    EXPECT_TRUE(pattern.Search("abcabc"));

    // Needle present but not at the start.
    EXPECT_FALSE(pattern.Search("_abc"));
    EXPECT_FALSE(pattern.Search("_abc_"));

    // Needle absent.
    EXPECT_FALSE(pattern.Search("a"));
    EXPECT_FALSE(pattern.Search("ab"));
    EXPECT_FALSE(pattern.Search("bc"));
    EXPECT_FALSE(pattern.Search("cba"));
}
|
||||
|
||||
// Search() with a trailing '$': the needle must sit at the end of the input.
TEST(Regex, Search_EndAnchor) {
    Regex pattern("abc$");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Search("abc"));
    EXPECT_TRUE(pattern.Search("_abc"));
    EXPECT_TRUE(pattern.Search("abcabc"));

    // Needle present but not at the end.
    EXPECT_FALSE(pattern.Search("abc_"));
    EXPECT_FALSE(pattern.Search("_abc_"));

    // Needle absent.
    EXPECT_FALSE(pattern.Search("a"));
    EXPECT_FALSE(pattern.Search("ab"));
    EXPECT_FALSE(pattern.Search("bc"));
    EXPECT_FALSE(pattern.Search("cba"));
}
|
||||
|
||||
// Search() with both '^' and '$': only the exact string is found.
TEST(Regex, Search_BothAnchor) {
    Regex pattern("^abc$");
    ASSERT_TRUE(pattern.IsValid());

    EXPECT_TRUE(pattern.Search("abc"));

    EXPECT_FALSE(pattern.Search(""));
    EXPECT_FALSE(pattern.Search("a"));
    EXPECT_FALSE(pattern.Search("b"));
    EXPECT_FALSE(pattern.Search("ab"));
    EXPECT_FALSE(pattern.Search("abcd"));
}
|
||||
|
||||
// Backslash escapes for metacharacters and control characters, plus escapes inside classes.
TEST(Regex, Escape) {
    const char* escapedSource = "\\^\\$\\|\\(\\)\\?\\*\\+\\.\\[\\]\\{\\}\\\\\\f\\n\\r\\t\\v[\\b][\\[][\\]]";
    Regex pattern(escapedSource);
    ASSERT_TRUE(pattern.IsValid());

    // The pattern matches the unescaped character sequence it denotes...
    EXPECT_TRUE(pattern.Match("^$|()?*+.[]{}\\\x0C\n\r\t\x0B\b[]"));
    // ...and does not match its own escaped source text.
    EXPECT_FALSE(pattern.Match(escapedSource));
}
|
||||
|
||||
// Each pattern below is expected to be rejected: IsValid() must be false after construction.
TEST(Regex, Invalid) {
// Local helper: build a Regex from `source` in its own scope and expect it to be invalid.
#define EXPECT_REJECTED(source) \
    {\
        Regex rejected(source);\
        EXPECT_FALSE(rejected.IsValid());\
    }

    // Alternation and parentheses.
    EXPECT_REJECTED("a|");
    EXPECT_REJECTED("()");
    EXPECT_REJECTED(")");
    EXPECT_REJECTED("(a))");
    EXPECT_REJECTED("(a|)");
    EXPECT_REJECTED("(a||b)");
    EXPECT_REJECTED("(|b)");

    // Quantifier characters with no preceding operand.
    EXPECT_REJECTED("?");
    EXPECT_REJECTED("*");
    EXPECT_REJECTED("+");
    EXPECT_REJECTED("{");
    EXPECT_REJECTED("{}");

    // Brace quantifiers with rejected counts.
    EXPECT_REJECTED("a{a}");
    EXPECT_REJECTED("a{0}");
    EXPECT_REJECTED("a{-1}");
    EXPECT_REJECTED("a{}");
    EXPECT_REJECTED("a{0,}");
    EXPECT_REJECTED("a{,0}");
    EXPECT_REJECTED("a{1,0}");
    EXPECT_REJECTED("a{-1,0}");
    EXPECT_REJECTED("a{-1,1}");

    // Character classes and escapes.
    EXPECT_REJECTED("[]");
    EXPECT_REJECTED("[^]");
    EXPECT_REJECTED("[\\a]");
    EXPECT_REJECTED("\\a");

#undef EXPECT_REJECTED
}
|
||||
|
||||
#undef EURO
|
1157
test/unittest/schematest.cpp
Normal file
1157
test/unittest/schematest.cpp
Normal file
File diff suppressed because it is too large
Load Diff
31
test/unittest/strfunctest.cpp
Normal file
31
test/unittest/strfunctest.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "unittest.h"
|
||||
|
||||
#include "rapidjson/internal/strfunc.h"
|
||||
|
||||
using namespace rapidjson;
|
||||
using namespace rapidjson::internal;
|
||||
|
||||
// CountStringCodePoint<UTF8<> > reports the number of code points for valid input
// and returns false for a sequence ending in a stray 0x80 byte.
TEST(StrFunc, CountStringCodePoint) {
    SizeType codePoints;

    // Empty string.
    EXPECT_TRUE(CountStringCodePoint<UTF8<> >("", 0, &codePoints));
    EXPECT_EQ(0u, codePoints);

    // Five single-byte characters.
    EXPECT_TRUE(CountStringCodePoint<UTF8<> >("Hello", 5, &codePoints));
    EXPECT_EQ(5u, codePoints);

    // Nine bytes encoding three characters: cents, euro, G-clef.
    EXPECT_TRUE(CountStringCodePoint<UTF8<> >("\xC2\xA2\xE2\x82\xAC\xF0\x9D\x84\x9E", 9, &codePoints));
    EXPECT_EQ(3u, codePoints);

    // The same bytes followed by an extra 0x80 byte must be rejected.
    EXPECT_FALSE(CountStringCodePoint<UTF8<> >("\xC2\xA2\xE2\x82\xAC\xF0\x9D\x84\x9E\x80", 10, &codePoints));
}
|
Loading…
x
Reference in New Issue
Block a user