Skip to content

Commit

Permalink
API: JSON Schema validator class
Browse files Browse the repository at this point in the history
  • Loading branch information
snej committed Jan 29, 2025
1 parent cb859d8 commit 3037b5b
Show file tree
Hide file tree
Showing 56 changed files with 14,039 additions and 2 deletions.
188 changes: 188 additions & 0 deletions API/fleece/JSONSchema.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
//
// JSONSchema.hh
//
// Copyright 2025-Present Couchbase, Inc.
//
// Use of this software is governed by the Business Source License included
// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
// in that file, in accordance with the Business Source License, use of this
// software will be governed by the Apache License, Version 2.0, included in
// the file licenses/APL2.txt.
//

#pragma once
#ifndef _FLEECE_JSONSCHEMA_HH
#define _FLEECE_JSONSCHEMA_HH
#include "fleece/Fleece.hh"
#include "fleece/Mutable.hh"
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

FL_ASSUME_NONNULL_BEGIN

namespace fleece {

/** Validates Values against a JSON Schema. (See https://json-schema.org )
*
* Unsupported features (will throw an `unsupported_schema` exception if detected):
* - Path-relative `$ref`s (URIs that start with `/`)
* - `$dynamicRef`, `$dynamicAnchor`, `$vocabulary`
* - `format`, `contentEncoding`, `contentMediaType`
* - `dependencies`, `dependentRequired`, `dependentSchemas`, `extends`
* - `unevaluatedItems`, `unevaluatedProperties`
*
* Known bugs:
* - JSON Schema's equality comparisons do not distinguish between integers and floats,
* so `7` is equal to `7.0`. However, Fleece considers ints and floats distinct types.
* This implementation conforms to JSON Schema equality when making direct comparisons
* between numeric Values, but _not_ when the numbers are nested in collections.
* So for example `[7]` will not match `[7.0]`.
*
* @note This class does not download schemas on demand; it does no I/O at all.
* See the docs of \ref unknownSchemaID to see how to handle external schema refs.
* @note This class is thread-safe.
*/
class JSONSchema {
public:

/** Thrown if errors are discovered in a schema. */
class invalid_schema : public std::runtime_error { using runtime_error::runtime_error; };
/** Thrown if a schema is found to use unsupported/unimplemented features. */
class unsupported_schema : public std::runtime_error { using runtime_error::runtime_error; };

/** The result of a validation; returned by the \ref validate methods. */
class Validation;


/// Constructor that takes a parsed JSON schema object.
/// @note The Value will be retained, so the caller doesn't need to keep a reference.
/// @param schemaRoot The parsed schema.
/// @param id_uri The absolute URI identifying this schema. Optional.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
explicit JSONSchema(Value schemaRoot, std::string_view id_uri = "");

/// Convenience constructor that takes a JSON schema string and parses it.
/// @param json The schema as JSON data.
/// @param id_uri The absolute URI identifying this schema. Optional.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
explicit JSONSchema(std::string_view json, std::string_view id_uri = "");

/// Destructor. Only declared here: `Impl` is an incomplete type in this header, so the
/// `std::unique_ptr<Impl>` member can only be destroyed where `Impl` is fully defined.
~JSONSchema();

/// The root of the parsed schema. (Almost always a Dict.)
Value schema() const;

/// Registers an external schema that the main schema may refer to.
/// @note The Dict will be retained, so the caller doesn't need to keep a reference.
/// @param schemaRoot The parsed schema.
/// @param id_uri The absolute URI identifying this schema.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
void addSchema(Dict schemaRoot, std::string_view id_uri);

/// Validates a parsed Fleece value against the schema.
/// @param value The value to validate.
/// @returns A \ref Validation object describing the result.
/// @throws invalid_schema if the schema itself is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
Validation validate(Value value) const LIFETIMEBOUND;

/// Convenience method that parses JSON and then validates it against the schema.
/// @param json The JSON data to parse and validate.
/// @returns A \ref Validation object describing the result.
/// @throws std::invalid_argument if the JSON fails to parse.
/// @throws invalid_schema if the schema itself is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
Validation validate(std::string_view json) const LIFETIMEBOUND;
/// Overload of the JSON convenience method that additionally takes a SharedKeys.
/// NOTE(review): presumably the SharedKeys is used when converting the JSON to Fleece;
/// confirm against the implementation, which is not visible in this header.
Validation validate(std::string_view json, SharedKeys) const LIFETIMEBOUND;


/** Errors that can occur during validation. */
enum class Error : unsigned {
ok = 0,
invalid, // value matched against a "false" in the schema
typeMismatch, // value doesn't match "type" property in schema
outOfRange, // Number is out of range of "minimum", etc.
notMultiple, // Number is not a multiple of the "multipleOf"
tooShort, // String is too short or collection has too few items
tooLong, // String is too long or collection has too many items
patternMismatch, // String doesn't match regex pattern
missingProperty, // Dict is missing a required property
unknownProperty, // Dict has an invalid property
notEnum, // Value doesn't match any "enum" or "const" value
tooFew, // Value doesn't match anything in an "anyOf" or "oneOf" array
tooMany, // "oneOf" or "maxContains" failed
notNot, // Value matched a "not" schema
notUnique, // Array items are not unique
invalidUTF8, // A string's length could not be checked because of invalid UTF-8
unknownSchemaRef, // Reference to a schema URI that's not registered
};

/// True if the given error code is `Error::ok`, i.e. represents success.
static bool ok(Error e) noexcept {return e == Error::ok;}
/// Returns a string form of the given error code.
static std::string_view errorString(Error) noexcept;

private:
// Implementation details are hidden behind an opaque struct that is only
// forward-declared in this header.
struct Impl;
std::unique_ptr<Impl> _impl;
};


/** The result of validating against a JSONSchema. */
class JSONSchema::Validation {
public:
    /// True if validation succeeded.
    bool ok() const noexcept {return _result.error == Error::ok;}

    /// Same as \ref ok, so a Validation can be tested directly in a condition,
    /// e.g. `if (schema.validate(value)) ...`.
    /// Declared `noexcept` for consistency with \ref ok, which it simply forwards to.
    explicit operator bool() const noexcept {return ok();}

    /// The specific error. (Will be `Error::ok` if there was no error.)
    Error error() const noexcept { return _result.error; }

    /// The specific error, as a string.
    std::string errorString() const noexcept;

    /// The detected invalid Value; either the one passed to \ref validate
    /// or something nested in it. (Will be nullptr if there was no error.)
    Value errorValue() const noexcept {return _result.value;}

    /// On error, this is the path to the detected invalid Value, in \ref KeyPath syntax.
    std::string errorPath() const noexcept;

    /// The key and value of the item in the schema that caused the failure;
    /// e.g. `{"maxLength", 5}`.
    std::pair<slice,Value> errorSchema() const noexcept;

    /// A URI pointing to the item in the schema that caused the failure.
    std::string errorSchemaURI() const noexcept;

    /// If the error is `Error::unknownSchemaRef`, this is the URI of the unknown schema.
    /// If you can download or otherwise look up the schema, you can call \ref addSchema
    /// to register it, then call \ref validate again to retry.
    std::string const& unknownSchemaID() const noexcept {return _unknownSchema;}

    /// The outcome of a single check: the error code, the Value it applies to, and the
    /// schema item involved (`schema` together with `schemaKey`).
    /// A value-initialized Result has `error == Error::ok`, since `Error::ok` is 0.
    struct Result {Error error; Value value; Value schema; slice schemaKey;};

    /// True if the given Result carries no error.
    static bool ok(Result const& e) noexcept {return e.error == Error::ok;}

private:
    friend class JSONSchema;    // JSONSchema creates Validations via the private constructor

    /// Constructs a Validation of `value` against `schema`.
    Validation(JSONSchema const& schema, Value value);

    // Internal checking routines; each returns a Result for the given value and schema.
    // NOTE(review): judging by the names, `check` is the general entry point and the
    // others handle specific value types; the role of `schemaBase` is not visible in
    // this header -- confirm against the implementation.
    Result check(Value value, Value schema, Dict schemaBase);
    Result checkValue(Value value, Dict schema, Dict schemaBase);
    Result checkNumber(Value value, Dict schema, Dict schemaBase);
    Result checkString(Value value, Dict schema, Dict schemaBase);
    Result checkArray(Array, Dict schema, Dict schemaBase);
    Result checkDict(Dict, Dict schema, Dict schemaBase);

    // Type tests; the second argument is a type given as a schema Value or as a string.
    // NOTE(review): presumably these implement the schema "type" keyword -- confirm.
    static bool isType(Value value, Value typeVal);
    static bool isType(Value value, slice schemaType);

    Impl const&     _schemaImpl;        // The guts of the owning JSONSchema
    RetainedValue   _value;             // The root Value being validated (only after failure)
    Result          _result {};         // Details of validation error
    std::string     _unknownSchema;     // Unknown schema ID found during validation
};

}

FL_ASSUME_NONNULL_END

#endif // _FLEECE_JSONSCHEMA_HH
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,11 @@ add_executable(FleeceTests EXCLUDE_FROM_ALL
${FLEECE_TEST_SRC}
vendor/catch/catch_amalgamated.cpp
vendor/catch/CaseListReporter.cc
Tests/SchemaTests.cc
)
setup_test_build()
target_include_directories(FleeceTests PRIVATE
Experimental
Tests
vendor/catch
)
Expand Down
Empty file added Experimental/JSONSchema.cc
Empty file.
24 changes: 22 additions & 2 deletions Fleece.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
2739971725CDBD8E000C1C1B /* SmallVectorBase.hh in Headers */ = {isa = PBXBuildFile; fileRef = 2739971625CDBD8E000C1C1B /* SmallVectorBase.hh */; };
273CD2D825E874CD00B93C59 /* Base64.hh in Headers */ = {isa = PBXBuildFile; fileRef = 273CD2D625E874CD00B93C59 /* Base64.hh */; };
273CD2D925E874CD00B93C59 /* Base64.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273CD2D725E874CD00B93C59 /* Base64.cc */; };
273F3BCE2D4AA26D00BFAD13 /* JSONSchema.hh in Headers */ = {isa = PBXBuildFile; fileRef = 273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */; };
273F3BD02D4AA26D00BFAD13 /* JSONSchema.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */; };
273F3BD42D4AA2A800BFAD13 /* SchemaTests.cc in Sources */ = {isa = PBXBuildFile; fileRef = 273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */; };
274281A4262F7CBF00862700 /* slice+ObjC.mm in Sources */ = {isa = PBXBuildFile; fileRef = 274281A3262F7CBF00862700 /* slice+ObjC.mm */; };
274D8244209A3A77008BB39F /* HeapDict.cc in Sources */ = {isa = PBXBuildFile; fileRef = 274D8242209A3A77008BB39F /* HeapDict.cc */; };
274D8245209A3A77008BB39F /* HeapDict.hh in Headers */ = {isa = PBXBuildFile; fileRef = 274D8243209A3A77008BB39F /* HeapDict.hh */; };
Expand Down Expand Up @@ -86,7 +89,6 @@
279AC5381C096B5C002C80DB /* libfleeceStatic.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 270FA25C1BF53CAD005DCB13 /* libfleeceStatic.a */; };
279AC53C1C097941002C80DB /* Value+Dump.cc in Sources */ = {isa = PBXBuildFile; fileRef = 279AC53B1C097941002C80DB /* Value+Dump.cc */; };
27A0E3DF24DCD86900380563 /* ConcurrentArena.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27A0E3DD24DCD86900380563 /* ConcurrentArena.hh */; };
27A0E3E024DCD86900380563 /* ConcurrentArena.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27A0E3DE24DCD86900380563 /* ConcurrentArena.cc */; };
27A1327B2C700D45008E84FA /* JSLexer.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27A1327A2C700D45008E84FA /* JSLexer.hh */; };
27A132812C73BF8C008E84FA /* FLEncoder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27A132802C73BF8C008E84FA /* FLEncoder.cc */; };
27A2F73B21248DA50081927B /* FLSlice.h in Headers */ = {isa = PBXBuildFile; fileRef = 27A2F73A21248DA40081927B /* FLSlice.h */; };
Expand Down Expand Up @@ -350,6 +352,10 @@
2739971625CDBD8E000C1C1B /* SmallVectorBase.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SmallVectorBase.hh; sourceTree = "<group>"; };
273CD2D625E874CD00B93C59 /* Base64.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Base64.hh; sourceTree = "<group>"; };
273CD2D725E874CD00B93C59 /* Base64.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Base64.cc; sourceTree = "<group>"; };
273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = JSONSchema.hh; sourceTree = "<group>"; };
273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = JSONSchema.cc; sourceTree = "<group>"; };
273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = SchemaTests.cc; sourceTree = "<group>"; };
273F3BD32D4AA2A800BFAD13 /* travel-schema.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = "travel-schema.json"; sourceTree = "<group>"; };
274281A3262F7CBF00862700 /* slice+ObjC.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = "slice+ObjC.mm"; sourceTree = "<group>"; };
2746DD3B1D931BE9000517BC /* Benchmark.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Benchmark.hh; sourceTree = "<group>"; };
2747D9841CFB9BC300C48211 /* 1person.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = 1person.json; sourceTree = "<group>"; };
Expand Down Expand Up @@ -560,6 +566,7 @@
274C948B2150175700F9AEA9 /* Doxyfile */,
274C948C215058BB00F9AEA9 /* Doxyfile_C++ */,
271507DE212225B3005FE6E8 /* API */,
273F3BCD2D4AA26D00BFAD13 /* Experimental */,
270FA25E1BF53CAD005DCB13 /* Fleece */,
279AC5321C096872002C80DB /* Tool */,
272E5A441BF7FD1700848580 /* Tests */,
Expand Down Expand Up @@ -686,11 +693,13 @@
272E5A5E1BF91DBE00848580 /* ObjCTests.mm */,
27AEFAC4210913C500106ED8 /* DeltaTests.cc */,
271507F6212349B8005FE6E8 /* API_ValueTests.cc */,
273F3BD22D4AA2A800BFAD13 /* SchemaTests.cc */,
278163B81CE6BB8C00B94E32 /* C_Test.c */,
27EC8D5B1CEBA72E00199FE6 /* mn_wordlist.h */,
2747D9841CFB9BC300C48211 /* 1person.json */,
2776AA232086C94B004ACE85 /* 1person-deepIterOutput.txt */,
2776AA242086CC1F004ACE85 /* 1person-shallowIterOutput.txt */,
273F3BD32D4AA2A800BFAD13 /* travel-schema.json */,
);
path = Tests;
sourceTree = "<group>";
Expand All @@ -712,6 +721,15 @@
path = Integration;
sourceTree = "<group>";
};
273F3BCD2D4AA26D00BFAD13 /* Experimental */ = {
isa = PBXGroup;
children = (
273F3BC92D4AA26D00BFAD13 /* JSONSchema.hh */,
273F3BCA2D4AA26D00BFAD13 /* JSONSchema.cc */,
);
path = Experimental;
sourceTree = "<group>";
};
2760A4DA25E96DB000E2ECB2 /* wyhash */ = {
isa = PBXGroup;
children = (
Expand Down Expand Up @@ -969,6 +987,7 @@
27AEFAC321090FF400106ED8 /* JSONDelta.hh in Headers */,
27298E661C00F8A9000CFBA8 /* jsonsl.h in Headers */,
277A06B420B36D1A00970354 /* FileUtils.hh in Headers */,
273F3BCE2D4AA26D00BFAD13 /* JSONSchema.hh in Headers */,
27E3DD4D1DB6C32400F2872D /* CatchHelper.hh in Headers */,
27AEFAC921091A8C00106ED8 /* diff_match_patch.hh in Headers */,
2734B8A51F8583FF00BE5249 /* MDict.hh in Headers */,
Expand Down Expand Up @@ -1000,7 +1019,6 @@
278343132A675A7000621050 /* function_ref.hh in Headers */,
270FA2801BF53CEA005DCB13 /* Writer.hh in Headers */,
270FA27D1BF53CEA005DCB13 /* Encoder.hh in Headers */,
27C8DF072084102900A99BFC /* MutableHashTree.hh in Headers */,
27A1327B2C700D45008E84FA /* JSLexer.hh in Headers */,
27A924D01D9C32E800086206 /* Path.hh in Headers */,
274D8245209A3A77008BB39F /* HeapDict.hh in Headers */,
Expand Down Expand Up @@ -1325,6 +1343,7 @@
buildActionMask = 2147483647;
files = (
270FA27E1BF53CEA005DCB13 /* Encoder+ObjC.mm in Sources */,
273F3BD02D4AA26D00BFAD13 /* JSONSchema.cc in Sources */,
270FA27B1BF53CEA005DCB13 /* Value+ObjC.mm in Sources */,
27C4ACAC1CE5146500938365 /* Array.cc in Sources */,
277A06B320B36D1A00970354 /* FileUtils.cc in Sources */,
Expand Down Expand Up @@ -1380,6 +1399,7 @@
272E5A5D1BF800A100848580 /* EncoderTests.cc in Sources */,
27E3DD531DB7DB1C00F2872D /* SharedKeysTests.cc in Sources */,
27AEFAC5210913C500106ED8 /* DeltaTests.cc in Sources */,
273F3BD42D4AA2A800BFAD13 /* SchemaTests.cc in Sources */,
274D824F209A8D01008BB39F /* MutableTests.cc in Sources */,
271507F7212349B8005FE6E8 /* API_ValueTests.cc in Sources */,
27298E781C01A461000CFBA8 /* PerfTests.cc in Sources */,
Expand Down
Loading

0 comments on commit 3037b5b

Please sign in to comment.