Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON Schema validator #254

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions API/fleece/Expert.hh
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ namespace fleece {
FLSharedKeys FL_NULLABLE _sk {nullptr};
};

// Constructs an iterator over `d` by handing the Dict, the caller-supplied FLSharedKeys and
// this iterator's own storage to FLDictIterator_BeginShared.
inline Dict::iterator::iterator(Dict d, FLSharedKeys sk) {
    FLDictIterator_BeginShared(d, sk, this);
}


//====== DEPRECATED:

Expand Down
15 changes: 14 additions & 1 deletion API/fleece/FLExpert.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#ifndef _FLOBSCURE_H
#define _FLOBSCURE_H

#include "FLValue.h"
#include "FLCollections.h"

FL_ASSUME_NONNULL_BEGIN

Expand Down Expand Up @@ -165,6 +165,14 @@ extern "C" {
FLEECE_PUBLIC void FLSharedKeys_Release(FLSharedKeys FL_NULLABLE) FLAPI;


/** Initializes a Dict iterator, providing the Dict's FLSharedKeys instance.
This is an optimization that saves FLDictIterator_GetKeyString from the overhead of having
to look up the SharedKeys when the first shared key is encountered.
@warning The FLSharedKeys MUST be the same instance associated with the Dict, or incorrect
key strings will be returned. */
FLEECE_PUBLIC void FLDictIterator_BeginShared(FLDict FL_NULLABLE, FLSharedKeys, FLDictIterator*) FLAPI;


typedef struct _FLSharedKeyScope* FLSharedKeyScope;

/** Registers a range of memory containing Fleece data that uses the given shared keys.
Expand Down Expand Up @@ -305,6 +313,11 @@ extern "C" {

/** @} */

/** Follows a JSON Pointer [RFC 6901] from a root Value
@returns the resolved Value, or NULL if not found (or the JSON Pointer is invalid.) */
FLEECE_PUBLIC FLValue FL_NULLABLE FLEvalJSONPointer(FLString jsonPointer,
FLValue root,
FLError* outError) FLAPI;

/** @} */

Expand Down
7 changes: 6 additions & 1 deletion API/fleece/Fleece.hh
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,13 @@ namespace fleece {
inline Value get(Key &key) const;
inline Value operator[] (Key &key) const {return get(key);}

inline Value get(FLDictKey&) const;
inline Value operator[] (FLDictKey& key) const {return get(key);}

class iterator : private FLDictIterator {
public:
inline iterator(Dict);
inline explicit iterator(Dict);
inline explicit iterator(Dict, FLSharedKeys);
inline iterator(const FLDictIterator &i) :FLDictIterator(i) { }
inline uint32_t count() const {return FLDictIterator_GetCount(this);}
inline Value key() const;
Expand Down Expand Up @@ -576,6 +580,7 @@ namespace fleece {
// Returns the result of FLDict_IsEmpty for this Dict.
inline bool Dict::empty() const {
    return FLDict_IsEmpty(*this);
}

// Looks up the value for a key given as a string slice, via FLDict_Get.
inline Value Dict::get(slice_NONNULL key) const {
    return FLDict_Get(*this, key);
}

// Looks up the value for a Dict::Key, passing the FLDictKey it wraps to FLDict_GetWithKey.
inline Value Dict::get(Dict::Key &key) const {
    return FLDict_GetWithKey(*this, &key._key);
}

// Looks up the value for a raw FLDictKey, which is passed by address to FLDict_GetWithKey.
inline Value Dict::get(FLDictKey &key) const {
    return FLDict_GetWithKey(*this, &key);
}

// Moves the key string into `_str`, then initializes `_key` from that stored string.
inline Dict::Key::Key(alloc_slice s)
    : _str(std::move(s))
    , _key(FLDictKey_Init(_str))
{ }

// Converts the slice to an alloc_slice and delegates to the alloc_slice constructor.
inline Dict::Key::Key(slice_NONNULL s)
    : Key(alloc_slice(s))
{ }
Expand Down
189 changes: 189 additions & 0 deletions API/fleece/JSONSchema.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
//
// JSONSchema.hh
//
// Copyright 2025-Present Couchbase, Inc.
//
// Use of this software is governed by the Business Source License included
// in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
// in that file, in accordance with the Business Source License, use of this
// software will be governed by the Apache License, Version 2.0, included in
// the file licenses/APL2.txt.
//

#pragma once
#ifndef _FLEECE_JSONSCHEMA_HH
#define _FLEECE_JSONSCHEMA_HH
#include "fleece/Fleece.hh"
#include "fleece/Mutable.hh"
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>

FL_ASSUME_NONNULL_BEGIN

namespace fleece {

/** Validates Values against a JSON Schema. (See https://json-schema.org )
*
* Unsupported features (will throw an `unsupported_schema` exception if detected):
* - Path-relative `$ref`s (URIs that start with `/`)
* - `$dynamicRef`, `$dynamicAnchor`, `$vocabulary`
* - `format`, `contentEncoding`, `contentMediaType`
* - `dependencies`, `dependentRequired`, `dependentSchemas`, `extends`
* - `unevaluatedItems`, `unevaluatedProperties`
*
* Known bugs:
* - JSON Schema's equality comparisons do not distinguish between integers and floats,
* so `7` is equal to `7.0`. However, Fleece considers ints and floats distinct types.
* This implementation conforms to JSON Schema equality when making direct comparisons
* between numeric Values, but _not_ when the numbers are nested in collections.
* So for example `[7]` will not match `[7.0]`.
*
* @note This class does not download schemas on demand; it does no I/O at all.
* See the docs of \ref unknownSchemaID to see how to handle external schema refs.
* @note This class is thread-safe.
*/
class JSONSchema {
public:

/** Thrown if errors are discovered in a schema. */
class invalid_schema : public std::runtime_error { using runtime_error::runtime_error; };
/** Thrown if a schema is found to use unsupported/unimplemented features. */
class unsupported_schema : public std::runtime_error { using runtime_error::runtime_error; };

// The result type returned by \ref validate; its full definition follows this class.
class Validation;


/// Constructor that takes a parsed JSON schema object.
/// @note The Value will be retained, so the caller doesn't need to keep a reference.
/// @param schemaRoot The parsed schema.
/// @param id_uri The absolute URI identifying this schema. Optional.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
explicit JSONSchema(Value schemaRoot, std::string_view id_uri = "");

/// Convenience constructor that takes a JSON schema string and parses it.
/// @param json The schema as JSON data.
/// @param id_uri The absolute URI identifying this schema. Optional.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
explicit JSONSchema(std::string_view json, std::string_view id_uri = "");

/// Destructor. Declared here but defined out of line, since `Impl` is only
/// forward-declared in this header and `_impl` is a `std::unique_ptr<Impl>`.
~JSONSchema();

/// The root of the parsed schema. (Almost always a Dict.)
Value schema() const;

/// Registers an external schema that the main schema may refer to.
/// @note The Dict will be retained, so the caller doesn't need to keep a reference.
/// @param schemaRoot The parsed schema.
/// @param id_uri The absolute URI identifying this schema.
/// @throws invalid_schema if the schema is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
void addSchema(Dict schemaRoot, std::string_view id_uri);

/// Validates a parsed Fleece value against the schema.
/// @returns A \ref Validation object describing the result.
/// @throws invalid_schema if the schema itself is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
Validation validate(Value value) const LIFETIMEBOUND;

/// Convenience method that parses JSON and then validates it against the schema.
/// @returns A \ref Validation object describing the result.
/// @throws std::invalid_argument if the JSON fails to parse.
/// @throws invalid_schema if the schema itself is invalid.
/// @throws unsupported_schema if the schema uses unsupported features.
Validation validate(std::string_view json) const LIFETIMEBOUND;
/// Overload of the above that also takes a SharedKeys.
/// NOTE(review): presumably the SharedKeys is used when parsing the JSON; its exact role
/// is not visible in this header -- confirm against the implementation.
Validation validate(std::string_view json, SharedKeys) const LIFETIMEBOUND;


/** Errors that can occur during validation. */
enum class Error : unsigned {
ok = 0,
invalid, // value matched against a "false" in the schema
typeMismatch, // value doesn't match "type" property in schema
outOfRange, // Number is out of range of "minimum", etc.
notMultiple, // Number is not a multiple of the "multipleOf"
tooShort, // String is too short or collection has too few items
tooLong, // String is too long or collection has too many items
patternMismatch, // String doesn't match regex pattern
missingProperty, // Dict is missing a required property
unknownProperty, // Dict has an invalid property
notEnum, // Value doesn't match any "enum" or "const" value
tooFew, // Value doesn't match anything in an "anyOf" or "oneOf" array
tooMany, // "oneOf" or "maxContains" failed
notNot, // Value matched a "not" schema
notUnique, // Array items are not unique
invalidUTF8, // A string's length could not be checked because of invalid UTF-8
unknownSchemaRef, // Reference to a schema URI that's not registered
};

/// True if `e` is `Error::ok`.
static bool ok(Error e) noexcept {return e == Error::ok;}
/// Returns a string for the given Error value. (The exact text is defined in the
/// implementation file, not in this header.)
static std::string_view errorString(Error) noexcept;

private:
// Opaque implementation type; only forward-declared here and defined elsewhere.
struct Impl;
// Owning pointer to the implementation object.
std::unique_ptr<Impl> _impl;
};


/** The result of validating against a JSONSchema. */
class JSONSchema::Validation {
public:
/// True if validation succeeded.
bool ok() const noexcept {return _result.error == Error::ok;}
/// Explicit boolean conversion; returns the same thing as \ref ok.
explicit operator bool() const {return ok();}

/// The specific error. (Will be `Error::ok` if there was no error.)
Error error() const noexcept { return _result.error; }

/// The specific error, as a string.
/// NOTE(review): declared `noexcept` but returns `std::string` by value; if constructing
/// the string throws (e.g. `std::bad_alloc`) the program will terminate. The same applies
/// to \ref errorPath and \ref errorSchemaURI below.
std::string errorString() const noexcept;

/// The detected invalid Value; either the one passed to \ref validate
/// or something nested in it. (Will be nullptr if there was no error.)
Value errorValue() const noexcept {return _result.value;}

/// On error, this is the path to the detected invalid Value, in \ref KeyPath syntax.
std::string errorPath() const noexcept;

/// The key and value of the item in the schema that caused the failure;
/// e.g. `{"maxLength", 5}`.
std::pair<slice,Value> errorSchema() const noexcept;

/// A URI pointing to the item in the schema that caused the failure.
std::string errorSchemaURI() const noexcept;

/// If the error is `Error::unknownSchemaRef`, this is the URI of the unknown schema.
/// If you can download or otherwise look up the schema, you can call \ref addSchema
/// to register it, then call \ref validate again to retry.
std::string const& unknownSchemaID() const noexcept {return _unknownSchema;}

/// Raw result record: an Error code plus `value`, `schema` and `schemaKey` fields.
/// (`error` and `value` are what \ref error and \ref errorValue return. Presumably `schema`
/// and `schemaKey` identify the schema item that rejected the value -- confirm in the
/// implementation.)
struct Result {Error error; Value value; Value schema; slice schemaKey;};
/// True if the Result's error is `Error::ok`.
static bool ok(Result const& e) noexcept {return e.error == Error::ok;}
private:
// JSONSchema is a friend so that it can call the private constructor below.
friend class JSONSchema;

// Private constructor: a Validation cannot be constructed directly by client code.
Validation(JSONSchema const& schema, Value value);
// Internal checking helpers, defined in the implementation file. Each returns a Result.
// NOTE(review): presumably each handles one kind of value, as the names suggest; the meaning
// of `schemaBase` is not visible in this header -- confirm against the implementation.
Result check(Value value, Value schema, Dict schemaBase);
Result checkValue(Value value, Dict schema, Dict schemaBase);
Result checkNumber(Value value, Dict schema, Dict schemaBase);
Result checkString(Value value, Dict schema, Dict schemaBase);
Result checkArray(Array, Dict schema, Dict schemaBase);
Result checkDict(Dict, Dict schema, Dict schemaBase);

// Type-test helpers; the type may be given as a Value or as a string slice.
static bool isType(Value value, Value typeVal);
static bool isType(Value value, slice schemaType);

// This is a reference, not a copy, so a Validation must not outlive the JSONSchema that
// produced it (the `validate` methods are marked LIFETIMEBOUND accordingly).
Impl const& _schemaImpl; // The guts of the owning JSONSchema
RetainedValue _value; // The root Value being validated (only after failure)
Result _result {}; // Details of validation error
std::string _unknownSchema; // Unknown schema ID found during validation
};

}

FL_ASSUME_NONNULL_END

#endif // _FLEECE_JSONSCHEMA_HH
30 changes: 30 additions & 0 deletions API/fleece/slice.hh
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@ namespace fleece {
will not overflow the buffer. Returns false if the slice was truncated. */
inline bool toCString(char *buf, size_t bufSize) const noexcept;

/** Computes the number of characters in a UTF-8 string, and detects invalid UTF-8.
@returns On success, a pair with the character count and `true`.
On failure, a pair with the byte offset of the error and `false`. */
std::pair<size_t,bool> UTF8Length() const FLPURE;


constexpr operator FLSlice () const noexcept {return {buf, size};}

#ifdef __APPLE__
Expand Down Expand Up @@ -581,6 +587,30 @@ namespace fleece {
}


inline std::pair<size_t,bool> pure_slice::UTF8Length() const {
// See <https://en.wikipedia.org/wiki/UTF-8>
size_t length = 0;
uint8_t const* cp = begin(), *e = end();
while (cp < e) {
++length;
uint8_t c = *cp;
if ((c & 0x80) == 0) [[likely]]
cp += 1;
else if ((c & 0xE0) == 0xC0)
cp += 2;
else if ((c & 0xF0) == 0xE0)
cp += 3;
else if ((c & 0xF8) == 0xF0)
cp += 4;
else [[unlikely]]
return {cp - begin(), false};
}
if (cp != e) [[unlikely]]
return {cp - begin(), false};
return {length, true};
}


#pragma mark COMPARISON & FIND:


Expand Down
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ else()
message(FATAL_ERROR "Unsupported platform ${CMAKE_SYSTEM_NAME}!")
endif()


# Enable link-time optimization (CMake "IPO") for every build type except Debug, provided
# the toolchain supports it.
# Note that the test is a plain string comparison, so an empty/unset CMAKE_BUILD_TYPE also
# takes this branch.
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
include(CheckIPOSupported)
# `supported` receives the yes/no result; `error` receives the checker's output message.
check_ipo_supported(RESULT supported OUTPUT error)
if( supported )
message(STATUS "LTO enabled")
# Setting this here (before the targets are defined) supplies the default for targets
# created afterwards.
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
# Not fatal: just report why and continue without LTO.
message(STATUS "LTO not supported: <${error}>")
endif()
endif()


set(FLEECE_CXX_WARNINGS "")
if(FLEECE_WARNINGS_HARDCORE)
if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
Expand Down Expand Up @@ -137,9 +150,11 @@ add_executable(FleeceTests EXCLUDE_FROM_ALL
${FLEECE_TEST_SRC}
vendor/catch/catch_amalgamated.cpp
vendor/catch/CaseListReporter.cc
Tests/SchemaTests.cc
)
setup_test_build()
target_include_directories(FleeceTests PRIVATE
Experimental
Tests
vendor/catch
)
Expand Down
Empty file added Experimental/JSONSchema.cc
Empty file.
Loading