Skip to content

Commit

Permalink
Add method Schema::parse_str_with_list to parse root schemas with r…
Browse files Browse the repository at this point in the history
…eferences (#104)

* Add method `Schema::parse_str_with_list`

Add a new method `Schema::parse_str_with_list` that can parse
a root schema along with a list of named schemas.  This method
can be used to parse a root union that has named schema references,
for example.

This new method combines the functionality of `Schema::parse_str`
with `Schema::parse_list`.

* Run cargo fmt

* Issue #104 - Minor cleanup and add more tests

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

---------

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
Co-authored-by: Martin Tzvetanov Grigorov <[email protected]>
  • Loading branch information
rayokota and martin-g authored Jan 18, 2025
1 parent 5422f25 commit f7208b0
Showing 1 changed file with 152 additions and 11 deletions.
163 changes: 152 additions & 11 deletions avro/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ impl Schema {
parser.parse_str(input)
}

/// Create a array of `Schema`'s from a list of named JSON Avro schemas (Record, Enum, and
/// Create an array of `Schema`'s from a list of named JSON Avro schemas (Record, Enum, and
/// Fixed).
///
/// It is allowed that the schemas have cross-dependencies; these will be resolved
Expand Down Expand Up @@ -1096,6 +1096,45 @@ impl Schema {
parser.parse_list()
}

/// Create a `Schema` from a string representing a JSON Avro schema,
/// along with an array of `Schema`'s from a list of named JSON Avro schemas (Record, Enum, and
/// Fixed).
///
/// It is allowed that the schemas have cross-dependencies; these will be resolved
/// during parsing.
///
/// If two of the named input schemas have the same fullname, an Error will be returned.
///
/// # Arguments
/// * `schema` - the JSON string of the schema to parse
/// * `schemata` - a slice of additional schemas that is used to resolve cross-references
pub fn parse_str_with_list(schema: &str, schemata: &[&str]) -> AvroResult<Schema> {
let mut input_schemas: HashMap<Name, Value> = HashMap::with_capacity(schemata.len());
let mut input_order: Vec<Name> = Vec::with_capacity(schemata.len());
for json in schemata {
let schema: Value = serde_json::from_str(json).map_err(Error::ParseSchemaJson)?;
if let Value::Object(inner) = &schema {
let name = Name::parse(inner, &None)?;
if let Some(_previous) = input_schemas.insert(name.clone(), schema) {
return Err(Error::NameCollision(name.fullname(None)));
}
input_order.push(name);
} else {
return Err(Error::GetNameField);
}
}
let mut parser = Parser {
input_schemas,
resolving_schemas: HashMap::default(),
input_order,
parsed_schemas: HashMap::with_capacity(schemata.len()),
};
parser.parse_input_schemas()?;

let value = serde_json::from_str(schema).map_err(Error::ParseSchemaJson)?;
parser.parse(&value, &None)
}

/// Create a `Schema` from a reader which implements [`Read`].
pub fn parse_reader(reader: &mut (impl Read + ?Sized)) -> AvroResult<Schema> {
let mut buf = String::new();
Expand Down Expand Up @@ -1214,6 +1253,21 @@ impl Parser {
/// Parse all input schemas and return them as a list, in the order recorded in
/// `input_order`. The input schemas may have cross-dependencies; these are resolved
/// during parsing.
///
/// This drains `input_order` and removes each returned schema from `parsed_schemas`.
///
/// # Panics
/// Panics if a name in `input_order` has no entry in `parsed_schemas` after parsing.
fn parse_list(&mut self) -> Result<Vec<Schema>, Error> {
    self.parse_input_schemas()?;

    // Borrow the map on its own so the closure below does not need all of `self`
    // while `input_order` is being drained.
    let resolved = &mut self.parsed_schemas;
    let mut ordered = Vec::with_capacity(resolved.len());
    ordered.extend(self.input_order.drain(..).map(|name| {
        resolved
            .remove(&name)
            .expect("One of the input schemas was unexpectedly not parsed")
    }));
    Ok(ordered)
}

/// Convert the input schemas to parsed_schemas
fn parse_input_schemas(&mut self) -> Result<(), Error> {
while !self.input_schemas.is_empty() {
let next_name = self
.input_schemas
Expand All @@ -1229,16 +1283,7 @@ impl Parser {
self.parsed_schemas
.insert(get_schema_type_name(name, value), parsed);
}

let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len());
for name in self.input_order.drain(0..) {
let parsed = self
.parsed_schemas
.remove(&name)
.expect("One of the input schemas was unexpectedly not parsed");
parsed_schemas.push(parsed);
}
Ok(parsed_schemas)
Ok(())
}

/// Create a `Schema` from a `serde_json::Value` representing a JSON Avro
Expand Down Expand Up @@ -2695,6 +2740,102 @@ mod tests {
Ok(())
}

/// A root union that only references the named records `A` and `B` parses into a
/// union of `Schema::Ref`s when both records are supplied as additional schemas.
#[test]
fn avro_rs_104_test_root_union_of_records() -> TestResult {
    // The two records differ only in their name.
    let record_a = r#"{
"name": "A",
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    let record_b = r#"{
"name": "B",
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    let root_union = r#"["A", "B"]"#;

    let parsed = Schema::parse_str_with_list(root_union, &[record_a, record_b])?;

    let expected_members = vec![
        Schema::Ref {
            name: Name::new("A")?,
        },
        Schema::Ref {
            name: Name::new("B")?,
        },
    ];
    let expected = Schema::Union(UnionSchema::new(expected_members)?);

    assert_eq!(parsed, expected);
    Ok(())
}

/// Supplying two additional schemas with the same fullname must be rejected with a
/// name collision error.
#[test]
fn avro_rs_104_test_root_union_of_records_name_collision() -> TestResult {
    // Both definitions declare the record "A".
    let first_definition = r#"{
"name": "A",
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    let second_definition = r#"{
"name": "A",
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    let root_union = r#"["A", "A"]"#;

    let outcome = Schema::parse_str_with_list(root_union, &[first_definition, second_definition]);
    if let Err(err) = outcome {
        assert_eq!(
            err.to_string(),
            "Two schemas with the same fullname were given: \"A\""
        );
    } else {
        unreachable!("Expected an error that the name is already defined");
    }

    Ok(())
}

/// An additional schema without a `name` attribute must be rejected.
#[test]
fn avro_rs_104_test_root_union_of_records_no_name() -> TestResult {
    let named_record = r#"{
"name": "A",
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    // This record deliberately omits the `name` attribute.
    let unnamed_record = r#"{
"type": "record",
"fields": [
{"name": "field_one", "type": "float"}
]
}"#;

    let root_union = r#"["A", "A"]"#;

    let outcome = Schema::parse_str_with_list(root_union, &[named_record, unnamed_record]);
    if let Err(err) = outcome {
        assert_eq!(err.to_string(), "No `name` field");
    } else {
        unreachable!("Expected an error that schema_str_b is missing a name field");
    }

    Ok(())
}

#[test]
fn avro_3584_test_recursion_records() -> TestResult {
// A and B are the same except the name.
Expand Down

0 comments on commit f7208b0

Please sign in to comment.