Skip to content

Commit

Permalink
Updating the version with new pipelines and error handling (#32)
Browse files Browse the repository at this point in the history
* adding glue module to enable error handling (#27)

* added batching runner (#28)

* Kings college london integration (#30)

* adding build using binary downloads (#8)

* adding build using binary downloads

* sorting out the build.rs

* updating build.rs for surrealml package

* prepping version for release

* now has target tracking (#10)

* adding check in build.rs for docs.rs

* removing build.rs for main surrealml to ensure that libraries using the core do not need to do anything in their build.rs

* adding machine learning pipelines for bioengineering projects at Kings College London

* Remove integrated_training_runner/run_env/ from tracking

* adding machine learning pipelines for bioengineering projects at Kings College London

* Update FFmpeg data access module and README (#29)

* adding run_env to the gitignore

---------

Co-authored-by: Yang Li <[email protected]>

* bumping the version

---------

Co-authored-by: Sam Hillman <[email protected]>
Co-authored-by: Yang Li <[email protected]>
  • Loading branch information
3 people authored Mar 14, 2024
1 parent be18536 commit 09536a8
Show file tree
Hide file tree
Showing 41 changed files with 1,116 additions and 185 deletions.
8 changes: 6 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,9 @@ modules/core/onnx_driver/
*.so
surrealml/rust_surrealml.cpython-310-darwin.so
.surmlcache
modules/core/model_stash/
modules/pipelines/runners/integrated_training_runner/run_env/
./modules/core/model_stash/
./modules/pipelines/runners/integrated_training_runner/run_env/
./modules/pipelines/runners/batch_training_runner/run_env/
./modules/pipelines/data_access/target/
./modules/pipelines/runners/integrated_training_runner/run_env/
modules/pipelines/runners/integrated_training_runner/run_env/
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
workspace = { members = ["modules/glue"] }
[package]
name = "surrealml"
version = "0.1.0"
Expand Down
3 changes: 2 additions & 1 deletion modules/core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "surrealml-core"
version = "0.0.8"
version = "0.0.9"
edition = "2021"
build = "./build.rs"
description = "The core machine learning library for SurrealML that enables SurrealDB to store and load ML models"
Expand All @@ -16,6 +16,7 @@ once_cell = "1.18.0"
bytes = "1.5.0"
futures-util = "0.3.28"
futures-core = "0.3.28"
glue = { path = "../glue" }

[dev-dependencies]
tokio = { version = "1.12.0", features = ["full"] }
Expand Down
Binary file added modules/core/model_stash/sklearn/onnx/linear.onnx
Binary file not shown.
Binary file added modules/core/model_stash/sklearn/surml/linear.surml
Binary file not shown.
Binary file added modules/core/model_stash/torch/surml/linear.surml
Binary file not shown.
54 changes: 29 additions & 25 deletions modules/core/src/execution/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ use std::collections::HashMap;
use ndarray::{ArrayD, CowArray};
use ort::{SessionBuilder, Value, session::Input};
use super::onnx_environment::ENVIRONMENT;
use glue::{
safe_eject,
errors::error::{SurrealError, SurrealErrorStatus}
};


/// A wrapper for the loaded machine learning model so we can perform computations on the loaded model.
Expand All @@ -24,9 +28,9 @@ impl <'a>ModelComputation<'a> {
///
/// # Returns
/// A Tensor that can be used as input to the loaded model.
pub fn input_tensor_from_key_bindings(&self, input_values: HashMap<String, f32>) -> ArrayD<f32> {
let buffer = self.input_vector_from_key_bindings(input_values);
ndarray::arr1::<f32>(&buffer).into_dyn()
pub fn input_tensor_from_key_bindings(&self, input_values: HashMap<String, f32>) -> Result<ArrayD<f32>, SurrealError> {
let buffer = self.input_vector_from_key_bindings(input_values)?;
Ok(ndarray::arr1::<f32>(&buffer).into_dyn())
}

/// Creates a vector of dimensions for the input tensor from the loaded model.
Expand Down Expand Up @@ -54,16 +58,18 @@ impl <'a>ModelComputation<'a> {
///
/// # Returns
/// A Vector that can be used manipulated with other operations such as normalisation.
pub fn input_vector_from_key_bindings(&self, mut input_values: HashMap<String, f32>) -> Vec<f32> {
pub fn input_vector_from_key_bindings(&self, mut input_values: HashMap<String, f32>) -> Result<Vec<f32>, SurrealError> {
let mut buffer = Vec::with_capacity(self.surml_file.header.keys.store.len());

for key in &self.surml_file.header.keys.store {
let value = match input_values.get_mut(key) {
Some(value) => value,
None => panic!("Key {} not found in input values", key)
None => return Err(SurrealError::new(format!("src/execution/compute.rs 67: Key {} not found in input values", key), SurrealErrorStatus::NotFound))
};
buffer.push(std::mem::take(value));
}
buffer

Ok(buffer)
}

/// Performs a raw computation on the loaded model.
Expand All @@ -73,15 +79,15 @@ impl <'a>ModelComputation<'a> {
///
/// # Returns
/// The computed output tensor from the loaded model.
pub fn raw_compute(&self, tensor: ArrayD<f32>, _dims: Option<(i32, i32)>) -> Result<Vec<f32>, String> {
let session = SessionBuilder::new(&ENVIRONMENT).map_err(|e| e.to_string())?
.with_model_from_memory(&self.surml_file.model)
.map_err(|e| e.to_string())?;
pub fn raw_compute(&self, tensor: ArrayD<f32>, _dims: Option<(i32, i32)>) -> Result<Vec<f32>, SurrealError> {
let session = safe_eject!(SessionBuilder::new(&ENVIRONMENT), SurrealErrorStatus::Unknown);
let session = safe_eject!(session.with_model_from_memory(&self.surml_file.model), SurrealErrorStatus::Unknown);
let unwrapped_dims = ModelComputation::process_input_dims(&session.inputs[0]);
let tensor = tensor.into_shape(unwrapped_dims).map_err(|e| e.to_string())?;
let tensor = safe_eject!(tensor.into_shape(unwrapped_dims), SurrealErrorStatus::Unknown);

let x = CowArray::from(tensor).into_dyn();
let outputs = session.run(vec![Value::from_array(session.allocator(), &x).unwrap()]).map_err(|e| e.to_string())?;
let input_values = safe_eject!(Value::from_array(session.allocator(), &x), SurrealErrorStatus::Unknown);
let outputs = safe_eject!(session.run(vec![input_values]), SurrealErrorStatus::Unknown);

let mut buffer: Vec<f32> = Vec::new();

Expand All @@ -93,13 +99,8 @@ impl <'a>ModelComputation<'a> {
}
},
Err(_) => {
match outputs[0].try_extract::<i64>() {
Ok(y) => {
for i in y.view().clone().into_iter() {
buffer.push(*i as f32);
}
},
Err(e) => return Err(e.to_string())
for i in safe_eject!(outputs[0].try_extract::<i64>(), SurrealErrorStatus::Unknown).view().clone().into_iter() {
buffer.push(*i as f32);
}
}
};
Expand All @@ -117,18 +118,18 @@ impl <'a>ModelComputation<'a> {
///
/// # Returns
/// The computed output tensor from the loaded model.
pub fn buffered_compute(&self, input_values: &mut HashMap<String, f32>) -> Result<Vec<f32>, String> {
pub fn buffered_compute(&self, input_values: &mut HashMap<String, f32>) -> Result<Vec<f32>, SurrealError> {
// applying normalisers if present
for (key, value) in &mut *input_values {
let value_ref = value.clone();
match self.surml_file.header.get_normaliser(&key.to_string()) {
match self.surml_file.header.get_normaliser(&key.to_string())? {
Some(normaliser) => {
*value = normaliser.normalise(value_ref);
},
None => {}
}
}
let tensor = self.input_tensor_from_key_bindings(input_values.clone());
let tensor = self.input_tensor_from_key_bindings(input_values.clone())?;
let output = self.raw_compute(tensor, None)?;

// if no normaliser is present, return the output
Expand All @@ -139,7 +140,10 @@ impl <'a>ModelComputation<'a> {
// apply the normaliser to the output
let output_normaliser = match self.surml_file.header.output.normaliser.as_ref() {
Some(normaliser) => normaliser,
None => return Err(String::from("No normaliser present for output which shouldn't happen as passed initial check for"))
None => return Err(SurrealError::new(
String::from("No normaliser present for output which shouldn't happen as passed initial check for").to_string(),
SurrealErrorStatus::Unknown
))
};
let mut buffer = Vec::with_capacity(output.len());

Expand Down Expand Up @@ -168,7 +172,7 @@ mod tests {
input_values.insert(String::from("squarefoot"), 1000.0);
input_values.insert(String::from("num_floors"), 2.0);

let raw_input = model_computation.input_tensor_from_key_bindings(input_values);
let raw_input = model_computation.input_tensor_from_key_bindings(input_values).unwrap();

let output = model_computation.raw_compute(raw_input, Some((1, 2))).unwrap();
assert_eq!(output.len(), 1);
Expand Down Expand Up @@ -202,7 +206,7 @@ mod tests {
input_values.insert(String::from("squarefoot"), 1000.0);
input_values.insert(String::from("num_floors"), 2.0);

let raw_input = model_computation.input_tensor_from_key_bindings(input_values);
let raw_input = model_computation.input_tensor_from_key_bindings(input_values).unwrap();

let output = model_computation.raw_compute(raw_input, None).unwrap();
assert_eq!(output.len(), 1);
Expand Down
26 changes: 13 additions & 13 deletions modules/core/src/storage/header/keys.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
//! Defines the key bindings for input data.
use std::collections::HashMap;
use glue::{
errors::error::{SurrealError, SurrealErrorStatus},
safe_eject_internal
};


/// Defines the key bindings for input data.
Expand Down Expand Up @@ -46,9 +50,9 @@ impl KeyBindings {
///
/// # Returns
/// The key bindings constructed from the string.
pub fn from_string(data: String) -> Result<Self, String> {
pub fn from_string(data: String) -> Self {
if data.len() == 0 {
return Ok(KeyBindings::fresh())
return KeyBindings::fresh()
}
let mut store = Vec::new();
let mut reference = HashMap::new();
Expand All @@ -61,11 +65,10 @@ impl KeyBindings {
reference.insert(line.to_string(), count);
count += 1;
}

Ok(KeyBindings {
KeyBindings {
store,
reference,
})
}
}

/// converts the key bindings to a string.
Expand All @@ -83,12 +86,9 @@ impl KeyBindings {
///
/// # Returns
/// The key bindings constructed from the bytes.
pub fn from_bytes(data: &[u8]) -> Result<Self, String> {
let data = match String::from_utf8(data.to_vec()) {
Ok(data) => data,
Err(_) => return Err("Error converting bytes to string for key bindings".to_string())
};
Self::from_string(data)
pub fn from_bytes(data: &[u8]) -> Result<Self, SurrealError> {
let data = safe_eject_internal!(String::from_utf8(data.to_vec()));
Ok(Self::from_string(data))
}

/// Converts the key bindings to bytes.
Expand Down Expand Up @@ -135,15 +135,15 @@ pub mod tests {
#[test]
fn test_from_string_with_empty_string() {
let data = "".to_string();
let bindings = KeyBindings::from_string(data).unwrap();
let bindings = KeyBindings::from_string(data);
assert_eq!(bindings.store.len(), 0);
assert_eq!(bindings.reference.len(), 0);
}

#[test]
fn test_from_string() {
let data = generate_string();
let bindings = KeyBindings::from_string(data).unwrap();
let bindings = KeyBindings::from_string(data);
assert_eq!(bindings.store[0], "a");
assert_eq!(bindings.store[1], "b");
assert_eq!(bindings.store[2], "c");
Expand Down
58 changes: 31 additions & 27 deletions modules/core/src/storage/header/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ use version::Version;
use engine::Engine;
use origin::Origin;
use input_dims::InputDims;
use glue::{
safe_eject,
errors::error::{
SurrealError,
SurrealErrorStatus
}
};


/// The header of the model file.
Expand Down Expand Up @@ -76,8 +83,9 @@ impl Header {
///
/// # Arguments
/// * `version` - The version to be added.
pub fn add_version(&mut self, version: String) {
self.version = Version::from_string(version);
pub fn add_version(&mut self, version: String) -> Result<(), SurrealError> {
self.version = Version::from_string(version)?;
Ok(())
}

/// Adds a description to the `self.description` field.
Expand All @@ -102,8 +110,9 @@ impl Header {
/// # Arguments
/// * `column_name` - The name of the column to which the normaliser will be applied.
/// * `normaliser` - The normaliser to be applied to the column.
pub fn add_normaliser(&mut self, column_name: String, normaliser: NormaliserType) {
self.normalisers.add_normaliser(normaliser, column_name, &self.keys);
pub fn add_normaliser(&mut self, column_name: String, normaliser: NormaliserType) -> Result<(), SurrealError> {
let _ = self.normalisers.add_normaliser(normaliser, column_name, &self.keys)?;
Ok(())
}

/// Gets the normaliser for a given column name.
Expand All @@ -113,7 +122,7 @@ impl Header {
///
/// # Returns
/// The normaliser for the given column name.
pub fn get_normaliser(&self, column_name: &String) -> Option<&NormaliserType> {
pub fn get_normaliser(&self, column_name: &String) -> Result<Option<&NormaliserType>, SurrealError> {
self.normalisers.get_normaliser(column_name.to_string(), &self.keys)
}

Expand Down Expand Up @@ -147,8 +156,8 @@ impl Header {
///
/// # Arguments
/// * `origin` - The origin to be added.
pub fn add_origin(&mut self, origin: String) {
self.origin.add_origin(origin);
pub fn add_origin(&mut self, origin: String) -> Result<(), SurrealError> {
self.origin.add_origin(origin)
}

/// The standard delimiter used to seperate each field in the header.
Expand All @@ -163,25 +172,20 @@ impl Header {
///
/// # Returns
/// The `Header` struct.
pub fn from_bytes(data: Vec<u8>) -> Result<Self, std::io::Error> {
let string_data = match String::from_utf8(data) {
Ok(data) => data,
Err(e) => {
let message = format!("Error converting bytes to string for header: {}", e);
return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, message))
}
};
pub fn from_bytes(data: Vec<u8>) -> Result<Self, SurrealError> {

let string_data = safe_eject!(String::from_utf8(data), SurrealErrorStatus::BadRequest);

let buffer = string_data.split(Self::delimiter()).collect::<Vec<&str>>();
let keys = KeyBindings::from_string(buffer.get(1).unwrap_or(&"").to_string()).map_err(|e|
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
)?;
let normalisers = NormaliserMap::from_string(buffer.get(2).unwrap_or(&"").to_string(), &keys);
let output = Output::from_string(buffer.get(3).unwrap_or(&"").to_string());

let keys: KeyBindings = KeyBindings::from_string(buffer.get(1).unwrap_or(&"").to_string());
let normalisers = NormaliserMap::from_string(buffer.get(2).unwrap_or(&"").to_string(), &keys)?;
let output = Output::from_string(buffer.get(3).unwrap_or(&"").to_string())?;
let name = StringValue::from_string(buffer.get(4).unwrap_or(&"").to_string());
let version = Version::from_string(buffer.get(5).unwrap_or(&"").to_string());
let version = Version::from_string(buffer.get(5).unwrap_or(&"").to_string())?;
let description = StringValue::from_string(buffer.get(6).unwrap_or(&"").to_string());
let engine = Engine::from_string(buffer.get(7).unwrap_or(&"").to_string());
let origin = Origin::from_string(buffer.get(8).unwrap_or(&"").to_string());
let origin = Origin::from_string(buffer.get(8).unwrap_or(&"").to_string())?;
let input_dims = InputDims::from_string(buffer.get(9).unwrap_or(&"").to_string());
Ok(Header {keys, normalisers, output, name, version, description, engine, origin, input_dims})
}
Expand Down Expand Up @@ -245,7 +249,7 @@ mod tests {
Header::delimiter(),
Engine::PyTorch.to_string(),
Header::delimiter(),
Origin::from_string("author=>local".to_string()).to_string(),
Origin::from_string("author=>local".to_string()).unwrap().to_string(),
Header::delimiter(),
InputDims::from_string("1,2".to_string()).to_string(),
Header::delimiter(),
Expand Down Expand Up @@ -340,19 +344,19 @@ mod tests {
header.add_column("e".to_string());
header.add_column("f".to_string());

header.add_normaliser(
let _ = header.add_normaliser(
"a".to_string(),
NormaliserType::LinearScaling(LinearScaling { min: 0.0, max: 1.0 })
);
header.add_normaliser(
let _ = header.add_normaliser(
"b".to_string(),
NormaliserType::Clipping(Clipping { min: Some(0.0), max: Some(1.5) })
);
header.add_normaliser(
let _ = header.add_normaliser(
"c".to_string(),
NormaliserType::LogScaling(LogScaling { base: 10.0, min: 0.0 })
);
header.add_normaliser(
let _ = header.add_normaliser(
"e".to_string(),
NormaliserType::ZScore(ZScore { mean: 0.0, std_dev: 1.0 })
);
Expand Down
Loading

0 comments on commit 09536a8

Please sign in to comment.