Skip to content

Commit

Permalink
Improve the numcodecs API and PyCodec wrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Jul 31, 2024
1 parent cde6d05 commit e512d43
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 48 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ unsafe_code = "deny"
unstable_features = "forbid"
unused_crate_dependencies = "warn"

missing_docs = "warn"

[workspace.lints.clippy]
complexity = { priority = -1, level = "deny" }
correctness = { priority = -1, level = "deny" }
Expand Down
2 changes: 1 addition & 1 deletion crates/numcodecs-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ mod registry;

pub use codec::{Codec, CodecMethods};
pub use codec_class::{CodecClass, CodecClassMethods};
pub use pycodec::PyCodec;
pub use pycodec::{PyCodec, PyCodecClass};
pub use registry::Registry;

mod sealed {
Expand Down
164 changes: 140 additions & 24 deletions crates/numcodecs-python/src/pycodec.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{borrow::Cow, sync::Arc};
use std::sync::Arc;

use numcodecs::{AnyCowArray, Codec, DynCodec};
use numcodecs::{AnyCowArray, Codec, DynCodec, DynCodecType};
use numpy::{ndarray::ArrayD, PyArray};
use pyo3::{
buffer::PyBuffer,
Expand All @@ -15,31 +15,73 @@ use serde_transcode::transcode;

use crate::{CodecClassMethods, CodecMethods, Registry};

/// Wrapper around Python [`Codec`][`crate::Codec`]s to use the Rust [`Codec`]
/// API.
pub struct PyCodec {
/// Handle to the wrapped Python codec instance.
codec: Py<crate::Codec>,
/// Handle to the Python class that `codec` is an instance of.
class: Py<crate::CodecClass>,
/// Identifier reported by `class`, kept behind an [`Arc`] so that it can be
/// handed to clones and type objects without copying the string.
codec_id: Arc<String>,
}

impl Codec for PyCodec {
type Error = PyErr;

fn from_config<'de, D: Deserializer<'de>>(config: D) -> Result<Self, D::Error> {
impl PyCodec {
/// Instantiate a codec from the [`Registry`] with a serialized
/// `config`uration.
///
/// The config must include the `id` field with the
/// [`PyCodecClass::codec_id`].
///
/// # Errors
///
/// Errors if no codec with a matching `id` has been registered, or if
/// constructing the codec fails.
pub fn from_registry_with_config<'de, D: Deserializer<'de>>(
config: D,
) -> Result<Self, D::Error> {
Python::with_gil(|py| {
let config =
transcode(config, Pythonizer::new(py)).map_err(serde::de::Error::custom)?;
let config: Bound<PyDict> = config.extract(py).map_err(serde::de::Error::custom)?;
let config = transcode(config, Pythonizer::new(py))?;
let config: Bound<PyDict> = config.extract(py)?;

let codec =
Registry::get_codec(config.as_borrowed()).map_err(serde::de::Error::custom)?;
let codec_id = codec.class().codec_id().map_err(serde::de::Error::custom)?;
let codec = Registry::get_codec(config.as_borrowed())?;

Ok(Self {
codec: codec.unbind(),
codec_id: Arc::new(codec_id),
})
Self::from_codec(codec)
})
.map_err(serde::de::Error::custom)
}

/// Builds a [`PyCodec`] around an existing Python [`Codec`][`crate::Codec`]
/// instance so that it can be driven through the Rust [`Codec`] API.
///
/// The codec's class and that class's identifier are looked up once here and
/// stored next to the codec itself.
///
/// # Errors
///
/// Returns the [`PyErr`] produced while querying the identifier of the
/// `codec`'s class.
pub fn from_codec(codec: Bound<crate::Codec>) -> Result<Self, PyErr> {
    let class = codec.class();
    // Bail out before giving up ownership of `codec` if the class has no id.
    let codec_id = class.codec_id().map(Arc::new)?;

    Ok(Self {
        codec: codec.unbind(),
        class: class.unbind(),
        codec_id,
    })
}

/// Access the wrapped [`Codec`][`crate::Codec`] to use its Python
/// [`CodecMethods`] API.
#[must_use]
pub fn as_codec<'py>(&self, py: Python<'py>) -> &Bound<'py, crate::Codec> {
self.codec.bind(py)
}

/// Unwrap the [`Codec`][`crate::Codec`] to use its Python [`CodecMethods`]
/// API.
#[must_use]
pub fn into_codec(self, py: Python) -> Bound<crate::Codec> {
self.codec.into_bound(py)
}
}

impl Codec for PyCodec {
type Error = PyErr;

fn encode<'a>(&self, data: AnyCowArray<'a>) -> Result<AnyCowArray<'a>, Self::Error> {
Python::with_gil(|py| {
let this = self.codec.bind(py).clone().into_any();
Expand Down Expand Up @@ -249,12 +291,6 @@ impl Codec for PyCodec {
}
}

impl DynCodec for PyCodec {
fn codec_id(&self) -> Cow<str> {
Cow::Borrowed(&self.codec_id)
}
}

impl Clone for PyCodec {
#[allow(clippy::expect_used)] // clone is *not* fallible
fn clone(&self) -> Self {
Expand All @@ -269,16 +305,96 @@ impl Clone for PyCodec {
let _ = config.del_item(intern!(py, "id"));

let codec = self
.codec
.class
.bind(py)
.class()
.codec_from_config(config.as_borrowed())
.expect("re-creating codec from config should not fail");

Self {
codec: codec.unbind(),
class: self.class.clone_ref(py),
codec_id: self.codec_id.clone(),
}
})
}
}

impl DynCodec for PyCodec {
type Type = PyCodecClass;

fn ty(&self) -> Self::Type {
Python::with_gil(|py| PyCodecClass {
class: self.class.clone_ref(py),
codec_id: self.codec_id.clone(),
})
}
}

/// Wrapper around Python [`CodecClass`][`crate::CodecClass`]es to use the Rust
/// [`DynCodecType`] API.
pub struct PyCodecClass {
/// Handle to the wrapped Python codec class.
class: Py<crate::CodecClass>,
/// Identifier reported by `class`, kept behind an [`Arc`] so that it can be
/// shared with the codecs created from this class without copying the string.
codec_id: Arc<String>,
}

impl PyCodecClass {
/// Wraps a [`CodecClass`][`crate::CodecClass`] to use the Rust
/// [`DynCodecType`] API.
///
/// # Errors
///
/// Errors if the codec `class` does not provide an identifier.
pub fn from_codec_class(class: Bound<crate::CodecClass>) -> Result<Self, PyErr> {
let codec_id = class.codec_id()?;

Ok(Self {
class: class.unbind(),
codec_id: Arc::new(codec_id),
})
}

/// Access the wrapped [`CodecClass`][`crate::CodecClass`] to use its Python
/// [`CodecClassMethods`] API.
#[must_use]
pub fn as_codec_class<'py>(&self, py: Python<'py>) -> &Bound<'py, crate::CodecClass> {
self.class.bind(py)
}

/// Unwrap the [`CodecClass`][`crate::CodecClass`] to use its Python
/// [`CodecClassMethods`] API.
#[must_use]
pub fn into_codec_class(self, py: Python) -> Bound<crate::CodecClass> {
self.class.into_bound(py)
}
}

impl DynCodecType for PyCodecClass {
type Codec = PyCodec;

fn codec_id(&self) -> &str {
&self.codec_id
}

fn codec_from_config<'de, D: Deserializer<'de>>(
&self,
config: D,
) -> Result<Self::Codec, D::Error> {
Python::with_gil(|py| {
let config =
transcode(config, Pythonizer::new(py)).map_err(serde::de::Error::custom)?;
let config: Bound<PyDict> = config.extract(py).map_err(serde::de::Error::custom)?;

let codec = self
.class
.bind(py)
.codec_from_config(config.as_borrowed())
.map_err(serde::de::Error::custom)?;

Ok(PyCodec {
codec: codec.unbind(),
class: self.class.clone_ref(py),
codec_id: self.codec_id.clone(),
})
})
}
}
12 changes: 9 additions & 3 deletions crates/numcodecs-python/tests/crc32.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use numcodecs::{AnyCowArray, Codec, DynCodec};
use numcodecs::{AnyCowArray, Codec, DynCodec, DynCodecType};
use numcodecs_python::{CodecClassMethods, CodecMethods, PyCodec, Registry};
use numpy::ndarray::ArrayView1;
use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyDict};
Expand Down Expand Up @@ -61,15 +61,21 @@ fn python_api() -> Result<(), PyErr> {
#[test]
fn rust_api() -> Result<(), PyErr> {
// create a codec using registry lookup
let codec = PyCodec::from_config(json!({
let codec = PyCodec::from_registry_with_config(json!({
"id": "crc32",
}))
.map_err(|err| PyRuntimeError::new_err(format!("{err}")))?;
assert_eq!(codec.codec_id(), "crc32");
assert_eq!(codec.ty().codec_id(), "crc32");

// clone the codec
let codec = codec.clone();

// create a codec using the type object
let codec = codec
.ty()
.codec_from_config(json!({}))
.map_err(|err| PyRuntimeError::new_err(format!("{err}")))?;

// check the codec's config
let config = codec
.get_config(serde_json::value::Serializer)
Expand Down
95 changes: 75 additions & 20 deletions crates/numcodecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,18 @@
//!
//! [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
use std::{borrow::Cow, error::Error};
use std::{error::Error, marker::PhantomData};

use ndarray::{CowArray, IxDyn};
use serde::{Deserializer, Serializer};

/// Compression codec that [`encode`][`Codec::encode`]s and
/// [`decode`][`Codec::decode`]s numeric n-dimensional arrays.
pub trait Codec: Clone {
pub trait Codec: 'static + Send + Sync + Clone {
/// Error type that may be returned during [`encode`][`Codec::encode`]ing
/// and [`decode`][`Codec::decode`]ing.
type Error: 'static + Send + Sync + Error;

/// Instantiate a codec from a serialized `config`uration.
///
/// The config must be compatible with JSON encoding.
///
/// # Errors
///
/// Errors if constructing the codec fails.
fn from_config<'de, D: Deserializer<'de>>(config: D) -> Result<Self, D::Error>;

/// Encodes the `data` and returns the result.
///
/// # Errors
Expand All @@ -56,7 +47,7 @@ pub trait Codec: Clone {

/// Serializes the configuration parameters for this codec.
///
/// The config must include an `id` field with the [`DynCodec::codec_id`].
/// The config must include an `id` field with the [`DynCodecType::codec_id`].
/// The config must be compatible with JSON encoding.
///
/// # Errors
Expand All @@ -68,6 +59,7 @@ pub trait Codec: Clone {
/// Numeric n-dimensional arrays with dynamic shapes.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq)]
#[allow(missing_docs)]
pub enum AnyCowArray<'a> {
U8(CowArray<'a, u8, IxDyn>),
U16(CowArray<'a, u16, IxDyn>),
Expand All @@ -81,24 +73,87 @@ pub enum AnyCowArray<'a> {
F64(CowArray<'a, f64, IxDyn>),
}

/// Compression codec whose [`CODEC_ID`](`StaticCodec::CODEC_ID`) is statically
/// known.
pub trait StaticCodec: 'static + Codec {
/// Statically typed compression codec.
pub trait StaticCodec: Codec {
/// Codec identifier.
const CODEC_ID: &'static str;

/// Instantiate a codec from a serialized `config`uration.
///
/// The config must be compatible with JSON encoding.
///
/// # Errors
///
/// Errors if constructing the codec fails.
fn from_config<'de, D: Deserializer<'de>>(config: D) -> Result<Self, D::Error>;
}

/// Compression codec whose [`codec_id`](`DynCodec::codec_id`) is dynamically
/// known.
/// Dynamically typed compression codec.
///
/// Every codec that implements [`StaticCodec`] also implements [`DynCodec`].
pub trait DynCodec: Codec {
/// Type object type for this codec.
type Type: DynCodecType;

/// Returns the type object for this codec.
fn ty(&self) -> Self::Type;
}

/// Type object for dynamically typed compression codecs.
pub trait DynCodecType: 'static + Send + Sync {
/// Type of the instances of this codec type object.
type Codec: DynCodec<Type = Self>;

/// Codec identifier.
fn codec_id(&self) -> Cow<str>;
fn codec_id(&self) -> &str;

/// Instantiate a codec of this type from a serialized `config`uration.
///
/// The config must be compatible with JSON encoding.
///
/// # Errors
///
/// Errors if constructing the codec fails.
fn codec_from_config<'de, D: Deserializer<'de>>(
&self,
config: D,
) -> Result<Self::Codec, D::Error>;
}

impl<T: StaticCodec> DynCodec for T {
fn codec_id(&self) -> Cow<str> {
Cow::Borrowed(T::CODEC_ID)
type Type = StaticCodecType<Self>;

fn ty(&self) -> Self::Type {
StaticCodecType::of()
}
}

/// Type object for statically typed compression codecs.
pub struct StaticCodecType<T: StaticCodec> {
/// Zero-sized marker that ties this type object to the codec type `T`; no
/// value of type `T` is stored.
_marker: PhantomData<T>,
}

impl<T: StaticCodec> StaticCodecType<T> {
    /// Returns the type object for the statically typed codec `T`.
    ///
    /// This is a `const fn` and needs no codec instance.
    #[must_use]
    pub const fn of() -> Self {
        Self {
            _marker: PhantomData,
        }
    }
}

impl<T: StaticCodec> DynCodecType for StaticCodecType<T> {
    type Codec = T;

    /// Returns the codec's statically known [`StaticCodec::CODEC_ID`].
    fn codec_id(&self) -> &str {
        <T as StaticCodec>::CODEC_ID
    }

    /// Instantiates a codec of type `T` from a serialized `config`uration by
    /// forwarding to [`StaticCodec::from_config`].
    ///
    /// # Errors
    ///
    /// Returns whatever error [`StaticCodec::from_config`] reports.
    fn codec_from_config<'de, D: Deserializer<'de>>(
        &self,
        config: D,
    ) -> Result<Self::Codec, D::Error> {
        <T as StaticCodec>::from_config(config)
    }
}

0 comments on commit e512d43

Please sign in to comment.