Skip to content

Commit

Permalink
Draft the numcodecs Rust API
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Jul 30, 2024
1 parent db116f7 commit 11d6f9d
Show file tree
Hide file tree
Showing 12 changed files with 844 additions and 354 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/rustdoc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ jobs:
run: |
RUSTDOCFLAGS="\
--enable-index-page \
--extern-html-root-url pyo3=https://docs.rs/anyhow/0.21/ \
--extern-html-root-url ndarray=https://docs.rs/ndarray/0.15/ \
--extern-html-root-url pyo3=https://docs.rs/pyo3/0.21/ \
--extern-html-root-url serde=https://docs.rs/serde/1.0/ \
-Zunstable-options\
" cargo doc \
--all-features \
Expand Down
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@ rust-version = "1.65"

[workspace.dependencies]
# workspace-internal crates
numcodecs = { path = "crates/numcodecs-python", default-features = false }
numcodecs = { path = "crates/numcodecs", default-features = false }
numcodecs-python = { path = "crates/numcodecs-python", default-features = false }

# crates.io third-party dependencies
numpy = { version = "0.21", default-features = false }
ndarray = { version = "0.15", default-features = false }
pyo3 = { version = "0.21", default-features = false }
pythonize = { version = "0.21", default-features = false }
serde = { version = "1.0", default-features = false }
serde-transcode = { version = "1.1", default-features = false }
serde_json = { version = "1.0", default-features = false }

[workspace.lints.rust]
unsafe_code = "deny"
Expand Down
9 changes: 7 additions & 2 deletions crates/numcodecs-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@ keywords = ["numcodecs", "compression", "encoding", "python", "pyo3"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
numcodecs = { workspace = true }
numpy = { workspace = true }
pyo3 = { workspace = true }
pythonize = { workspace = true }
serde = { workspace = true }
serde-transcode = { workspace = true }

[dev-dependencies]
numpy = { workspace = true }
pyo3 = { workspace = true, features = ["auto-initialize"] }
# pyo3 = { workspace = true, features = ["auto-initialize"] }
serde_json = { workspace = true, features = ["std"] }

[lints]
workspace = true
144 changes: 144 additions & 0 deletions crates/numcodecs-python/src/codec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
use pyo3::{
ffi::PyTypeObject,
intern,
prelude::*,
sync::GILOnceCell,
types::{DerefToPyAny, IntoPyDict, PyDict, PyType},
PyTypeInfo,
};

use crate::{sealed::Sealed, CodecClass};

/// Represents a [`numcodecs.abc.Codec`] *instance* object.
///
/// This type is a thin `#[repr(transparent)]` wrapper around a single
/// [`PyAny`] field. It is meant to be used as the type parameter of pyo3's
/// smart pointers rather than on its own.
///
/// The [`Bound<Codec>`] type implements the [`CodecMethods`] API.
///
/// [`numcodecs.abc.Codec`]: https://numcodecs.readthedocs.io/en/stable/abc.html#module-numcodecs.abc
#[repr(transparent)]
pub struct Codec {
// sole field; together with `#[repr(transparent)]` it gives `Codec` the
// same layout as `PyAny`
_codec: PyAny,
}

/// Methods implemented for [`Codec`]s.
///
/// The trait has [`Sealed`] as a supertrait, so only types that also
/// implement [`Sealed`] can implement it (presumably [`Sealed`] is private to
/// this crate, which would prevent downstream implementations).
pub trait CodecMethods<'py>: Sealed {
/// Encodes the data in the buffer `buf` and returns the result.
///
/// The input and output buffers may be any objects supporting the
/// [new-style buffer protocol].
///
/// # Errors
///
/// Errors if encoding the buffer fails.
///
/// [new-style buffer protocol]: https://docs.python.org/3/c-api/buffer.html
fn encode(&self, buf: Borrowed<'_, 'py, PyAny>) -> Result<Bound<'py, PyAny>, PyErr>;

/// Decodes the data in the buffer `buf` and returns the result.
///
/// The input and output buffers may be any objects supporting the
/// [new-style buffer protocol].
///
/// If the optional output buffer `out` is provided, the decoded data is
/// written into `out` and the `out` buffer is returned. Note that this
/// buffer must be exactly the right size to store the decoded data.
///
/// If the optional output buffer `out` is *not* provided, a new output
/// buffer is allocated.
///
/// # Errors
///
/// Errors if decoding the buffer fails.
///
/// [new-style buffer protocol]: https://docs.python.org/3/c-api/buffer.html
fn decode(
&self,
buf: Borrowed<'_, 'py, PyAny>,
out: Option<Borrowed<'_, 'py, PyAny>>,
) -> Result<Bound<'py, PyAny>, PyErr>;

/// Returns a dictionary holding configuration parameters for this codec.
///
/// The dict must include an `id` field with the
/// [`CodecClassMethods::codec_id`]. The dict must be compatible with JSON
/// encoding.
///
/// # Errors
///
/// Errors if getting the codec configuration fails.
fn get_config(&self) -> Result<Bound<'py, PyDict>, PyErr>;

/// Returns the [`CodecClass`] of this codec.
///
/// # Panics
///
/// The implementation for `Bound<Codec>` panics if the Python type of the
/// codec object cannot be extracted as a [`CodecClass`].
fn class(&self) -> Bound<'py, CodecClass>;
}

impl<'py> CodecMethods<'py> for Bound<'py, Codec> {
    /// Forwards to the Python-side `encode(buf)` method of the codec object.
    fn encode(&self, buf: Borrowed<'_, 'py, PyAny>) -> Result<Bound<'py, PyAny>, PyErr> {
        let codec = self.as_any();
        let method = intern!(codec.py(), "encode");

        codec.call_method1(method, (buf,))
    }

    /// Forwards to the Python-side `decode(buf, out=out)` method of the
    /// codec object.
    fn decode(
        &self,
        buf: Borrowed<'_, 'py, PyAny>,
        out: Option<Borrowed<'_, 'py, PyAny>>,
    ) -> Result<Bound<'py, PyAny>, PyErr> {
        let py = self.py();

        // `out` is always passed by keyword, whether or not it is `Some`
        let kwargs = [(intern!(py, "out"), out)].into_py_dict_bound(py);

        self.as_any()
            .call_method(intern!(py, "decode"), (buf,), Some(&kwargs))
    }

    /// Calls the Python-side `get_config()` method and extracts the result
    /// as a dictionary.
    fn get_config(&self) -> Result<Bound<'py, PyDict>, PyErr> {
        let py = self.py();
        let config = self.as_any().call_method0(intern!(py, "get_config"))?;

        config.extract()
    }

    /// Returns the Python type of the codec object as a [`CodecClass`].
    ///
    /// # Panics
    ///
    /// Panics if the type of the codec object cannot be extracted as a
    /// [`CodecClass`].
    #[allow(clippy::expect_used)]
    fn class(&self) -> Bound<'py, CodecClass> {
        let ty = self.as_any().get_type();

        // extracting a codec guarantees that its class is a codec class
        ty.extract().expect("Codec's class must be a CodecClass")
    }
}

// Opts `Bound<Codec>` into the `Sealed` supertrait, which is required for it
// to implement `CodecMethods`.
impl<'py> Sealed for Bound<'py, Codec> {}

// Empty marker impl, hidden from the rendered docs.
// NOTE(review): in pyo3 this marker presumably lets `Bound<Codec>` deref to
// `Bound<PyAny>` -- confirm against the pyo3 `DerefToPyAny` documentation.
#[doc(hidden)]
impl DerefToPyAny for Codec {}

// SAFETY (review note): `Codec` is `#[repr(transparent)]` over a single
// `PyAny` field and therefore has the same layout as `PyAny`. Presumably
// this is what the `PyNativeType` contract requires -- TODO confirm against
// the pyo3 documentation for the pinned version.
#[doc(hidden)]
#[allow(unsafe_code)]
unsafe impl PyNativeType for Codec {
type AsRefSource = Self;
}

// SAFETY (review note): `type_object_raw` returns the pointer of the type
// object obtained from `numcodecs.abc.Codec`, which is kept alive by the
// `Py<PyType>` stored in a `static`. Presumably this satisfies the
// `PyTypeInfo` contract -- TODO confirm against the pyo3 documentation.
#[doc(hidden)]
#[allow(unsafe_code)]
unsafe impl PyTypeInfo for Codec {
    const MODULE: Option<&'static str> = Some("numcodecs.abc");
    const NAME: &'static str = "Codec";

    /// Returns the raw type object pointer of the Python `numcodecs.abc.Codec`
    /// class.
    ///
    /// The class is looked up by importing `numcodecs.abc` and reading its
    /// `Codec` attribute; the result is stored in a function-local
    /// [`GILOnceCell`] so that later calls can reuse it.
    ///
    /// # Panics
    ///
    /// Panics if importing `numcodecs.abc`, reading its `Codec` attribute, or
    /// extracting that attribute as a Python type object fails.
    #[inline]
    fn type_object_raw(py: Python) -> *mut PyTypeObject {
        static CODEC_TYPE: GILOnceCell<Py<PyType>> = GILOnceCell::new();

        let ty = CODEC_TYPE.get_or_try_init(py, || {
            py.import_bound(intern!(py, "numcodecs.abc"))?
                .getattr(intern!(py, "Codec"))?
                .extract()
        });
        // this function cannot return an error, so a failed lookup has to
        // panic; the message names the module that is actually imported above
        #[allow(clippy::expect_used)]
        let ty = ty.expect("failed to access the `numcodecs.abc.Codec` type object");

        ty.bind(py).as_type_ptr()
    }
}
97 changes: 97 additions & 0 deletions crates/numcodecs-python/src/codec_class.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use pyo3::{
ffi::PyTypeObject,
intern,
prelude::*,
types::{DerefToPyAny, PyDict, PyType},
PyTypeInfo,
};

use crate::{sealed::Sealed, Codec};

/// Represents a [`numcodecs.abc.Codec`] *class* object.
///
/// This type is a thin `#[repr(transparent)]` wrapper around a single
/// [`PyType`] field. It is meant to be used as the type parameter of pyo3's
/// smart pointers rather than on its own.
///
/// The [`Bound<CodecClass>`] type implements the [`CodecClassMethods`] API.
///
/// [`numcodecs.abc.Codec`]: https://numcodecs.readthedocs.io/en/stable/abc.html#module-numcodecs.abc
#[repr(transparent)]
pub struct CodecClass {
// sole field; together with `#[repr(transparent)]` it gives `CodecClass`
// the same layout as `PyType`
_class: PyType,
}

/// Methods implemented for [`CodecClass`]es.
///
/// The trait has [`Sealed`] as a supertrait, so only types that also
/// implement [`Sealed`] can implement it (presumably [`Sealed`] is private to
/// this crate, which would prevent downstream implementations).
pub trait CodecClassMethods<'py>: Sealed {
/// Gets the codec identifier.
///
/// The implementation for `Bound<CodecClass>` reads the `codec_id`
/// attribute of the class and extracts it as a string.
///
/// # Errors
///
/// Errors if the codec does not provide an identifier.
fn codec_id(&self) -> Result<String, PyErr>;

/// Instantiate a codec from a configuration dictionary.
///
/// The implementation for `Bound<CodecClass>` calls the `from_config`
/// method of the class with `config` as its only argument.
///
/// # Errors
///
/// Errors if constructing the codec fails.
fn codec_from_config(
&self,
config: Borrowed<'_, 'py, PyDict>,
) -> Result<Bound<'py, Codec>, PyErr>;
}

impl<'py> CodecClassMethods<'py> for Bound<'py, CodecClass> {
    /// Reads the `codec_id` attribute of the class and extracts it as a
    /// string.
    fn codec_id(&self) -> Result<String, PyErr> {
        let class = self.as_any();
        let attr_name = intern!(class.py(), "codec_id");

        class.getattr(attr_name)?.extract()
    }

    /// Calls the `from_config(config)` method of the class and extracts the
    /// returned object as a [`Codec`].
    fn codec_from_config(
        &self,
        config: Borrowed<'_, 'py, PyDict>,
    ) -> Result<Bound<'py, Codec>, PyErr> {
        let class = self.as_any();
        let codec = class.call_method1(intern!(class.py(), "from_config"), (config,))?;

        codec.extract()
    }
}

// Opts `Bound<CodecClass>` into the `Sealed` supertrait, which is required
// for it to implement `CodecClassMethods`.
impl<'py> Sealed for Bound<'py, CodecClass> {}

// Empty marker impl, hidden from the rendered docs.
// NOTE(review): in pyo3 this marker presumably lets `Bound<CodecClass>` deref
// to `Bound<PyAny>` -- confirm against the pyo3 `DerefToPyAny` documentation.
#[doc(hidden)]
impl DerefToPyAny for CodecClass {}

// SAFETY (review note): `CodecClass` is `#[repr(transparent)]` over a single
// `PyType` field and therefore has the same layout as `PyType`. Presumably
// this is what the `PyNativeType` contract requires -- TODO confirm against
// the pyo3 documentation for the pinned version.
#[doc(hidden)]
#[allow(unsafe_code)]
unsafe impl PyNativeType for CodecClass {
type AsRefSource = Self;
}

#[doc(hidden)]
#[allow(unsafe_code)]
unsafe impl PyTypeInfo for CodecClass {
    const MODULE: Option<&'static str> = Some("numcodecs.abc");
    const NAME: &'static str = "Codec";

    /// Codec classes are Python type objects, so this reuses the raw type
    /// object of [`PyType`].
    #[inline]
    fn type_object_raw(py: Python) -> *mut PyTypeObject {
        PyType::type_object_raw(py)
    }

    /// Returns `true` only if `object` is a Python type object that is a
    /// subclass of [`Codec`].
    ///
    /// Anything that is not a type object, or for which the subclass check
    /// itself errors, is reported as `false`.
    #[inline]
    fn is_type_of_bound(object: &Bound<'_, PyAny>) -> bool {
        match object.downcast::<PyType>() {
            Ok(ty) => ty.is_subclass_of::<Codec>().unwrap_or(false),
            Err(_) => false,
        }
    }

    /// Returns `true` only if `object` is the very same object as the
    /// [`Codec`] type object (pointer identity).
    #[inline]
    fn is_exact_type_of_bound(object: &Bound<'_, PyAny>) -> bool {
        let codec_type = Codec::type_object_raw(object.py());

        object.as_ptr() == codec_type.cast()
    }
}
Loading

0 comments on commit 11d6f9d

Please sign in to comment.