Skip to content

Commit

Permalink
Backup of progress on adding the Zlib codec
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Aug 7, 2024
1 parent 33749c1 commit 7dcc47e
Show file tree
Hide file tree
Showing 10 changed files with 619 additions and 39 deletions.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"codecs/bit-round",
"codecs/identity",
"codecs/uniform-noise",
"codecs/zlib",
]

[workspace.package]
Expand All @@ -25,17 +26,21 @@ numcodecs-python = { version = "0.2", path = "crates/numcodecs-python", default-
numcodecs-bit-round = { version = "0.1", path = "codecs/bit-round", default-features = false }
numcodecs-identity = { version = "0.1", path = "codecs/identity", default-features = false }
numcodecs-uniform-noise = { version = "0.1", path = "codecs/uniform-noise", default-features = false }
numcodecs-zlib = { version = "0.1", path = "codecs/zlib", default-features = false }

# crates.io third-party dependencies
convert_case = { version = "0.6", default-features = false }
miniz_oxide = { version = "0.7", default-features = false }
numpy = { version = "0.21", default-features = false }
ndarray = { version = "0.15", default-features = false } # keep in sync with numpy
postcard = { version = "1.0", default-features = false }
pyo3 = { version = "0.21", default-features = false }
pythonize = { version = "0.21", default-features = false }
rand = { version = "0.8", default-features = false }
serde = { version = "1.0", default-features = false }
serde-transcode = { version = "1.1", default-features = false }
serde_json = { version = "1.0", default-features = false }
serde_repr = { version = "0.1", default-features = false }
thiserror = { version = "1.0", default-features = false }
wyhash = { version = "0.5", default-features = false }

Expand Down
43 changes: 30 additions & 13 deletions codecs/bit-round/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
//!
//! Bit rounding codec implementation for the [`numcodecs`] API.
use ndarray::{Array, CowArray, Dimension};
use ndarray::{Array, ArrayViewD, ArrayViewMutD, CowArray, Dimension};
use numcodecs::{
AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
};
Expand All @@ -37,21 +37,21 @@ pub struct BitRoundCodec {
}

impl Codec for BitRoundCodec {
type Error = BitRoundError;
type Error = BitRoundCodecError;

fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
match data {
AnyCowArray::F32(data) => Ok(AnyArray::F32(bit_round(data, self.keepbits)?)),
AnyCowArray::F64(data) => Ok(AnyArray::F64(bit_round(data, self.keepbits)?)),
encoded => Err(BitRoundError::UnsupportedDtype(encoded.dtype())),
encoded => Err(BitRoundCodecError::UnsupportedDtype(encoded.dtype())),
}
}

fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
match encoded {
AnyCowArray::F32(encoded) => Ok(AnyArray::F32(encoded.into_owned())),
AnyCowArray::F64(encoded) => Ok(AnyArray::F64(encoded.into_owned())),
encoded => Err(BitRoundError::UnsupportedDtype(encoded.dtype())),
encoded => Err(BitRoundCodecError::UnsupportedDtype(encoded.dtype())),
}
}

Expand All @@ -60,23 +60,31 @@ impl Codec for BitRoundCodec {
encoded: AnyArrayView,
mut decoded: AnyArrayViewMut,
) -> Result<(), Self::Error> {
// Copies every element of `encoded` into `decoded`, but only when both views
// have exactly the same shape. On a mismatch, both shapes are reported in a
// `BitRoundCodecError::MismatchedDecodeIntoShape` error and `decoded` is left
// untouched.
fn shape_checked_assign<T: Copy>(
    encoded: &ArrayViewD<T>,
    decoded: &mut ArrayViewMutD<T>,
) -> Result<(), BitRoundCodecError> {
    // Guard first, so that differing shapes become a recoverable error and
    // never reach `assign` (which, per the ndarray docs, would broadcast or
    // panic on a shape mismatch).
    if encoded.shape() != decoded.shape() {
        return Err(BitRoundCodecError::MismatchedDecodeIntoShape {
            decoded: encoded.shape().to_vec(),
            provided: decoded.shape().to_vec(),
        });
    }

    // `assign` returns `()`, so it is called as a statement rather than being
    // wrapped in `Ok(..)`; no `clippy::unit_arg` suppression is needed.
    decoded.assign(encoded);
    Ok(())
}

match (&encoded, &mut decoded) {
(AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F32(_), decoded) => Err(BitRoundError::MismatchedDecodeIntoDtype {
(AnyArrayView::F32(_), decoded) => Err(BitRoundCodecError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F32,
provided: decoded.dtype(),
}),
(AnyArrayView::F64(_), decoded) => Err(BitRoundError::MismatchedDecodeIntoDtype {
(AnyArrayView::F64(_), decoded) => Err(BitRoundCodecError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F64,
provided: decoded.dtype(),
}),
(encoded, _decoded) => Err(BitRoundError::UnsupportedDtype(encoded.dtype())),
(encoded, _decoded) => Err(BitRoundCodecError::UnsupportedDtype(encoded.dtype())),
}
}

Expand All @@ -95,7 +103,7 @@ impl StaticCodec for BitRoundCodec {

#[derive(Debug, Error)]
/// Errors that may occur when applying the [`BitRoundCodec`].
pub enum BitRoundError {
pub enum BitRoundCodecError {
/// [`BitRoundCodec`] does not support the dtype
#[error("BitRound does not support the dtype {0}")]
UnsupportedDtype(AnyArrayDType),
Expand All @@ -108,6 +116,15 @@ pub enum BitRoundError {
/// Dtype of the `provided` array into which the data is to be decoded
provided: AnyArrayDType,
},
/// [`BitRoundCodec`] cannot decode the decoded array into the provided
/// array of a different shape
#[error("BitRound cannot decode the decoded array of shape {decoded:?} into the provided array of shape {provided:?}")]
MismatchedDecodeIntoShape {
/// Shape of the `decoded` data
decoded: Vec<usize>,
/// Shape of the `provided` array into which the data is to be decoded
provided: Vec<usize>,
},
/// [`BitRoundCodec`] encode `keepbits` exceed the mantissa size for `dtype`
#[error("BitRound encode {keepbits} bits exceed the mantissa size for {dtype}")]
ExcessiveKeepBits {
Expand Down Expand Up @@ -138,9 +155,9 @@ pub enum BitRoundError {
pub fn bit_round<T: Float, D: Dimension>(
data: CowArray<T, D>,
keepbits: u8,
) -> Result<Array<T, D>, BitRoundError> {
) -> Result<Array<T, D>, BitRoundCodecError> {
if u32::from(keepbits) > T::MANITSSA_BITS {
return Err(BitRoundError::ExcessiveKeepBits {
return Err(BitRoundCodecError::ExcessiveKeepBits {
keepbits,
dtype: T::TY,
});
Expand Down
1 change: 1 addition & 0 deletions codecs/identity/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ keywords = ["identity", "numcodecs", "compression", "encoding"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ndarray = { workspace = true }
numcodecs = { workspace = true }
serde = { workspace = true, features = ["std", "derive"] }
thiserror = { workspace = true }
Expand Down
40 changes: 29 additions & 11 deletions codecs/identity/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
//!
//! Identity codec implementation for the [`numcodecs`] API.
use ndarray::{ArrayViewD, ArrayViewMutD};
use numcodecs::{
AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
};
Expand Down Expand Up @@ -44,37 +45,45 @@ impl Codec for IdentityCodec {
encoded: AnyArrayView,
mut decoded: AnyArrayViewMut,
) -> Result<(), Self::Error> {
// Copies every element of `encoded` into `decoded`, but only when both views
// have exactly the same shape. On a mismatch, both shapes are reported in an
// `IdentityCodecError::MismatchedDecodeIntoShape` error and `decoded` is left
// untouched.
fn shape_checked_assign<T: Copy>(
    encoded: &ArrayViewD<T>,
    decoded: &mut ArrayViewMutD<T>,
) -> Result<(), IdentityCodecError> {
    // Guard first, so that differing shapes become a recoverable error and
    // never reach `assign` (which, per the ndarray docs, would broadcast or
    // panic on a shape mismatch).
    if encoded.shape() != decoded.shape() {
        return Err(IdentityCodecError::MismatchedDecodeIntoShape {
            decoded: encoded.shape().to_vec(),
            provided: decoded.shape().to_vec(),
        });
    }

    // `assign` returns `()`, so it is called as a statement rather than being
    // wrapped in `Ok(..)`; no `clippy::unit_arg` suppression is needed.
    decoded.assign(encoded);
    Ok(())
}

match (&encoded, &mut decoded) {
(AnyArrayView::U8(encoded), AnyArrayViewMut::U8(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::U16(encoded), AnyArrayViewMut::U16(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::U32(encoded), AnyArrayViewMut::U32(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::U64(encoded), AnyArrayViewMut::U64(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::I8(encoded), AnyArrayViewMut::I8(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::I16(encoded), AnyArrayViewMut::I16(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::I32(encoded), AnyArrayViewMut::I32(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::I64(encoded), AnyArrayViewMut::I64(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(encoded, decoded) => Err(IdentityCodecError::MismatchedDecodeIntoDtype {
decoded: encoded.dtype(),
Expand Down Expand Up @@ -108,4 +117,13 @@ pub enum IdentityCodecError {
/// Dtype of the `provided` array into which the data is to be decoded
provided: AnyArrayDType,
},
/// [`IdentityCodec`] cannot decode the decoded array into the provided
/// array of a different shape
#[error("Identity cannot decode the decoded array of shape {decoded:?} into the provided array of shape {provided:?}")]
MismatchedDecodeIntoShape {
/// Shape of the `decoded` data
decoded: Vec<usize>,
/// Shape of the `provided` array into which the data is to be decoded
provided: Vec<usize>,
},
}
39 changes: 28 additions & 11 deletions codecs/uniform-noise/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use std::hash::{Hash, Hasher};

use ndarray::{Array, CowArray, Dimension};
use ndarray::{Array, ArrayViewD, ArrayViewMutD, CowArray, Dimension};
use numcodecs::{
AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
};
Expand All @@ -43,7 +43,7 @@ pub struct UniformNoiseCodec {
}

impl Codec for UniformNoiseCodec {
type Error = UniformNoiseError;
type Error = UniformNoiseCodecError;

fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
match data {
Expand All @@ -56,15 +56,15 @@ impl Codec for UniformNoiseCodec {
AnyCowArray::F64(data) => Ok(AnyArray::F64(add_uniform_noise(
data, self.scale, self.seed,
))),
encoded => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
encoded => Err(UniformNoiseCodecError::UnsupportedDtype(encoded.dtype())),
}
}

fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
match encoded {
AnyCowArray::F32(encoded) => Ok(AnyArray::F32(encoded.into_owned())),
AnyCowArray::F64(encoded) => Ok(AnyArray::F64(encoded.into_owned())),
encoded => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
encoded => Err(UniformNoiseCodecError::UnsupportedDtype(encoded.dtype())),
}
}

Expand All @@ -73,23 +73,31 @@ impl Codec for UniformNoiseCodec {
encoded: AnyArrayView,
mut decoded: AnyArrayViewMut,
) -> Result<(), Self::Error> {
// Copies every element of `encoded` into `decoded`, but only when both views
// have exactly the same shape. On a mismatch, both shapes are reported in a
// `UniformNoiseCodecError::MismatchedDecodeIntoShape` error and `decoded` is
// left untouched.
fn shape_checked_assign<T: Copy>(
    encoded: &ArrayViewD<T>,
    decoded: &mut ArrayViewMutD<T>,
) -> Result<(), UniformNoiseCodecError> {
    // Guard first, so that differing shapes become a recoverable error and
    // never reach `assign` (which, per the ndarray docs, would broadcast or
    // panic on a shape mismatch).
    if encoded.shape() != decoded.shape() {
        return Err(UniformNoiseCodecError::MismatchedDecodeIntoShape {
            decoded: encoded.shape().to_vec(),
            provided: decoded.shape().to_vec(),
        });
    }

    // `assign` returns `()`, so it is called as a statement rather than being
    // wrapped in `Ok(..)`; no `clippy::unit_arg` suppression is needed.
    decoded.assign(encoded);
    Ok(())
}

match (&encoded, &mut decoded) {
(AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
Ok(decoded.assign(encoded))
shape_checked_assign(encoded, decoded)
}
(AnyArrayView::F32(_), decoded) => Err(UniformNoiseError::MismatchedDecodeIntoDtype {
(AnyArrayView::F32(_), decoded) => Err(UniformNoiseCodecError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F32,
provided: decoded.dtype(),
}),
(AnyArrayView::F64(_), decoded) => Err(UniformNoiseError::MismatchedDecodeIntoDtype {
(AnyArrayView::F64(_), decoded) => Err(UniformNoiseCodecError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F64,
provided: decoded.dtype(),
}),
(encoded, _decoded) => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
(encoded, _decoded) => Err(UniformNoiseCodecError::UnsupportedDtype(encoded.dtype())),
}
}

Expand All @@ -108,7 +116,7 @@ impl StaticCodec for UniformNoiseCodec {

#[derive(Debug, Error)]
/// Errors that may occur when applying the [`UniformNoiseCodec`].
pub enum UniformNoiseError {
pub enum UniformNoiseCodecError {
/// [`UniformNoiseCodec`] does not support the dtype
#[error("UniformNoise does not support the dtype {0}")]
UnsupportedDtype(AnyArrayDType),
Expand All @@ -121,6 +129,15 @@ pub enum UniformNoiseError {
/// Dtype of the `provided` array into which the data is to be decoded
provided: AnyArrayDType,
},
/// [`UniformNoiseCodec`] cannot decode the decoded array into the provided
/// array of a different shape
#[error("UniformNoise cannot decode the decoded array of shape {decoded:?} into the provided array of shape {provided:?}")]
MismatchedDecodeIntoShape {
/// Shape of the `decoded` data
decoded: Vec<usize>,
/// Shape of the `provided` array into which the data is to be decoded
provided: Vec<usize>,
},
}

/// Uniform noise codec which adds `U(-scale/2, scale/2)` uniform random noise
Expand Down
27 changes: 27 additions & 0 deletions codecs/zlib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[package]
name = "numcodecs-zlib"
version = "0.1.0"
edition = { workspace = true }
authors = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
rust-version = { workspace = true }

description = "Zlib codec implementation for the numcodecs API"
readme = "README.md"
categories = ["compression", "encoding"]
keywords = ["zlib", "numcodecs", "compression", "encoding"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ndarray = { workspace = true }
numcodecs = { workspace = true }
miniz_oxide = { workspace = true, features = ["with-alloc"] }
postcard = { workspace = true }
serde = { workspace = true, features = ["std", "derive"] }
serde_repr = { workspace = true }
thiserror = { workspace = true }

[lints]
workspace = true
1 change: 1 addition & 0 deletions codecs/zlib/LICENSE
32 changes: 32 additions & 0 deletions codecs/zlib/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]

[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
[workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain

[MSRV]: https://img.shields.io/badge/MSRV-1.64.0-blue
[repo]: https://github.com/juntyr/numcodecs-rs

[Latest Version]: https://img.shields.io/crates/v/numcodecs-zlib
[crates.io]: https://crates.io/crates/numcodecs-zlib

[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-zlib
[docs.rs]: https://docs.rs/numcodecs-zlib/

[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs-zlib

# numcodecs-zlib

Zlib codec implementation for the [`numcodecs`] API.

[`numcodecs`]: https://docs.rs/numcodecs/0.1/numcodecs/

## License

Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/).

## Funding

The `numcodecs-zlib` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe.

Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054.
Loading

0 comments on commit 7dcc47e

Please sign in to comment.