Skip to content

Commit

Permalink
Add the UniformNoise codec
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Aug 6, 2024
1 parent cdd38a4 commit 26de1e9
Show file tree
Hide file tree
Showing 5 changed files with 264 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ members = [

"codecs/bit-round",
"codecs/identity",
"codecs/uniform-noise",
]

[workspace.package]
Expand All @@ -23,17 +24,20 @@ numcodecs-python = { version = "0.2", path = "crates/numcodecs-python", default-
# workspace-internal codecs crates
numcodecs-bit-round = { version = "0.1", path = "codecs/bit-round", default-features = false }
numcodecs-identity = { version = "0.1", path = "codecs/identity", default-features = false }
numcodecs-uniform-noise = { version = "0.1", path = "codecs/uniform-noise", default-features = false }

# crates.io third-party dependencies
convert_case = { version = "0.6", default-features = false }
numpy = { version = "0.21", default-features = false }
ndarray = { version = "0.15", default-features = false } # keep in sync with numpy
pyo3 = { version = "0.21", default-features = false }
pythonize = { version = "0.21", default-features = false }
rand = { version = "0.8", default-features = false }
serde = { version = "1.0", default-features = false }
serde-transcode = { version = "1.1", default-features = false }
serde_json = { version = "1.0", default-features = false }
thiserror = { version = "1.0", default-features = false }
wyhash = { version = "0.5", default-features = false }

[workspace.lints.rust]
unsafe_code = "deny"
Expand Down
26 changes: 26 additions & 0 deletions codecs/uniform-noise/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[package]
name = "numcodecs-uniform-noise"
version = "0.1.0"
edition = { workspace = true }
authors = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
rust-version = { workspace = true }

description = "Uniform noise codec implementation for the numcodecs API"
readme = "README.md"
categories = ["compression", "encoding"]
keywords = ["noise", "numcodecs", "compression", "encoding"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ndarray = { workspace = true }
numcodecs = { workspace = true }
rand = { workspace = true }
wyhash = { workspace = true }
serde = { workspace = true, features = ["std", "derive"] }
thiserror = { workspace = true }

[lints]
workspace = true
1 change: 1 addition & 0 deletions codecs/uniform-noise/LICENSE
32 changes: 32 additions & 0 deletions codecs/uniform-noise/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]

[CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
[workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain

[MSRV]: https://img.shields.io/badge/MSRV-1.64.0-blue
[repo]: https://github.com/juntyr/numcodecs-rs

[Latest Version]: https://img.shields.io/crates/v/numcodecs-uniform-noise
[crates.io]: https://crates.io/crates/numcodecs-uniform-noise

[Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-uniform-noise
[docs.rs]: https://docs.rs/numcodecs-uniform-noise/

[Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
[docs]: https://juntyr.github.io/numcodecs-rs/numcodecs-uniform-noise

# numcodecs-uniform-noise

Uniform noise codec implementation for the [`numcodecs`] API.

[`numcodecs`]: https://docs.rs/numcodecs/0.1/numcodecs/

## License

Licensed under the Mozilla Public License, Version 2.0 ([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/).

## Funding

The `numcodecs-uniform-noise` crate has been developed as part of [ESiWACE3](https://www.esiwace.eu), the third phase of the Centre of Excellence in Simulation of Weather and Climate in Europe.

Funded by the European Union. This work has received funding from the European High Performance Computing Joint Undertaking (JU) under grant agreement No 101093054.
201 changes: 201 additions & 0 deletions codecs/uniform-noise/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
//! [![CI Status]][workflow] [![MSRV]][repo] [![Latest Version]][crates.io] [![Rust Doc Crate]][docs.rs] [![Rust Doc Main]][docs]
//!
//! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/numcodecs-rs/ci.yml?branch=main
//! [workflow]: https://github.com/juntyr/numcodecs-rs/actions/workflows/ci.yml?query=branch%3Amain
//!
//! [MSRV]: https://img.shields.io/badge/MSRV-1.64.0-blue
//! [repo]: https://github.com/juntyr/numcodecs-rs
//!
//! [Latest Version]: https://img.shields.io/crates/v/numcodecs-uniform-noise
//! [crates.io]: https://crates.io/crates/numcodecs-uniform-noise
//!
//! [Rust Doc Crate]: https://img.shields.io/docsrs/numcodecs-uniform-noise
//! [docs.rs]: https://docs.rs/numcodecs-uniform-noise/
//!
//! [Rust Doc Main]: https://img.shields.io/badge/docs-main-blue
//! [docs]: https://juntyr.github.io/numcodecs-rs/numcodecs-uniform-noise
//!
//! Uniform noise codec implementation for the [`numcodecs`] API.
use std::hash::{Hash, Hasher};

use ndarray::{Array, CowArray, Dimension};
use numcodecs::{
AnyArray, AnyArrayDType, AnyArrayView, AnyArrayViewMut, AnyCowArray, Codec, StaticCodec,
};
use rand::{
distributions::{Distribution, Open01},
SeedableRng,
};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use thiserror::Error;
use wyhash::{WyHash, WyRng};

#[derive(Clone, Serialize, Deserialize)]
/// Codec that adds `seed`ed uniform noise of the given `scale` to the data
/// during encoding, using [`add_uniform_noise`], and passes the data through
/// unchanged during decoding.
///
/// Only `f32` and `f64` arrays are supported; encoding or decoding any other
/// dtype fails with [`UniformNoiseError::UnsupportedDtype`].
pub struct UniformNoiseCodec {
/// Scale of the uniform noise, which is sampled from
/// `U(-scale/2, +scale/2)`
///
/// When encoding `f32` data, the scale is cast to `f32` before the noise is
/// generated.
pub scale: f64,
/// Seed for the random noise generator
///
/// The seed is mixed with a hash of the data and its shape, so the noise
/// depends on both the seed and the encoded data.
pub seed: u64,
}

impl Codec for UniformNoiseCodec {
type Error = UniformNoiseError;

fn encode(&self, data: AnyCowArray) -> Result<AnyArray, Self::Error> {
match data {
#[allow(clippy::cast_possible_truncation)]
AnyCowArray::F32(data) => Ok(AnyArray::F32(add_uniform_noise(
data,
self.scale as f32,
self.seed,
))),
AnyCowArray::F64(data) => Ok(AnyArray::F64(add_uniform_noise(
data, self.scale, self.seed,
))),
encoded => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
}
}

fn decode(&self, encoded: AnyCowArray) -> Result<AnyArray, Self::Error> {
match encoded {
AnyCowArray::F32(encoded) => Ok(AnyArray::F32(encoded.into_owned())),
AnyCowArray::F64(encoded) => Ok(AnyArray::F64(encoded.into_owned())),
encoded => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
}
}

fn decode_into(
&self,
encoded: AnyArrayView,
mut decoded: AnyArrayViewMut,
) -> Result<(), Self::Error> {
#[allow(clippy::unit_arg)]
match (&encoded, &mut decoded) {
(AnyArrayView::F32(encoded), AnyArrayViewMut::F32(decoded)) => {
Ok(decoded.assign(encoded))
}
(AnyArrayView::F64(encoded), AnyArrayViewMut::F64(decoded)) => {
Ok(decoded.assign(encoded))
}
(AnyArrayView::F32(_), decoded) => Err(UniformNoiseError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F32,
provided: decoded.dtype(),
}),
(AnyArrayView::F64(_), decoded) => Err(UniformNoiseError::MismatchedDecodeIntoDtype {
decoded: AnyArrayDType::F64,
provided: decoded.dtype(),
}),
(encoded, _decoded) => Err(UniformNoiseError::UnsupportedDtype(encoded.dtype())),
}
}

fn get_config<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.serialize(serializer)
}
}

impl StaticCodec for UniformNoiseCodec {
const CODEC_ID: &'static str = "uniform-noise";

fn from_config<'de, D: Deserializer<'de>>(config: D) -> Result<Self, D::Error> {
Self::deserialize(config)
}
}

#[derive(Debug, Error)]
/// Errors that may occur when applying the [`UniformNoiseCodec`].
pub enum UniformNoiseError {
/// [`UniformNoiseCodec`] does not support the dtype
///
/// Returned when the data is neither `f32` nor `f64`; the payload is the
/// dtype that was rejected.
#[error("UniformNoise does not support the dtype {0}")]
UnsupportedDtype(AnyArrayDType),
/// [`UniformNoiseCodec`] cannot decode the `decoded` dtype into the `provided`
/// array
///
/// Returned by `decode_into` when the output array's dtype differs from the
/// dtype of the encoded data.
#[error("UniformNoise cannot decode the dtype {decoded} into the provided {provided} array")]
MismatchedDecodeIntoDtype {
/// Dtype of the `decoded` data
decoded: AnyArrayDType,
/// Dtype of the `provided` array into which the data is to be decoded
provided: AnyArrayDType,
},
}

/// Adds `U(-scale/2, +scale/2)` uniform random noise to every element of
/// `data` and returns the resulting array.
///
/// The noise is reproducible: the `seed`, the shape of `data`, and the bit
/// patterns of its elements are hashed together, and the resulting hash seeds
/// the pseudo-random number generator that produces the noise. Calling this
/// function again with the same `data`, `scale`, and `seed` therefore
/// produces the same noise and thus the same output.
#[must_use]
pub fn add_uniform_noise<T: Float, D: Dimension>(
    data: CowArray<T, D>,
    scale: T,
    seed: u64,
) -> Array<T, D>
where
    Open01: Distribution<T>,
{
    // derive the seed of the noise generator from the user-provided seed,
    // the shape of the data, and the data itself
    let noise_seed = {
        let mut hasher = WyHash::with_seed(seed);
        // the shape acts as a prefix for the flattened elements
        data.shape().hash(&mut hasher);
        // the elements have to be visited in a defined order
        for value in data.iter().copied() {
            value.hash_bits(&mut hasher);
        }
        hasher.finish()
    };

    let mut rng = WyRng::seed_from_u64(noise_seed);
    let mut noisy = data.into_owned();

    // the elements have to be visited in a defined order
    for element in &mut noisy {
        // element += U(-scale/2, +scale/2), evaluated as
        // U(0,1)*scale + (scale*-0.5 + element)
        let shifted = scale.mul_add(T::NEG_HALF, *element);
        let unit: T = Open01.sample(&mut rng);
        *element = unit.mul_add(scale, shifted);
    }

    noisy
}

/// Floating point types to which [`add_uniform_noise`] can add noise
pub trait Float: Copy {
/// The constant `-0.5`, which [`add_uniform_noise`] uses to shift the
/// sampled noise so that it is centred around zero
const NEG_HALF: Self;

#[must_use]
/// Compute `(self * a) + b`
fn mul_add(self, a: Self, b: Self) -> Self;

/// Hash the binary representation of the floating point value by feeding
/// it into the `hasher`
fn hash_bits<H: Hasher>(self, hasher: &mut H);
}

/// [`Float`] implementation for single-precision floating point values.
impl Float for f32 {
    const NEG_HALF: Self = -0.5_f32;

    fn mul_add(self, a: Self, b: Self) -> Self {
        // resolves to the inherent `f32::mul_add`, which takes priority over
        // this trait method, so there is no recursion
        self.mul_add(a, b)
    }

    fn hash_bits<H: Hasher>(self, hasher: &mut H) {
        // hash the raw 32-bit pattern of the value
        let bits: u32 = self.to_bits();
        hasher.write_u32(bits);
    }
}

/// [`Float`] implementation for double-precision floating point values.
impl Float for f64 {
    const NEG_HALF: Self = -0.5_f64;

    fn mul_add(self, a: Self, b: Self) -> Self {
        // resolves to the inherent `f64::mul_add`, which takes priority over
        // this trait method, so there is no recursion
        self.mul_add(a, b)
    }

    fn hash_bits<H: Hasher>(self, hasher: &mut H) {
        // hash the raw 64-bit pattern of the value
        let bits: u64 = self.to_bits();
        hasher.write_u64(bits);
    }
}

0 comments on commit 26de1e9

Please sign in to comment.