From 5d50924305d41fc178c18016ebf9cf1228a08ef5 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 12 Jan 2024 16:38:25 +0100 Subject: [PATCH 01/51] Added distance_metrics preprocessor (RMSE) --- esmvalcore/preprocessor/_bias.py | 255 ++++++++++++++++++++- tests/unit/preprocessor/_bias/test_bias.py | 226 +++++++++++++++++- 2 files changed, 478 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_bias.py b/esmvalcore/preprocessor/_bias.py index 816baa8635..5802b76a3c 100644 --- a/esmvalcore/preprocessor/_bias.py +++ b/esmvalcore/preprocessor/_bias.py @@ -1,10 +1,20 @@ """Preprocessor functions to calculate biases from data.""" +from __future__ import annotations + import logging +from typing import TYPE_CHECKING, Iterable, Literal, Optional import dask.array as da -import iris.cube +import iris.analysis +import numpy as np +from iris.common.metadata import CubeMetadata +from iris.coords import CellMethod, Coord +from iris.cube import Cube, CubeList + +from esmvalcore.preprocessor._io import concatenate -from ._io import concatenate +if TYPE_CHECKING: + from esmvalcore.preprocessor import PreprocessorFile logger = logging.getLogger(__name__) @@ -116,3 +126,244 @@ def bias(products, bias_type='absolute', denominator_mask_threshold=1e-3, output_products.add(reference_product) return output_products + + +MetricType = Literal['rmse', 'pearsonr'] + + +def distance_metric( + products: set[PreprocessorFile] | Iterable[Cube], + metric: MetricType, + ref_cube: Optional[Cube] = None, + coords: Iterable[Coord] | Iterable[str] | None = None, + keep_reference_dataset: bool = True, +) -> set[PreprocessorFile] | CubeList: + """Calculate distance metrics. + + All input datasets need to have identical dimensional coordinates. This can + for example be ensured with the preprocessors + :func:`esmvalcore.preprocessor.regrid` and/or + :func:`esmvalcore.preprocessor.regrid_time`. + + Notes + ----- + This preprocessor requires a reference dataset, which can be specified with + the `ref_cube` argument. If `ref_cube` is ``None``, exactly one input + dataset in the `products` set needs to have the facet + ``reference_for_metric: true`` defined in the recipe. Please do **not** + specify the option `ref_cube` when using this preprocessor function in a + recipe. + + Parameters + ---------- + products: + Input datasets/cubes for which the distance metric is calculated + relative to a reference dataset/cube. + metric: + Distance metric that is calculated. Must be one of ``'rmse'`` + (calculates the root mean square error) ``'pearsonr'`` (calculates the + Pearson correlation coefficient). + ref_cube: + Cube which is used as reference for the distance metric calculation. If + ``None``, `products` needs to be a :obj:`set` of + `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one + dataset in `products` needs the facet ``reference_for_metric: true``. + coords: + Coordinates over which the distance metric is calculated. If ``None``, + calculate the metric over all coordinates, which results in a scalar + cube. + keep_reference_dataset: + If ``True``, also calculate the distance of the reference dataset with + itself. If ``False``, drop the reference dataset. + + Returns + ------- + set of esmvalcore.preprocessor.PreprocessorFile or iris.cube.CubeList + Output datasets/cubes. Will be a :obj:`set` of + :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if + `products` is also one, a :class:`~iris.cube.CubeList` otherwise. 
+
+    Raises
+    ------
+    ValueError
+        Shape and coordinates of products and reference data do not match;
+        not exactly one input dataset contains the facet
+        ``reference_for_metric: true`` if ``ref_cube=None``; ``ref_cube=None``
+        and the input products are given as iterable of
+        :class:`~iris.cube.Cube` objects; ``metric`` is not one of ``'rmse'``
+        or ``'pearsonr'``.
+
+    """
+    reference_product = None
+    all_cubes_given = all(isinstance(p, Cube) for p in products)
+
+    # Get reference cube if not explicitly given
+    if ref_cube is None:
+        if all_cubes_given:
+            raise ValueError(
+                "`ref_cube` cannot be `None` when `products` is an iterable "
+                "of Cubes"
+            )
+        reference_products = []
+        for product in products:
+            if product.attributes.get('reference_for_metric', False):
+                reference_products.append(product)
+        if len(reference_products) != 1:
+            raise ValueError(
+                f"Expected exactly 1 dataset with 'reference_for_metric: "
+                f"true', found {len(reference_products):d}"
+            )
+        reference_product = reference_products[0]
+
+        # Extract reference cube
+        # Note: For technical reasons, product objects contain the member
+        # ``cubes``, which is a list of cubes. However, this is expected to be
+        # a list with exactly one element due to the call of concatenate
+        # earlier in the preprocessing chain of ESMValTool. To make sure that
+        # this preprocessor can also be used outside the ESMValTool
+        # preprocessing chain, an additional concatenate call is added here.
+        ref_cube = concatenate(reference_product.cubes)
+
+    # If input is an Iterable of Cube objects, calculate distance metric for
+    # each element
+    if all_cubes_given:
+        cubes = [
+            _calculate_metric(c, ref_cube, metric, coords) for c in products
+        ]
+        return CubeList(cubes)
+
+    # Otherwise, iterate over all input products, calculate bias and adapt
+    # metadata and provenance information accordingly
+    output_products = set()
+    for product in products:
+        if not keep_reference_dataset and product == reference_product:
+            continue
+        cube = concatenate(product.cubes)
+
+        # Calculate distance metric
+        cube = _calculate_metric(cube, ref_cube, metric, coords)
+
+        # Adapt metadata and provenance information
+        product.attributes['standard_name'] = cube.standard_name
+        product.attributes['long_name'] = cube.long_name
+        product.attributes['short_name'] = cube.var_name
+        product.attributes['units'] = str(cube.units)
+        if product != reference_product:
+            product.wasderivedfrom(reference_product)
+
+        product.cubes = CubeList([cube])
+        output_products.add(product)
+
+    return output_products
+
+
+def _get_coords(
+    cube: Cube,
+    coords: Iterable[Coord] | Iterable[str] | None,
+) -> Iterable[Coord] | Iterable[str]:
+    """Get coordinates over which distance metric is calculated."""
+    if coords is None:
+        coords = [c.name() for c in cube.dim_coords]
+        if len(coords) != cube.ndim:
+            raise ValueError(
+                f"If coords=None is specified, the cube "
+                f"{cube.summary(shorten=True)} must not have unnamed "
+                f"dimensions"
+            )
+    return coords
+
+
+def _get_all_coord_dims(
+    cube: Cube,
+    coords: Iterable[Coord] | Iterable[str],
+) -> tuple[int, ...]:
+    all_coord_dims = []
+    for coord in coords:
+        all_coord_dims.extend(cube.coord_dims(coord))
+    return tuple(set(all_coord_dims))
+
+
+def _calculate_metric(
+    cube: Cube,
+    ref_cube: Cube,
+    metric: MetricType,
+    coords: Iterable[Coord] | Iterable[str] | None,
+) -> Cube:
+    """Calculate metric for a single cube relative to a reference cube."""
+    # Make sure that dimensional metadata of data and ref data is compatible
+    if cube.shape != 
ref_cube.shape: + raise ValueError( + f"Expected identical shapes of cube and reference cube for " + f"distance metric calculation, got {cube.shape} and " + f"{ref_cube.shape}, respectively" + ) + try: + cube + ref_cube # dummy operation to check if cubes are compatible + except Exception as exc: + raise ValueError( + f"Cannot calculate distance metric between cube and reference " + f"cube: {str(exc)}" + ) + + # Perform the actual calculation of the distance metric + # Note: we work on arrays here instead of cube to stay as flexible as + # possible since some operations (e.g., sqrt()) are not available for cubes + coords = _get_coords(cube, coords) + metrics_funcs = { + 'rmse': _calculate_rmse, + 'pearsonr': _calculate_pearsonr, + } + if metric not in metrics_funcs: + raise ValueError( + f"Expected one of {list(metrics_funcs)} for metric, got '{metric}'" + ) + (res_data, res_metadata) = metrics_funcs[metric](cube, ref_cube, coords) + + # Get result cube with correct dimensional metadata by using dummy + # operation (max) + res_cube = cube.collapsed(coords, iris.analysis.MAX) + res_cube.cell_methods = [ + *res_cube.cell_methods[:-1], CellMethod(metric, coords) + ] + res_cube.data = res_data + res_cube.metadata = res_metadata + + return res_cube + + +def _calculate_rmse( + cube: Cube, + ref_cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate root mean square error.""" + # Data + axis = _get_all_coord_dims(cube, coords) + squared_error = (cube.core_data() - ref_cube.core_data())**2 + rmse = np.sqrt( # handles dask arrays properly through numpy dispatch + np.mean(squared_error, axis=axis) + ) + + # Metadata + metadata = CubeMetadata( + None, + 'RMSE' if cube.long_name is None else f'RMSE of {cube.long_name}', + 'rmse' if cube.var_name is None else f'rmse_{cube.var_name}', + cube.units, + cube.attributes, + cube.cell_methods, + ) + + return (rmse, metadata) + + +def _calculate_pearsonr( + cube: Cube, + ref_cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate Pearson correlation coefficient.""" + # TODO: change!!! 
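+    # (The placeholder below keeps the 'pearsonr' code path callable for now:
+    # it collapses the cube with an unweighted mean and passes the metadata
+    # through unchanged until the actual correlation is implemented.)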
+ data = cube.collapsed(coords, iris.analysis.MEAN) + metadata = cube.metadata + return (data, metadata) diff --git a/tests/unit/preprocessor/_bias/test_bias.py b/tests/unit/preprocessor/_bias/test_bias.py index 42ab9f95a4..eaf324e843 100644 --- a/tests/unit/preprocessor/_bias/test_bias.py +++ b/tests/unit/preprocessor/_bias/test_bias.py @@ -1,12 +1,14 @@ """Unit tests for :mod:`esmvalcore.preprocessor._bias`.""" +import dask.array as da import iris import iris.cube import numpy as np import pytest from cf_units import Unit +from iris.cube import Cube, CubeList -from esmvalcore.preprocessor._bias import bias +from esmvalcore.preprocessor._bias import bias, distance_metric from tests import PreprocessorFile @@ -259,3 +261,225 @@ def test_keep_reference_dataset(regular_cubes, ref_cubes): assert out_cube.units == 'K' assert out_cube.dim_coords == ref_cubes[0].dim_coords assert out_cube.aux_coords == ref_cubes[0].aux_coords + + +def test_rmse(regular_cubes, ref_cubes): + """Test calculation of RMSE.""" + ref_product = PreprocessorFile( + ref_cubes, 'REF', {'reference_for_metric': True} + ) + products = { + PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), + PreprocessorFile(regular_cubes, 'B', {'dataset': 'b'}), + ref_product, + } + + out_products = distance_metric(products, 'rmse') + + assert isinstance(out_products, set) + out_dict = products_set_to_dict(out_products) + assert len(out_dict) == 3 + expected_attrs = { + 'standard_name': None, + 'long_name': 'RMSE', + 'short_name': 'rmse_tas', + 'units': 'K', + } + + product_a = out_dict['A'] + assert product_a.filename == 'A' + assert product_a.attributes == {'dataset': 'a', **expected_attrs} + assert len(product_a.cubes) == 1 + out_cube = product_a.cubes[0] + assert out_cube.shape == () + assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) + assert out_cube.var_name == 'rmse_tas' + assert out_cube.long_name == 'RMSE' + assert out_cube.standard_name is None + assert out_cube.units == 'K' + product_a.wasderivedfrom.assert_called_once() + assert product_a.mock_ancestors == {ref_product} + + product_b = out_dict['B'] + assert product_b.filename == 'B' + assert product_b.attributes == {'dataset': 'b', **expected_attrs} + assert len(product_b.cubes) == 1 + out_cube = product_b.cubes[0] + assert out_cube.shape == () + assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) + assert out_cube.var_name == 'rmse_tas' + assert out_cube.long_name == 'RMSE' + assert out_cube.standard_name is None + assert out_cube.units == 'K' + product_b.wasderivedfrom.assert_called_once() + assert product_b.mock_ancestors == {ref_product} + + product_ref = out_dict['REF'] + assert product_ref.filename == 'REF' + assert product_ref.attributes == { + 'reference_for_metric': True, **expected_attrs + } + assert len(product_ref.cubes) == 1 + out_cube = product_ref.cubes[0] + assert out_cube.shape == () + assert_array_equal(out_cube.data, 0.0) + assert out_cube.var_name == 'rmse_tas' + assert out_cube.long_name == 'RMSE' + assert out_cube.standard_name is None + assert out_cube.units == 'K' + product_ref.wasderivedfrom.assert_not_called() + assert product_ref.mock_ancestors == set() + + +def test_rmse_lazy(regular_cubes, ref_cubes): + """Test calculation of RMSE.""" + regular_cubes[0].data = da.array(regular_cubes[0].data) + ref_cubes[0].data = da.array(ref_cubes[0].data) + ref_product = PreprocessorFile( + ref_cubes, 'REF', {'reference_for_metric': True} + ) + products = { + PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), 
+ ref_product, + } + + out_products = distance_metric( + products, + 'rmse', + coords=['latitude', 'longitude'], + keep_reference_dataset=False, + ) + + assert isinstance(out_products, set) + out_dict = products_set_to_dict(out_products) + assert len(out_dict) == 1 + + product_a = out_dict['A'] + assert product_a.filename == 'A' + assert product_a.attributes == { + 'dataset': 'a', + 'standard_name': None, + 'long_name': 'RMSE', + 'short_name': 'rmse_tas', + 'units': 'K', + } + assert len(product_a.cubes) == 1 + out_cube = product_a.cubes[0] + assert out_cube.shape == (2,) + assert out_cube.has_lazy_data() + assert_array_equal( + out_cube.data, np.array([1.224744871, 3.082207001], dtype=np.float32), + ) + assert out_cube.coord('time') == regular_cubes[0].coord('time') + assert out_cube.var_name == 'rmse_tas' + assert out_cube.long_name == 'RMSE' + assert out_cube.standard_name is None + assert out_cube.units == 'K' + product_a.wasderivedfrom.assert_called_once() + assert product_a.mock_ancestors == {ref_product} + + +def test_rmse_cubes(regular_cubes, ref_cubes): + """Test calculation of RMSE with cubes.""" + out_cubes = distance_metric(regular_cubes, 'rmse', ref_cube=ref_cubes[0]) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.shape == () + assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) + assert out_cube.var_name == 'rmse_tas' + assert out_cube.long_name == 'RMSE' + assert out_cube.standard_name is None + assert out_cube.units == 'K' + + +def test_no_reference_for_metric(regular_cubes, ref_cubes): + """Test fail when no reference_for_metric is given.""" + products = { + PreprocessorFile(regular_cubes, 'A', {}), + PreprocessorFile(regular_cubes, 'B', {}), + PreprocessorFile(ref_cubes, 'REF', {}), + } + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true', found 0" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(products, 'rmse') + + +def test_two_reference_for_metric(regular_cubes, ref_cubes): + """Test fail when two reference_for_metric is given.""" + products = { + PreprocessorFile(regular_cubes, 'A', {'reference_for_metric': False}), + PreprocessorFile(ref_cubes, 'REF1', {'reference_for_metric': True}), + PreprocessorFile(ref_cubes, 'REF2', {'reference_for_metric': True}), + } + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true', found 2" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(products, 'rmse') + + +def test_invalid_metric(regular_cubes, ref_cubes): + """Test fail when invalid metric is given.""" + products = { + PreprocessorFile(regular_cubes, 'A', {}), + PreprocessorFile(regular_cubes, 'B', {}), + PreprocessorFile(ref_cubes, 'REF', {'reference_for_metric': True}), + } + msg = ( + r"Expected one of \['rmse', 'pearsonr'\] for metric, got 'invalid'" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(products, 'invalid') + + +@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +def test_distance_metric_ref_cube_non_cubes(regular_cubes, metric): + """Test distance metric with ref_cube=None with with cubes.""" + msg = "`ref_cube` cannot be `None` when `products` is an iterable of Cubes" + with pytest.raises(ValueError, match=msg): + distance_metric(regular_cubes, metric) + + +@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +def test_distance_metric_no_named_dimensions(metric): + """Test distance metric with ref_cube=None with with cubes.""" + ref_cube = Cube([0, 1]) + cubes = 
CubeList([ref_cube]) + msg = ( + "If coords=None is specified, the cube .* must not have unnamed " + "dimensions" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(cubes, metric, ref_cube=ref_cube) + + +@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +def test_distance_metric_non_matching_shapes(regular_cubes, metric): + """Test distance metric with ref_cube=None with with cubes.""" + ref_cube = Cube(0) + msg = ( + r"Expected identical shapes of cube and reference cube for distance " + r"metric calculation, got \(2, 2, 2\) and \(\), respectively" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(regular_cubes, metric, ref_cube=ref_cube) + + +@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +def test_distance_metric_non_matching_dims(regular_cubes, metric): + """Test distance metric with ref_cube=None with with cubes.""" + ref_cube = regular_cubes[0].copy() + ref_cube.remove_coord('time') + new_coord = iris.coords.DimCoord([0.0, 1.0], var_name='not_time') + ref_cube.add_dim_coord(new_coord, 0) + msg = ( + "Cannot calculate distance metric between cube and reference cube: " + "Insufficient matching coordinate metadata to resolve cubes" + ) + with pytest.raises(ValueError, match=msg): + distance_metric(regular_cubes, metric, ref_cube=ref_cube) From 7b402f21fe63daebedbf06098fbafedf434448fe Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 12 Jan 2024 17:11:02 +0100 Subject: [PATCH 02/51] Make distance_metric usable in recipe --- esmvalcore/preprocessor/__init__.py | 6 ++++-- esmvalcore/preprocessor/_bias.py | 4 +--- tests/unit/preprocessor/_bias/test_bias.py | 13 +++++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 2144e5edce..1c449ee5a0 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -22,7 +22,7 @@ meridional_statistics, zonal_statistics, ) -from ._bias import bias +from ._bias import bias, distance_metric from ._cycles import amplitude from ._derive import derive from ._detrend import detrend @@ -179,8 +179,9 @@ 'ensemble_statistics', # Multi model statistics 'multi_model_statistics', - # Bias calculation + # Bias/metrics calculation 'bias', + 'distance_metric', # Remove supplementary variables from cube 'remove_supplementary_variables', # Save to file @@ -215,6 +216,7 @@ MULTI_MODEL_FUNCTIONS = { 'bias', + 'distance_metric', 'ensemble_statistics', 'multi_model_statistics', 'mask_multimodel', diff --git a/esmvalcore/preprocessor/_bias.py b/esmvalcore/preprocessor/_bias.py index 5802b76a3c..ece2426aae 100644 --- a/esmvalcore/preprocessor/_bias.py +++ b/esmvalcore/preprocessor/_bias.py @@ -322,11 +322,9 @@ def _calculate_metric( # Get result cube with correct dimensional metadata by using dummy # operation (max) res_cube = cube.collapsed(coords, iris.analysis.MAX) - res_cube.cell_methods = [ - *res_cube.cell_methods[:-1], CellMethod(metric, coords) - ] res_cube.data = res_data res_cube.metadata = res_metadata + res_cube.cell_methods = [*cube.cell_methods, CellMethod(metric, coords)] return res_cube diff --git a/tests/unit/preprocessor/_bias/test_bias.py b/tests/unit/preprocessor/_bias/test_bias.py index eaf324e843..fd17c34ed1 100644 --- a/tests/unit/preprocessor/_bias/test_bias.py +++ b/tests/unit/preprocessor/_bias/test_bias.py @@ -6,6 +6,7 @@ import numpy as np import pytest from cf_units import Unit +from iris.coords import CellMethod from iris.cube import Cube, CubeList from 
esmvalcore.preprocessor._bias import bias, distance_metric @@ -297,6 +298,9 @@ def test_rmse(regular_cubes, ref_cubes): assert out_cube.long_name == 'RMSE' assert out_cube.standard_name is None assert out_cube.units == 'K' + assert out_cube.cell_methods == ( + CellMethod('rmse', ['time', 'latitude', 'longitude']), + ) product_a.wasderivedfrom.assert_called_once() assert product_a.mock_ancestors == {ref_product} @@ -311,6 +315,9 @@ def test_rmse(regular_cubes, ref_cubes): assert out_cube.long_name == 'RMSE' assert out_cube.standard_name is None assert out_cube.units == 'K' + assert out_cube.cell_methods == ( + CellMethod('rmse', ['time', 'latitude', 'longitude']), + ) product_b.wasderivedfrom.assert_called_once() assert product_b.mock_ancestors == {ref_product} @@ -327,6 +334,9 @@ def test_rmse(regular_cubes, ref_cubes): assert out_cube.long_name == 'RMSE' assert out_cube.standard_name is None assert out_cube.units == 'K' + assert out_cube.cell_methods == ( + CellMethod('rmse', ['time', 'latitude', 'longitude']), + ) product_ref.wasderivedfrom.assert_not_called() assert product_ref.mock_ancestors == set() @@ -375,6 +385,9 @@ def test_rmse_lazy(regular_cubes, ref_cubes): assert out_cube.long_name == 'RMSE' assert out_cube.standard_name is None assert out_cube.units == 'K' + assert out_cube.cell_methods == ( + CellMethod('rmse', ['latitude', 'longitude']), + ) product_a.wasderivedfrom.assert_called_once() assert product_a.mock_ancestors == {ref_product} From 960965528ed98c7e0718738fa7d0410993ae79da Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 24 Jan 2024 13:27:00 +0100 Subject: [PATCH 03/51] Renamed _bias.py -> _compare_with_refs.py --- esmvalcore/preprocessor/__init__.py | 4 ++-- esmvalcore/preprocessor/{_bias.py => _compare_with_refs.py} | 4 ++-- .../test_compare_with_refs.py} | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename esmvalcore/preprocessor/{_bias.py => _compare_with_refs.py} (99%) rename tests/unit/preprocessor/{_bias/test_bias.py => _compare_with_refs/test_compare_with_refs.py} (99%) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 1c449ee5a0..a27b265b44 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -22,7 +22,7 @@ meridional_statistics, zonal_statistics, ) -from ._bias import bias, distance_metric +from ._compare_with_refs import bias, distance_metric from ._cycles import amplitude from ._derive import derive from ._detrend import detrend @@ -179,7 +179,7 @@ 'ensemble_statistics', # Multi model statistics 'multi_model_statistics', - # Bias/metrics calculation + # Comparison with reference datasets 'bias', 'distance_metric', # Remove supplementary variables from cube diff --git a/esmvalcore/preprocessor/_bias.py b/esmvalcore/preprocessor/_compare_with_refs.py similarity index 99% rename from esmvalcore/preprocessor/_bias.py rename to esmvalcore/preprocessor/_compare_with_refs.py index 54b99a3234..111a27c55a 100644 --- a/esmvalcore/preprocessor/_bias.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -1,4 +1,4 @@ -"""Preprocessor functions to calculate biases from data.""" +"""Preprocessor functions for comparisons of data with reference datasets.""" from __future__ import annotations import logging @@ -296,7 +296,7 @@ def distance_metric( ] return CubeList(cubes) - # Otherwise, iterate over all input products, calculate bias and adapt + # Otherwise, iterate over all input products, calculate metric and adapt # metadata and provenance information accordingly 
     output_products = set()
     for product in products:
diff --git a/tests/unit/preprocessor/_bias/test_bias.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
similarity index 99%
rename from tests/unit/preprocessor/_bias/test_bias.py
rename to tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index a73b48e19d..e41139ee36 100644
--- a/tests/unit/preprocessor/_bias/test_bias.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -1,4 +1,4 @@
-"""Unit tests for :mod:`esmvalcore.preprocessor._bias`."""
+"""Unit tests for :mod:`esmvalcore.preprocessor._compare_with_refs`."""
 
 import dask.array as da
 import iris
@@ -8,7 +8,7 @@
 from iris.coords import CellMethod
 from iris.cube import Cube, CubeList
 
-from esmvalcore.preprocessor._bias import bias, distance_metric
+from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric
 from tests import PreprocessorFile
 
From a8d1ac0bec6fd1328cdaf3d2afa35681cfbf83f0 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Wed, 24 Jan 2024 19:20:16 +0100
Subject: [PATCH 04/51] Added weighted distance metrics

---
 doc/recipe/preprocessor.rst                   |  40 ++---
 esmvalcore/_recipe/to_datasets.py             |   2 +
 esmvalcore/preprocessor/_compare_with_refs.py |  98 +++++++++--
 .../test_compare_with_refs.py                 | 153 ++++++++++++------
 4 files changed, 212 insertions(+), 81 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 271641c857..5f3ae8691d 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -91,13 +91,13 @@ supported too if proper keyword arguments are specified:
 
 .. _supported_stat_operator:
 
 ============================== ================================================= =====================================
 `operator`                     Corresponding :class:`~iris.analysis.Aggregator`  Weighted? [#f1]_
 ============================== ================================================= =====================================
 ``gmean``                      :const:`iris.analysis.GMEAN`                      no
 ``hmean``                      :const:`iris.analysis.HMEAN`                      no
 ``max``                       :const:`iris.analysis.MAX`                        no
 ``mean``                       :const:`iris.analysis.MEAN`                       yes
 ``median``                     :const:`iris.analysis.MEDIAN` [#f2]_              no
 ``min``                        :const:`iris.analysis.MIN`                        no
 ``peak``                       :const:`iris.analysis.PEAK`                       no
 ``percentile``                 :const:`iris.analysis.PERCENTILE`                 no
 ``rms``                        :const:`iris.analysis.RMS`                        yes
 ``std_dev``                    :const:`iris.analysis.STD_DEV`                    no
 ``sum``                        :const:`iris.analysis.SUM`                        yes
 ``variance``                   :const:`iris.analysis.VARIANCE`                   no
 ``wpercentile``                :const:`iris.analysis.WPERCENTILE`                yes
 ============================== ================================================= =====================================
 
 .. [#f1] The following preprocessors support weighted statistics by default:
    :func:`~esmvalcore.preprocessor.area_statistics`: weighted by grid cell
    areas (see also :ref:`preprocessors_using_supplementary_variables`);
    :func:`~esmvalcore.preprocessor.climate_statistics`: weighted by lengths of
    time intervals; :func:`~esmvalcore.preprocessor.volume_statistics`:
    weighted by grid cell volumes (see also
    :ref:`preprocessors_using_supplementary_variables`);
    :func:`~esmvalcore.preprocessor.axis_statistics`: weighted by corresponding
    coordinate bounds.
 .. [#f2] :const:`iris.analysis.MEDIAN` is not lazy, but much faster than
    :const:`iris.analysis.PERCENTILE`. For a lazy median, use ``percentile``
    with the keyword argument ``percent: 50``. 
@@ -301,23 +301,27 @@ or to perform their computations.
 In ESMValCore we call both types of variables "supplementary variables".
 
-============================================================== ============================== =====================================
-Preprocessor                                                   Variable short name            Variable standard name
-============================================================== ============================== =====================================
-:ref:`area_statistics`                                         ``areacella``, ``areacello``   cell_area
-:ref:`mask_landsea`                                            ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
-:ref:`mask_landseaice`                                         ``sftgif``                     land_ice_area_fraction
-:ref:`volume_statistics`                                       ``volcello``                   ocean_volume
-:ref:`weighting_landsea_fraction`                              ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
-============================================================== ============================== =====================================
+===================================================================== ============================== =====================================
+Preprocessor                                                          Variable short name            Variable standard name
+===================================================================== ============================== =====================================
+:ref:`area_statistics` [#f4]_                                         ``areacella``, ``areacello``   cell_area
+:ref:`mask_landsea` [#f4]_                                            ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
+:ref:`mask_landseaice` [#f3]_                                         ``sftgif``                     land_ice_area_fraction
+:ref:`volume_statistics` [#f4]_                                       ``volcello``                   ocean_volume
+:ref:`weighting_landsea_fraction` [#f3]_                              ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
+===================================================================== ============================== =====================================
+
+.. [#f3] This preprocessor requires at least one of the mentioned supplementary
+   variables. If none is defined in the recipe, ESMValCore will automatically
+   look for them. If none is found, an error will be raised.
+.. [#f4] This preprocessor prefers at least one of the mentioned supplementary
+   variables. If none is defined in the recipe, ESMValCore will automatically
+   look for them. If none is found, a warning will be raised (but no error).
+.. [#f5] This preprocessor optionally takes one of the mentioned supplementary
+   variables. If none is defined in the recipe, none is added.
 
 Only one of the listed variables is required.
 Supplementary variables can be defined in the recipe as described
 in :ref:`supplementary_variables`.
-In some cases, preprocessor functions may work without supplementary variables,
-this is documented case by case in the preprocessor function definition.
-If a preprocessor function requiring supplementary variables is used
-without specifying these in the recipe, these will be automatically
-added.
+If the automatic selection does not give the desired result, specify the
+supplementary variables in the recipe as described in
+:ref:`supplementary_variables`. 
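The weighted metrics added in this patch can consume the ``cell_area``
supplementary variable described above. A minimal, self-contained sketch (not
part of the patch; the 2x2 grid, values, and units are invented for
illustration) of the cube-level API::

    import numpy as np
    from iris.coords import CellMeasure, DimCoord
    from iris.cube import Cube

    from esmvalcore.preprocessor import distance_metric

    lat = DimCoord([-45.0, 45.0], standard_name='latitude',
                   units='degrees_north')
    lon = DimCoord([90.0, 270.0], standard_name='longitude',
                   units='degrees_east')
    cube = Cube(
        np.full((2, 2), 274.0, dtype=np.float32),
        standard_name='air_temperature',
        units='K',
        dim_coords_and_dims=[(lat, 0), (lon, 1)],
    )
    ref_cube = cube.copy(cube.core_data() + 1.0)

    # Supply cell areas directly (the 'areacella' route in a recipe), so
    # 'weighted_rmse' does not have to calculate them from the grid.
    cube.add_cell_measure(
        CellMeasure(np.ones((2, 2)), standard_name='cell_area', units='m2'),
        (0, 1),
    )

    (rmse_cube,) = distance_metric([cube], 'weighted_rmse', ref_cube=ref_cube)
    print(rmse_cube.data)  # scalar cube; a constant 1 K offset gives 1.0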
diff --git a/esmvalcore/_recipe/to_datasets.py b/esmvalcore/_recipe/to_datasets.py index 91ebed1179..7928940be5 100644 --- a/esmvalcore/_recipe/to_datasets.py +++ b/esmvalcore/_recipe/to_datasets.py @@ -358,6 +358,7 @@ def _get_datasets_for_variable( for template1 in template0.from_ranges(): for supplementary_facets in supplementaries: template1.add_supplementary(**supplementary_facets) + print(supplementary_facets) for supplementary_ds in template1.supplementaries: supplementary_ds.facets.pop('preprocessor', None) for dataset in _dataset_from_files(template1): @@ -367,6 +368,7 @@ def _get_datasets_for_variable( logger.debug("Found %s", dataset.summary(shorten=True)) datasets.append(dataset) idx += 1 + print(dataset) return datasets diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 111a27c55a..ac82283c9c 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -3,6 +3,7 @@ import logging from collections.abc import Iterable +from functools import partial from typing import TYPE_CHECKING, Literal, Optional import dask.array as da @@ -11,8 +12,13 @@ from iris.common.metadata import CubeMetadata from iris.coords import CellMethod, Coord from iris.cube import Cube, CubeList +from iris.exceptions import CoordinateNotFoundError +from iris.util import broadcast_to_shape +from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._io import concatenate +from esmvalcore.preprocessor._other import get_array_module +from esmvalcore.preprocessor._time import get_time_weights if TYPE_CHECKING: from esmvalcore.preprocessor import PreprocessorFile @@ -192,7 +198,12 @@ def _calculate_bias(cube: Cube, ref_cube: Cube, bias_type: BiasType) -> Cube: return cube -MetricType = Literal['rmse', 'pearsonr'] +MetricType = Literal[ + 'weighted_rmse', + 'rmse', + 'weighted_pearsonr', + 'pearsonr', +] def distance_metric( @@ -224,9 +235,24 @@ def distance_metric( Input datasets/cubes for which the distance metric is calculated relative to a reference dataset/cube. metric: - Distance metric that is calculated. Must be one of ``'rmse'`` - (calculates the root mean square error) ``'pearsonr'`` (calculates the - Pearson correlation coefficient). + Distance metric that is calculated. Must be one of ``'weighted_rmse'`` + (weighted root mean square error), ``'rmse'`` (unweighted root mean + square error), ``'weighted_pearsonr'`` (weighted Pearson correlation + coefficient), ``'pearsonr'`` (unweighted Pearson correlation + coefficient). + + .. note:: + Metrics starting with `weighted_` will calculate weighted distance + metrics if possible. Currently, the following `coords` (or any + combinations that include them) will trigger weighting: `time` + (will use lengths of time intervals as weights) and `latitude` + (will use cell area weights). Time weights are always calculated + from the input data. Area weights can be given as supplementary + variables to the recipe (`areacella` or `areacello`, see + :ref:`supplementary_variables`) or calculated from the input data + (this only works for regular grids). By default, **NO** + supplementary variables will be used; they need to be explicitly + requested. ref_cube: Cube which is used as reference for the distance metric calculation. 
If ``None``, `products` needs to be a :obj:`set` of
@@ -254,8 +280,11 @@
         not exactly one input dataset contains the facet
         ``reference_for_metric: true`` if ``ref_cube=None``; ``ref_cube=None``
         and the input products are given as iterable of
-        :class:`~iris.cube.Cube` objects; ``metric`` is not one of ``'rmse'``
-        or ``'pearsonr'``.
+        :class:`~iris.cube.Cube` objects; an invalid ``metric`` has been given.
+    iris.exceptions.CoordinateNotFoundError
+        `longitude` is not found in the cube if a weighted metric is to be
+        calculated, `latitude` is in `coords`, and no `cell_area` is given
+        as :ref:`supplementary_variables`.
 
     """
     reference_product = None
     all_cubes_given = all(isinstance(p, Cube) for p in products)
@@ -265,8 +294,8 @@
     if ref_cube is None:
         if all_cubes_given:
             raise ValueError(
-                "`ref_cube` cannot be `None` when `products` is an iterable "
-                "of Cubes"
+                "A list of Cubes is given to this preprocessor; please "
+                "specify a `ref_cube`"
             )
         reference_products = []
         for product in products:
@@ -374,8 +403,10 @@
     coords = _get_coords(cube, coords)
     metrics_funcs = {
-        'rmse': _calculate_rmse,
-        'pearsonr': _calculate_pearsonr,
+        'weighted_rmse': partial(_calculate_rmse, weighted=True),
+        'rmse': partial(_calculate_rmse, weighted=False),
+        'weighted_pearsonr': partial(_calculate_pearsonr, weighted=True),
+        'pearsonr': partial(_calculate_pearsonr, weighted=False),
     }
     if metric not in metrics_funcs:
         raise ValueError(
@@ -392,19 +423,58 @@
+def _get_weights(
+    cube: Cube,
+    coords: Iterable[Coord] | Iterable[str],
+) -> da.Array:
+    """Calculate weights for weighted distance metrics."""
+    weights = da.ones(cube.shape, dtype=cube.dtype)
+
+    # Time weights: lengths of time interval
+    if 'time' in coords:
+        weights *= broadcast_to_shape(
+            da.array(get_time_weights(cube)),
+            cube.shape,
+            cube.coord_dims('time'),
+        )
+
+    # Latitude weights: cell areas
+    if 'latitude' in coords:
+        cube = cube.copy()  # avoid overwriting input cube
+        if (
+            not cube.cell_measures('cell_area') and
+            not cube.coord('longitude')
+        ):
+            raise CoordinateNotFoundError(
+                f"Cube {cube.summary(shorten=True)} need a 'longitude' "
+                f"coordinate to calculate weighted distance metric over "
+                f"coordinates {coords} (alternatively, a `cell_area` can be "
+                f"given to the cube)"
+            )
+        _try_adding_calculated_cell_area(cube)
+        weights *= broadcast_to_shape(
+            cube.cell_measure('cell_area').core_data(),
+            cube.shape,
+            cube.cell_measure_dims('cell_area'),
+        )
+
+    return weights
+
 def _calculate_rmse(
     cube: Cube,
     ref_cube: Cube,
     coords: Iterable[Coord] | Iterable[str],
+    *,
+    weighted: bool,
 ) -> tuple[np.ndarray | da.Array, CubeMetadata]:
     """Calculate root mean square error."""
     # Data
     axis = _get_all_coord_dims(cube, coords)
+    weights = _get_weights(cube, coords) if weighted else None
     squared_error = (cube.core_data() - ref_cube.core_data())**2
-    rmse = np.sqrt(  # handles dask arrays properly through numpy dispatch
-        np.mean(squared_error, axis=axis)
-    )
+    npx = get_array_module(squared_error)
+    rmse = npx.sqrt(npx.ma.average(squared_error, axis=axis, weights=weights))
 
     # Metadata
     metadata = CubeMetadata(
@@ -423,6 +493,8 @@
 def _calculate_pearsonr(
     cube: Cube,
     ref_cube: Cube,
     coords: Iterable[Coord] | Iterable[str],
+    *,
+    weighted: bool,
 ) -> tuple[np.ndarray | da.Array, CubeMetadata]:
     """Calculate Pearson correlation coefficient."""
+    axis = _get_all_coord_dims(cube, coords)
+    weights = _get_weights(cube, coords) if weighted else None
     # TODO: change!!! 
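+    # (`axis` and `weights` are prepared above but not consumed yet; the
+    # placeholder below still returns the collapsed unweighted mean until the
+    # weighted Pearson correlation is implemented.)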
diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index e41139ee36..375177d89e 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -5,7 +5,7 @@ import numpy as np import pytest from cf_units import Unit -from iris.coords import CellMethod +from iris.coords import CellMeasure, CellMethod from iris.cube import Cube, CubeList from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric @@ -34,7 +34,9 @@ def products_set_to_dict(products): def get_3d_cube(data, **cube_kwargs): """Create 3D cube.""" time_units = Unit('days since 1850-01-01 00:00:00') - times = iris.coords.DimCoord([0.0, 1.0], standard_name='time', + times = iris.coords.DimCoord([3.0, 7.0], + bounds=[[0.0, 6.0], [6.0, 8.0]], + standard_name='time', var_name='time', long_name='time', units=time_units) lats = iris.coords.DimCoord([0.0, 10.0], standard_name='latitude', @@ -301,7 +303,7 @@ def test_no_reference_for_bias(regular_cubes, ref_cubes): def test_two_references_for_bias(regular_cubes, ref_cubes): - """Test fail when two reference_for_bias products is given.""" + """Test fail when two reference_for_bias products are given.""" products = { PreprocessorFile(regular_cubes, 'A', {'reference_for_bias': False}), PreprocessorFile(ref_cubes, 'REF1', {'reference_for_bias': True}), @@ -335,8 +337,25 @@ def test_ref_cube_non_cubes(regular_cubes): bias(regular_cubes) -def test_rmse(regular_cubes, ref_cubes): - """Test calculation of RMSE.""" +TEST_DISTANCE_METRICS = [ + ('weighted_rmse', 2.0, 'RMSE', 'rmse_tas', 'K'), + ('rmse', 2.34520788, 'RMSE', 'rmse_tas', 'K'), +] +AREA_WEIGHTS = CellMeasure( + np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), + standard_name='cell_area', + units='m2', +) + + +@pytest.mark.parametrize( + 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric( + regular_cubes, ref_cubes, metric, data, long_name, var_name, units +): + """Test `distance_metric`.""" + regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) ref_product = PreprocessorFile( ref_cubes, 'REF', {'reference_for_metric': True} ) @@ -346,16 +365,16 @@ def test_rmse(regular_cubes, ref_cubes): ref_product, } - out_products = distance_metric(products, 'rmse') + out_products = distance_metric(products, metric) assert isinstance(out_products, set) out_dict = products_set_to_dict(out_products) assert len(out_dict) == 3 expected_attrs = { 'standard_name': None, - 'long_name': 'RMSE', - 'short_name': 'rmse_tas', - 'units': 'K', + 'long_name': long_name, + 'short_name': var_name, + 'units': units, } product_a = out_dict['A'] @@ -364,13 +383,13 @@ def test_rmse(regular_cubes, ref_cubes): assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.shape == () - assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) - assert out_cube.var_name == 'rmse_tas' - assert out_cube.long_name == 'RMSE' + assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name assert out_cube.standard_name is None - assert out_cube.units == 'K' + assert out_cube.units == units assert out_cube.cell_methods == ( - CellMethod('rmse', ['time', 'latitude', 'longitude']), + CellMethod(metric, ['time', 'latitude', 'longitude']), ) product_a.wasderivedfrom.assert_called_once() assert product_a.mock_ancestors 
== {ref_product} @@ -381,13 +400,13 @@ def test_rmse(regular_cubes, ref_cubes): assert len(product_b.cubes) == 1 out_cube = product_b.cubes[0] assert out_cube.shape == () - assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) - assert out_cube.var_name == 'rmse_tas' - assert out_cube.long_name == 'RMSE' + assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name assert out_cube.standard_name is None - assert out_cube.units == 'K' + assert out_cube.units == units assert out_cube.cell_methods == ( - CellMethod('rmse', ['time', 'latitude', 'longitude']), + CellMethod(metric, ['time', 'latitude', 'longitude']), ) product_b.wasderivedfrom.assert_called_once() assert product_b.mock_ancestors == {ref_product} @@ -401,19 +420,30 @@ def test_rmse(regular_cubes, ref_cubes): out_cube = product_ref.cubes[0] assert out_cube.shape == () assert_array_equal(out_cube.data, 0.0) - assert out_cube.var_name == 'rmse_tas' - assert out_cube.long_name == 'RMSE' + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name assert out_cube.standard_name is None - assert out_cube.units == 'K' + assert out_cube.units == units assert out_cube.cell_methods == ( - CellMethod('rmse', ['time', 'latitude', 'longitude']), + CellMethod(metric, ['time', 'latitude', 'longitude']), ) product_ref.wasderivedfrom.assert_not_called() assert product_ref.mock_ancestors == set() -def test_rmse_lazy(regular_cubes, ref_cubes): - """Test calculation of RMSE.""" +TEST_DISTANCE_METRICS_LAZY = [ + # ('weighted_rmse', 2.0, 'RMSE', 'rmse_tas', 'K'), + ('rmse', [1.224744871, 3.082207001], 'RMSE', 'rmse_tas', 'K'), +] + + +@pytest.mark.parametrize( + 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS_LAZY +) +def test_distance_metric_lazy( + regular_cubes, ref_cubes, metric, data, long_name, var_name, units +): + """Test `distance_metric` with lazy data.""" regular_cubes[0].data = da.array(regular_cubes[0].data) ref_cubes[0].data = da.array(ref_cubes[0].data) ref_product = PreprocessorFile( @@ -426,7 +456,7 @@ def test_rmse_lazy(regular_cubes, ref_cubes): out_products = distance_metric( products, - 'rmse', + metric, coords=['latitude', 'longitude'], keep_reference_dataset=False, ) @@ -440,46 +470,64 @@ def test_rmse_lazy(regular_cubes, ref_cubes): assert product_a.attributes == { 'dataset': 'a', 'standard_name': None, - 'long_name': 'RMSE', - 'short_name': 'rmse_tas', - 'units': 'K', + 'long_name': long_name, + 'short_name': var_name, + 'units': units, } assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.shape == (2,) assert out_cube.has_lazy_data() assert_array_equal( - out_cube.data, np.array([1.224744871, 3.082207001], dtype=np.float32), + out_cube.data, np.array(data, dtype=np.float32), ) assert out_cube.coord('time') == regular_cubes[0].coord('time') - assert out_cube.var_name == 'rmse_tas' - assert out_cube.long_name == 'RMSE' + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name assert out_cube.standard_name is None - assert out_cube.units == 'K' + assert out_cube.units == units assert out_cube.cell_methods == ( - CellMethod('rmse', ['latitude', 'longitude']), + CellMethod(metric, ['latitude', 'longitude']), ) product_a.wasderivedfrom.assert_called_once() assert product_a.mock_ancestors == {ref_product} -def test_rmse_cubes(regular_cubes, ref_cubes): - """Test calculation of RMSE with cubes.""" - out_cubes = distance_metric(regular_cubes, 'rmse', 
ref_cube=ref_cubes[0]) +@pytest.mark.parametrize( + 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric_cubes( + regular_cubes, ref_cubes, metric, data, long_name, var_name, units +): + """Test `distance_metric` with cubes.""" + regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) + out_cubes = distance_metric(regular_cubes, metric, ref_cube=ref_cubes[0]) assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 out_cube = out_cubes[0] assert out_cube.shape == () - assert_array_equal(out_cube.data, np.array(2.34520788, dtype=np.float32)) - assert out_cube.var_name == 'rmse_tas' - assert out_cube.long_name == 'RMSE' + assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name assert out_cube.standard_name is None - assert out_cube.units == 'K' + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) -def test_no_reference_for_metric(regular_cubes, ref_cubes): +TEST_METRICS = [ + 'weighted_rmse', + 'rmse', + 'weighted_pearsonr', + 'pearsonr', +] + + +@pytest.mark.parametrize('metric', TEST_METRICS) +def test_no_reference_for_metric(regular_cubes, ref_cubes, metric): """Test fail when no reference_for_metric is given.""" products = { PreprocessorFile(regular_cubes, 'A', {}), @@ -490,11 +538,12 @@ def test_no_reference_for_metric(regular_cubes, ref_cubes): "Expected exactly 1 dataset with 'reference_for_metric: true', found 0" ) with pytest.raises(ValueError, match=msg): - distance_metric(products, 'rmse') + distance_metric(products, metric) -def test_two_reference_for_metric(regular_cubes, ref_cubes): - """Test fail when two reference_for_metric is given.""" +@pytest.mark.parametrize('metric', TEST_METRICS) +def test_two_references_for_metric(regular_cubes, ref_cubes, metric): + """Test fail when two reference_for_metric products are given.""" products = { PreprocessorFile(regular_cubes, 'A', {'reference_for_metric': False}), PreprocessorFile(ref_cubes, 'REF1', {'reference_for_metric': True}), @@ -515,7 +564,8 @@ def test_invalid_metric(regular_cubes, ref_cubes): PreprocessorFile(ref_cubes, 'REF', {'reference_for_metric': True}), } msg = ( - r"Expected one of \['rmse', 'pearsonr'\] for metric, got 'invalid'" + r"Expected one of \['weighted_rmse', 'rmse', 'weighted_pearsonr', " + r"'pearsonr'\] for metric, got 'invalid'" ) with pytest.raises(ValueError, match=msg): distance_metric(products, 'invalid') @@ -524,12 +574,15 @@ def test_invalid_metric(regular_cubes, ref_cubes): @pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) def test_distance_metric_ref_cube_non_cubes(regular_cubes, metric): """Test distance metric with ref_cube=None with with cubes.""" - msg = "`ref_cube` cannot be `None` when `products` is an iterable of Cubes" + msg = ( + "A list of Cubes is given to this preprocessor; please specify a " + "`ref_cube`" + ) with pytest.raises(ValueError, match=msg): distance_metric(regular_cubes, metric) -@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +@pytest.mark.parametrize('metric', TEST_METRICS) def test_distance_metric_no_named_dimensions(metric): """Test distance metric with ref_cube=None with with cubes.""" ref_cube = Cube([0, 1]) @@ -542,7 +595,7 @@ def test_distance_metric_no_named_dimensions(metric): distance_metric(cubes, metric, ref_cube=ref_cube) -@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +@pytest.mark.parametrize('metric', TEST_METRICS) def 
test_distance_metric_non_matching_shapes(regular_cubes, metric): """Test distance metric with ref_cube=None with with cubes.""" ref_cube = Cube(0) @@ -554,7 +607,7 @@ def test_distance_metric_non_matching_shapes(regular_cubes, metric): distance_metric(regular_cubes, metric, ref_cube=ref_cube) -@pytest.mark.parametrize('metric', ['rmse', 'pearsonr']) +@pytest.mark.parametrize('metric', TEST_METRICS) def test_distance_metric_non_matching_dims(regular_cubes, metric): """Test distance metric with ref_cube=None with with cubes.""" ref_cube = regular_cubes[0].copy() From b60963c9963610f34f4ef1acb4278f976e644ad9 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 24 Jan 2024 19:21:21 +0100 Subject: [PATCH 05/51] flake8 --- esmvalcore/preprocessor/_compare_with_refs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index ac82283c9c..f3997da48c 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -423,6 +423,7 @@ def _calculate_metric( return res_cube + def _get_weights( cube: Cube, coords: Iterable[Coord] | Iterable[str], From 45b5d7799f4b9e86f6ce5b60142b18aced3eb887 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 25 Jan 2024 09:20:25 +0100 Subject: [PATCH 06/51] Removed prints --- esmvalcore/_recipe/to_datasets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/esmvalcore/_recipe/to_datasets.py b/esmvalcore/_recipe/to_datasets.py index 7928940be5..91ebed1179 100644 --- a/esmvalcore/_recipe/to_datasets.py +++ b/esmvalcore/_recipe/to_datasets.py @@ -358,7 +358,6 @@ def _get_datasets_for_variable( for template1 in template0.from_ranges(): for supplementary_facets in supplementaries: template1.add_supplementary(**supplementary_facets) - print(supplementary_facets) for supplementary_ds in template1.supplementaries: supplementary_ds.facets.pop('preprocessor', None) for dataset in _dataset_from_files(template1): @@ -368,7 +367,6 @@ def _get_datasets_for_variable( logger.debug("Found %s", dataset.summary(shorten=True)) datasets.append(dataset) idx += 1 - print(dataset) return datasets From ab7da9c3cb83c56644cbf440f2717e62dae326a9 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 25 Jan 2024 10:43:52 +0100 Subject: [PATCH 07/51] Make sure that dtype is preserved and added tests for masked data --- esmvalcore/preprocessor/_compare_with_refs.py | 25 ++-- .../test_compare_with_refs.py | 115 +++++++++++++++++- 2 files changed, 129 insertions(+), 11 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index f3997da48c..4fd7e0c6d5 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -114,7 +114,8 @@ def bias( # Mask reference cube appropriately for relative biases if bias_type == 'relative': ref_cube = ref_cube.copy() - ref_cube.data = da.ma.masked_inside( + npx = get_array_module(ref_cube.core_data()) + ref_cube.data = npx.ma.masked_inside( ref_cube.core_data(), -denominator_mask_threshold, denominator_mask_threshold, @@ -417,7 +418,7 @@ def _calculate_metric( # Get result cube with correct dimensional metadata by using dummy # operation (max) res_cube = cube.collapsed(coords, iris.analysis.MAX) - res_cube.data = res_data + res_cube.data = res_data.astype(cube.dtype) res_cube.metadata = res_metadata res_cube.cell_methods = [*cube.cell_methods, CellMethod(metric, coords)] @@ -429,12 +430,13 @@ def 
_get_weights( coords: Iterable[Coord] | Iterable[str], ) -> da.Array: """Calculate weights for weighted distance metrics.""" - weights = da.ones(cube.shape, dtype=cube.dtype) + npx = get_array_module(cube.core_data()) + weights = npx.ones(cube.shape, dtype=cube.dtype) # Time weights: lengths of time interval if 'time' in coords: weights *= broadcast_to_shape( - da.array(get_time_weights(cube)), + npx.array(get_time_weights(cube)), cube.shape, cube.coord_dims('time'), ) @@ -444,13 +446,14 @@ def _get_weights( cube = cube.copy() # avoid overwriting input cube if ( not cube.cell_measures('cell_area') and - not cube.coord('longitude') + not cube.coords('longitude') ): raise CoordinateNotFoundError( - f"Cube {cube.summary(shorten=True)} need a 'longitude' " - f"coordinate to calculate weighted distance metric over " - f"coordinates {coords} (alternatively, a `cell_area` can be " - f"given to the cube)" + f"Cube {cube.summary(shorten=True)} needs a `longitude` " + f"coordinate to calculate cell area weights for weighted " + f"distance metric over coordinates {coords} (alternatively, " + f"a `cell_area` can be given to the cube as supplementary " + f"variable)" ) _try_adding_calculated_cell_area(cube) weights *= broadcast_to_shape( @@ -498,7 +501,9 @@ def _calculate_pearsonr( weighted: bool, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Pearson correlation coefficient.""" + axis = _get_all_coord_dims(cube, coords) + weights = _get_weights(cube, coords) if weighted else None # TODO: change!!! - data = cube.collapsed(coords, iris.analysis.MEAN) + data = cube.collapsed(coords, iris.analysis.MEAN).core_data() metadata = cube.metadata return (data, metadata) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 375177d89e..65ba226c0d 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -1,5 +1,7 @@ """Unit tests for :mod:`esmvalcore.preprocessor._compare_with_refs`.""" +import contextlib + import dask.array as da import iris import numpy as np @@ -7,6 +9,7 @@ from cf_units import Unit from iris.coords import CellMeasure, CellMethod from iris.cube import Cube, CubeList +from iris.exceptions import CoordinateNotFoundError from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric from tests import PreprocessorFile @@ -97,6 +100,7 @@ def test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): assert product_a.attributes == {'units': units, 'dataset': 'a'} assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' @@ -111,6 +115,7 @@ def test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): assert product_b.attributes == {'units': units, 'dataset': 'b'} assert len(product_b.cubes) == 1 out_cube = product_b.cubes[0] + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' @@ -131,6 +136,7 @@ def test_bias_cubes(regular_cubes, ref_cubes, bias_type, data, units): assert len(out_cubes) == 1 out_cube = out_cubes[0] + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 
'air_temperature' @@ -157,6 +163,7 @@ def test_bias_cubes_broadcastable( assert len(out_cubes) == 1 out_cube = out_cubes[0] + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' @@ -186,6 +193,7 @@ def test_denominator_mask_threshold_products(regular_cubes, ref_cubes): assert product_a.attributes == {'units': '1', 'dataset': 'a'} assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] + assert out_cube.dtype == np.float32 expected_data = np.ma.masked_equal([[[42.0, 42.0], [42.0, 42.0]], [[42.0, 42.0], @@ -213,7 +221,7 @@ def test_denominator_mask_threshold_cubes(regular_cubes, ref_cubes): assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 out_cube = out_cubes[0] - + assert out_cube.dtype == np.float32 expected_data = np.ma.masked_equal([[[42.0, 42.0], [42.0, 42.0]], [[42.0, 42.0], @@ -246,6 +254,7 @@ def test_keep_reference_dataset(regular_cubes, ref_cubes, bias_type): assert product_ref.attributes == {'reference_for_bias': True} assert len(product_ref.cubes) == 1 out_cube = product_ref.cubes[0] + assert out_cube.dtype == np.float32 expected_data = [[[2.0, 2.0], [2.0, 2.0]], [[2.0, 2.0], [2.0, 4.0]]] assert_array_equal(out_cube.data, expected_data) assert out_cube.var_name == 'tas' @@ -280,6 +289,7 @@ def test_bias_products_and_ref_cube( assert product_a.attributes == {'units': units, 'dataset': 'a'} assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' @@ -383,6 +393,7 @@ def test_distance_metric( assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.shape == () + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -400,6 +411,7 @@ def test_distance_metric( assert len(product_b.cubes) == 1 out_cube = product_b.cubes[0] assert out_cube.shape == () + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -419,6 +431,7 @@ def test_distance_metric( assert len(product_ref.cubes) == 1 out_cube = product_ref.cubes[0] assert out_cube.shape == () + assert out_cube.dtype == np.float32 assert_array_equal(out_cube.data, 0.0) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -477,6 +490,7 @@ def test_distance_metric_lazy( assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.shape == (2,) + assert out_cube.dtype == np.float32 assert out_cube.has_lazy_data() assert_array_equal( out_cube.data, np.array(data, dtype=np.float32), @@ -508,6 +522,73 @@ def test_distance_metric_cubes( out_cube = out_cubes[0] assert out_cube.shape == () + assert out_cube.dtype == np.float32 + assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) + + +@pytest.mark.parametrize('lazy', [True, False]) +@pytest.mark.parametrize( + 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric_masked_data( + 
regular_cubes, ref_cubes, metric, data, long_name, var_name, units, lazy +): + """Test `distance_metric` with masked data.""" + # Test cube + time_units = Unit('days since 1850-01-01 00:00:00') + times = iris.coords.DimCoord([3.0, 7.0, 9.0], + bounds=[[0.0, 6.0], [6.0, 8.0], [8.0, 10.0]], + standard_name='time', + var_name='time', long_name='time', + units=time_units) + lats = regular_cubes[0].coord('latitude') + lons = regular_cubes[0].coord('longitude') + coord_specs = [(times, 0), (lats, 1), (lons, 2)] + cube_data = np.pad( + regular_cubes[0].data, + ((0, 1), (0, 0), (0, 0)), + 'constant', + constant_values=np.nan, + ) + cube = Cube( + np.ma.masked_invalid(cube_data), dim_coords_and_dims=coord_specs + ) + cube.metadata = regular_cubes[0].metadata + cube.add_cell_measure(AREA_WEIGHTS, (1, 2)) + + # Ref cube + ref_cube = cube.copy() + ref_data = np.pad( + ref_cubes[0].data, + ((0, 1), (0, 0), (0, 0)), + 'constant', + constant_values=np.nan, + ) + ref_cube.data = np.ma.masked_invalid(ref_data) + ref_cube.metadata = ref_cubes[0].metadata + + if lazy: + cube.data = da.array(cube.data) + ref_cube.data = da.array(ref_cube.data) + + out_cubes = distance_metric([cube], metric, ref_cube=ref_cube) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.shape == () + if lazy: + assert out_cube.has_lazy_data() + else: + assert not out_cube.has_lazy_data() assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -620,3 +701,35 @@ def test_distance_metric_non_matching_dims(regular_cubes, metric): ) with pytest.raises(ValueError, match=msg): distance_metric(regular_cubes, metric, ref_cube=ref_cube) + + +@pytest.mark.parametrize( + 'metric,error', + [ + ('weighted_rmse', True), + ('rmse', False), + ('weighted_pearsonr', True), + ('pearsonr', False), + ] +) +def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error): + """Test distance metric with cubes that have no longitude.""" + regular_cubes[0].remove_coord('longitude') + ref_cube = regular_cubes[0].copy() + msg = ( + r"Cube .* needs a `longitude` coordinate to calculate cell area " + r"weights for weighted distance metric over coordinates \['time', " + r"'latitude'\] \(alternatively, a `cell_area` can be given to the " + r"cube as supplementary variable\)" + ) + if error: + context = pytest.raises(CoordinateNotFoundError, match=msg) + else: + context = contextlib.nullcontext() + with context: + distance_metric( + regular_cubes, + metric, + ref_cube=ref_cube, + coords=['time', 'latitude'] + ) From 6f37ccbf2d316ad610a94a76f591e590a8ba3527 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 25 Jan 2024 12:11:48 +0100 Subject: [PATCH 08/51] Added doc and allowed arbitrary kwargs for distance_metric --- doc/recipe/preprocessor.rst | 118 ++++++++++++++++-- esmvalcore/preprocessor/_compare_with_refs.py | 51 ++++++-- .../test_compare_with_refs.py | 2 +- 3 files changed, 147 insertions(+), 24 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 5f3ae8691d..77727cbb35 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -27,7 +27,7 @@ roughly following the default order in which preprocessor functions are applied: * :ref:`Detrend` * :ref:`Rolling window statistics` * :ref:`Unit conversion` -* :ref:`Bias` +* :ref:`Comparison with reference dataset` * :ref:`Other` See :ref:`preprocessor_functions` for implementation details 
and the exact default order.

@@ -304,11 +304,12 @@ In ESMValCore we call both types of variables "supplementary variables".
 ===================================================================== ============================== =====================================
 Preprocessor                                                          Variable short name            Variable standard name
 ===================================================================== ============================== =====================================
-:ref:`area_statistics` [#f4]                                          ``areacella``, ``areacello``   cell_area
-:ref:`mask_landsea` [#f4]                                             ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
-:ref:`mask_landseaice` [#f3]                                          ``sftgif``                     land_ice_area_fraction
-:ref:`volume_statistics` [#f4]                                        ``volcello``                   ocean_volume
-:ref:`weighting_landsea_fraction` [#f3]                               ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
+:ref:`area_statistics` [#f4]_                                         ``areacella``, ``areacello``   cell_area
+:ref:`mask_landsea` [#f4]_                                            ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
+:ref:`mask_landseaice` [#f3]_                                         ``sftgif``                     land_ice_area_fraction
+:ref:`volume_statistics` [#f4]_                                       ``volcello``                   ocean_volume
+:ref:`weighting_landsea_fraction` [#f3]_                              ``sftlf``, ``sftof``           land_area_fraction, sea_area_fraction
+:ref:`distance_metric` [#f5]_                                         ``areacella``, ``areacello``   cell_area
 ===================================================================== ============================== =====================================

 .. [#f3] This preprocessor requires at least one of the mentioned supplementary
@@ -2377,15 +2378,17 @@ the time units in the variable.

 See also :func:`esmvalcore.preprocessor.accumulate_coordinate.`

-.. _bias:
+.. _comparison_with_refs:

-Bias
-====
+Comparison with reference dataset
+=================================

-The bias module contains the following preprocessor functions:
+This module contains the following preprocessor functions:

 * ``bias``: Calculate absolute or relative biases with respect to a reference
-  dataset
+  dataset.
+* ``distance_metric``: Calculate a distance metric with respect to a
+  reference dataset.

 ``bias``
 --------
@@ -2450,6 +2453,99 @@ Example:

 See also :func:`esmvalcore.preprocessor.bias`.

+.. _distance_metric:
+
+``distance_metric``
+-------------------
+
+This function calculates a distance metric with respect to a given reference
+dataset.
+For this, exactly one input dataset needs to be declared as
+``reference_for_metric: true`` in the recipe, e.g.,
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: CanESM5, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+      - {dataset: CESM2, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+      - {dataset: MIROC6, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+      - {dataset: ERA-Interim, project: OBS6, tier: 3, type: reanaly, version: 1,
+         reference_for_metric: true}
+
+In the example above, ERA-Interim is used as reference dataset for the distance
+metric calculation.
+All datasets need to have the same shape and coordinates.
+To ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or
+:func:`esmvalcore.preprocessor.regrid_time` might be helpful.
+
+The ``distance_metric`` preprocessor supports the following arguments in the
+recipe:
+
+* ``metric`` (:obj:`str`): Distance metric that is calculated.
+  Must be one of ``'weighted_rmse'`` (weighted root mean square error),
+  ``'rmse'`` (unweighted root mean square error), ``'weighted_pearsonr'``
+  (weighted Pearson correlation coefficient), ``'pearsonr'`` (unweighted
+  Pearson correlation coefficient).
+
+  ..
note:: + Metrics starting with `weighted_` will calculate weighted distance metrics + if possible. + Currently, the following `coords` (or any combinations that include them) + will trigger weighting: `time` (will use lengths of time intervals as + weights) and `latitude` (will use cell area weights). + Time weights are always calculated from the input data. + Area weights can be given as supplementary variables to the recipe + (`areacella` or `areacello`, see :ref:`supplementary_variables`) or + calculated from the input data (this only works for regular grids). + By default, **NO** supplementary variables will be used; they need to be + explicitly requested in the recipe. +* ``coords`` (:obj:`list` of :obj:`str`, default: ``None``): Coordinates over + which the distance metric is calculated. + If ``None``, calculate the metric over all coordinates, which results in a + scalar cube. +* ``keep_reference_dataset`` (:obj:`bool`, default: ``True``): If ``True``, + also calculate the distance of the reference dataset with itself. + If ``False``, drop the reference dataset. +* ``exclude`` (:obj:`list` of :obj:`str`): Exclude specific datasets from + this preprocessor. + Note that this option is only available in the recipe, not when using + :func:`esmvalcore.preprocessor.distance_metric` directly (e.g., in another + python script). + If the reference dataset has been excluded, an error is raised. +* Other parameters are directly used for the metric calculation: + The following keyword arguments are supported: + + - `weighted_rmse` and `rmse`: none. + - `weighted_pearsonr` and `pearsonr`: + - ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. + The missing data fraction is calculated based on the number of grid cells + masked in both cubes. + If this fraction exceed ``mdtol``, the returned value in the + corresponding cell is masked. + ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` means + the resulting element will be masked if and only if all contributing + elements are masked in both cubes. + - ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, applies a + common mask to both cubes so only cells which are unmasked in both cubes + contribute to the calculation. + If ``False``, the variance for each cube is calculated from all available + cells. + +Example: + +.. code-block:: yaml + + preprocessors: + preproc_pearsonr: + distance_metric: + metric: weighted_pearsonr + coords: [latitude, longitude] + keep_reference_dataset: true + exclude: [CanESM2] + common_mask: true + +See also :func:`esmvalcore.preprocessor.distance_metric`. + .. _Memory use: diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 4fd7e0c6d5..18d91f52b7 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -62,7 +62,8 @@ def bias( Cube which is used as reference for the bias calculation. If ``None``, `products` needs to be a :obj:`set` of `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one - dataset in `products` needs the facet ``reference_for_bias: true``. + dataset in `products` needs the facet ``reference_for_bias: true``. Do + not specify this argument in a recipe. bias_type: Bias type that is calculated. Must be one of ``'absolute'`` (dataset - ref) or ``'relative'`` ((dataset - ref) / ref). 
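The two ``bias_type`` options documented here boil down to simple cube
arithmetic. The following is a minimal illustrative sketch of what they
compute, not the preprocessor's actual code path; ``manual_bias`` is a
hypothetical helper and assumes two already-aligned :class:`iris.cube.Cube`
objects:

.. code-block:: python

    from iris.cube import Cube

    def manual_bias(cube: Cube, ref_cube: Cube,
                    bias_type: str = 'absolute') -> Cube:
        """Illustrate 'absolute' and 'relative' bias types via cube maths."""
        if bias_type == 'absolute':
            return cube - ref_cube  # dataset - ref
        if bias_type == 'relative':
            return (cube - ref_cube) / ref_cube  # (dataset - ref) / ref
        raise ValueError(f"Expected 'absolute' or 'relative', got {bias_type}")

The real preprocessor additionally handles metadata, broadcasting, and the
masking of near-zero denominators (see ``denominator_mask_threshold``), which
this sketch omits.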
@@ -213,6 +214,7 @@ def distance_metric( ref_cube: Optional[Cube] = None, coords: Iterable[Coord] | Iterable[str] | None = None, keep_reference_dataset: bool = True, + **kwargs, ) -> set[PreprocessorFile] | CubeList: """Calculate distance metrics. @@ -253,12 +255,13 @@ def distance_metric( :ref:`supplementary_variables`) or calculated from the input data (this only works for regular grids). By default, **NO** supplementary variables will be used; they need to be explicitly - requested. + requested in the recipe. ref_cube: Cube which is used as reference for the distance metric calculation. If ``None``, `products` needs to be a :obj:`set` of - `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one - dataset in `products` needs the facet ``reference_for_metric: true``. + :class:`~esmvalcore.preprocessor.PreprocessorFile` objects and exactly + one dataset in `products` needs the facet ``reference_for_metric: + true``. Do not specify this argument in a recipe. coords: Coordinates over which the distance metric is calculated. If ``None``, calculate the metric over all coordinates, which results in a scalar @@ -266,6 +269,26 @@ def distance_metric( keep_reference_dataset: If ``True``, also calculate the distance of the reference dataset with itself. If ``False``, drop the reference dataset. + **kwargs: + Additional options for the metric calculation. The following keyword + arguments are supported: + + - `weighted_rmse` and `rmse`: none. + - `weighted_pearsonr` and `pearsonr`: + - ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. + The missing data fraction is calculated based on the number of grid + cells masked in both cubes. + If this fraction exceed ``mdtol``, the returned value in the + corresponding cell is masked. + ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` + means the resulting element will be masked if and only if all + contributing elements are masked in both cubes. + - ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, + applies a common mask to both cubes so only cells which are + unmasked in both cubes contribute to the calculation. + If ``False``, the variance for each cube is calculated from all + available cells. + Returns ------- @@ -285,7 +308,7 @@ def distance_metric( iris.exceptions.CoordinateNotFoundError `longitude` is not found in cube if a weighted metric shall be calculated, `latitude` is in `coords`, and no `cell_area` is given - as:ref:`supplementary_variables`. + as :ref:`supplementary_variables`. 
""" reference_product = None @@ -322,7 +345,8 @@ def distance_metric( # each element if all_cubes_given: cubes = [ - _calculate_metric(c, ref_cube, metric, coords) for c in products + _calculate_metric(c, ref_cube, metric, coords, **kwargs) + for c in products ] return CubeList(cubes) @@ -335,7 +359,7 @@ def distance_metric( cube = concatenate(product.cubes) # Calculate distance metric - cube = _calculate_metric(cube, ref_cube, metric, coords) + cube = _calculate_metric(cube, ref_cube, metric, coords, **kwargs) # Adapt metadata and provenance information product.attributes['standard_name'] = cube.standard_name @@ -382,6 +406,7 @@ def _calculate_metric( ref_cube: Cube, metric: MetricType, coords: Iterable[Coord] | Iterable[str] | None, + **kwargs, ) -> Cube: """Calculate metric for a single cube relative to a reference cube.""" # Make sure that dimensional metadata of data and ref data is compatible @@ -404,10 +429,12 @@ def _calculate_metric( # possible since some operations (e.g., sqrt()) are not available for cubes coords = _get_coords(cube, coords) metrics_funcs = { - 'weighted_rmse': partial(_calculate_rmse, weighted=True), - 'rmse': partial(_calculate_rmse, weighted=False), - 'weighted_pearsonr': partial(_calculate_pearsonr, weighted=True), - 'pearsonr': partial(_calculate_pearsonr, weighted=False), + 'weighted_rmse': partial(_calculate_rmse, weighted=True, **kwargs), + 'rmse': partial(_calculate_rmse, weighted=False, **kwargs), + 'weighted_pearsonr': partial( + _calculate_pearsonr, weighted=True, **kwargs + ), + 'pearsonr': partial(_calculate_pearsonr, weighted=False, **kwargs), } if metric not in metrics_funcs: raise ValueError( @@ -499,9 +526,9 @@ def _calculate_pearsonr( coords: Iterable[Coord] | Iterable[str], *, weighted: bool, + **kwargs, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Pearson correlation coefficient.""" - axis = _get_all_coord_dims(cube, coords) weights = _get_weights(cube, coords) if weighted else None # TODO: change!!! data = cube.collapsed(coords, iris.analysis.MEAN).core_data() diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 65ba226c0d..75c9472c31 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -445,7 +445,7 @@ def test_distance_metric( TEST_DISTANCE_METRICS_LAZY = [ - # ('weighted_rmse', 2.0, 'RMSE', 'rmse_tas', 'K'), + ('weighted_rmse', [1.2278657, 3.0784798], 'RMSE', 'rmse_tas', 'K'), ('rmse', [1.224744871, 3.082207001], 'RMSE', 'rmse_tas', 'K'), ] From 9a8481bf6b19feb25374f03ee38d42230067ede4 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 25 Jan 2024 13:08:36 +0100 Subject: [PATCH 09/51] Added pearson r --- doc/recipe/preprocessor.rst | 4 +- esmvalcore/preprocessor/_compare_with_refs.py | 25 ++++++--- .../test_compare_with_refs.py | 51 +++++++++++++------ 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 77727cbb35..947dd708ba 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2517,7 +2517,7 @@ recipe: - `weighted_rmse` and `rmse`: none. - `weighted_pearsonr` and `pearsonr`: - - ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. + - ``mdtol`` (:obj:`float`, default: 1.0): Tolerance of missing data. 
The missing data fraction is calculated based on the number of grid cells masked in both cubes. If this fraction exceed ``mdtol``, the returned value in the @@ -2525,7 +2525,7 @@ recipe: ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` means the resulting element will be masked if and only if all contributing elements are masked in both cubes. - - ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, applies a + - ``common_mask`` (:obj:`bool`, default: ``False``): If ``True``, applies a common mask to both cubes so only cells which are unmasked in both cubes contribute to the calculation. If ``False``, the variance for each cube is calculated from all available diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 18d91f52b7..48c108a2d8 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -8,6 +8,7 @@ import dask.array as da import iris.analysis +import iris.analysis.stats import numpy as np from iris.common.metadata import CubeMetadata from iris.coords import CellMethod, Coord @@ -420,9 +421,8 @@ def _calculate_metric( cube + ref_cube # dummy operation to check if cubes are compatible except Exception as exc: raise ValueError( - f"Cannot calculate distance metric between cube and reference " - f"cube: {str(exc)}" - ) + "Cannot calculate distance metric between cube and reference cube " + ) from exc # Perform the actual calculation of the distance metric # Note: we work on arrays here instead of cube to stay as flexible as @@ -530,7 +530,18 @@ def _calculate_pearsonr( ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Pearson correlation coefficient.""" weights = _get_weights(cube, coords) if weighted else None - # TODO: change!!! 
- data = cube.collapsed(coords, iris.analysis.MEAN).core_data() - metadata = cube.metadata - return (data, metadata) + res_cube = iris.analysis.stats.pearsonr( + cube, ref_cube, corr_coords=coords, weights=weights, **kwargs + ) + metadata = CubeMetadata( + None, + ( + "Pearson's r" if cube.long_name is None + else f"Pearson's r of {cube.long_name}" + ), + 'pearsonr' if cube.var_name is None else f'pearsonr_{cube.var_name}', + '1', + cube.attributes, + cube.cell_methods, + ) + return (res_cube.core_data(), metadata) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 75c9472c31..46588ce9e7 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -348,8 +348,10 @@ def test_ref_cube_non_cubes(regular_cubes): TEST_DISTANCE_METRICS = [ - ('weighted_rmse', 2.0, 'RMSE', 'rmse_tas', 'K'), - ('rmse', 2.34520788, 'RMSE', 'rmse_tas', 'K'), + ('weighted_rmse', 2.0, 0.0, 'RMSE', 'rmse_tas', 'K'), + ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'), + ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), + ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'), ] AREA_WEIGHTS = CellMeasure( np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), @@ -359,10 +361,17 @@ def test_ref_cube_non_cubes(regular_cubes): @pytest.mark.parametrize( - 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS + 'metric,data,ref_data,long_name,var_name,units', TEST_DISTANCE_METRICS ) def test_distance_metric( - regular_cubes, ref_cubes, metric, data, long_name, var_name, units + regular_cubes, + ref_cubes, + metric, + data, + ref_data, + long_name, + var_name, + units, ): """Test `distance_metric`.""" regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) @@ -432,7 +441,7 @@ def test_distance_metric( out_cube = product_ref.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, 0.0) + assert_array_equal(out_cube.data, ref_data) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -447,6 +456,14 @@ def test_distance_metric( TEST_DISTANCE_METRICS_LAZY = [ ('weighted_rmse', [1.2278657, 3.0784798], 'RMSE', 'rmse_tas', 'K'), ('rmse', [1.224744871, 3.082207001], 'RMSE', 'rmse_tas', 'K'), + ( + 'weighted_pearsonr', + [np.nan, 0.7745946], + "Pearson's r", + 'pearsonr_tas', + '1', + ), + ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), ] @@ -492,6 +509,7 @@ def test_distance_metric_lazy( assert out_cube.shape == (2,) assert out_cube.dtype == np.float32 assert out_cube.has_lazy_data() + print(out_cube.data) assert_array_equal( out_cube.data, np.array(data, dtype=np.float32), ) @@ -508,10 +526,10 @@ def test_distance_metric_lazy( @pytest.mark.parametrize( - 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS + 'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS ) def test_distance_metric_cubes( - regular_cubes, ref_cubes, metric, data, long_name, var_name, units + regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units ): """Test `distance_metric` with cubes.""" regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) @@ -535,10 +553,10 @@ def test_distance_metric_cubes( @pytest.mark.parametrize('lazy', [True, False]) @pytest.mark.parametrize( - 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS + 
'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS
 )
 def test_distance_metric_masked_data(
-    regular_cubes, ref_cubes, metric, data, long_name, var_name, units, lazy
+    regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units, lazy
 ):
     """Test `distance_metric` with masked data."""
     # Test cube
@@ -589,7 +607,13 @@ def test_distance_metric_masked_data(
         assert out_cube.has_lazy_data()
     else:
         assert not out_cube.has_lazy_data()
-    assert_array_equal(out_cube.data, np.array(data, dtype=np.float32))
+
+    # Mask handling differs for dask and numpy
+    if np.isnan(data) and not lazy:
+        expected_data = np.ma.masked_invalid(data)
+    else:
+        expected_data = np.array(data, dtype=np.float32)
+    assert_array_equal(out_cube.data, expected_data)
     assert out_cube.var_name == var_name
     assert out_cube.long_name == long_name
     assert out_cube.standard_name is None
@@ -652,7 +676,7 @@ def test_invalid_metric(regular_cubes, ref_cubes):
     distance_metric(products, 'invalid')


-@pytest.mark.parametrize('metric', ['rmse', 'pearsonr'])
+@pytest.mark.parametrize('metric', TEST_METRICS)
 def test_distance_metric_ref_cube_non_cubes(regular_cubes, metric):
     """Test distance metric with ref_cube=None with cubes."""
     msg = (
@@ -695,10 +719,7 @@ def test_distance_metric_non_matching_dims(regular_cubes, metric):
     ref_cube.remove_coord('time')
     new_coord = iris.coords.DimCoord([0.0, 1.0], var_name='not_time')
     ref_cube.add_dim_coord(new_coord, 0)
-    msg = (
-        "Cannot calculate distance metric between cube and reference cube: "
-        "Insufficient matching coordinate metadata to resolve cubes"
-    )
+    msg = "Cannot calculate distance metric between cube and reference cube"
     with pytest.raises(ValueError, match=msg):
         distance_metric(regular_cubes, metric, ref_cube=ref_cube)

From 012d3088553cc5aad260c88fde48589d399e6b02 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 25 Jan 2024 13:10:14 +0100
Subject: [PATCH 10/51] Remove print

---
 .../preprocessor/_compare_with_refs/test_compare_with_refs.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index 46588ce9e7..814d599665 100644
--- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -509,7 +509,6 @@ def test_distance_metric_lazy(
     assert out_cube.shape == (2,)
     assert out_cube.dtype == np.float32
     assert out_cube.has_lazy_data()
-    print(out_cube.data)
     assert_array_equal(
         out_cube.data, np.array(data, dtype=np.float32),
     )

From c8138ceaf5697f9de77d36257a3f67c411fe41f7 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 25 Jan 2024 15:06:48 +0100
Subject: [PATCH 11/51] Fixed doc build

---
 doc/recipe/preprocessor.rst                   | 15 ++++++++-------
 esmvalcore/preprocessor/_compare_with_refs.py |  9 +++++----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 947dd708ba..c608b2739b 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -27,7 +27,7 @@ roughly following the default order in which preprocessor functions are
applied:
 * :ref:`Detrend`
 * :ref:`Rolling window statistics`
 * :ref:`Unit conversion`
-* :ref:`Comparison with reference dataset`
+* :ref:`comparison_with_ref`
 * :ref:`Other`

 See :ref:`preprocessor_functions` for implementation details and the exact
default order.
@@ -2378,7 +2378,7 @@ the time units in the variable.
See also :func:`esmvalcore.preprocessor.accumulate_coordinate.` -.. _comparison_with_refs: +.. _comparison_with_ref: Comparison with reference dataset ================================= @@ -2512,12 +2512,13 @@ recipe: :func:`esmvalcore.preprocessor.distance_metric` directly (e.g., in another python script). If the reference dataset has been excluded, an error is raised. -* Other parameters are directly used for the metric calculation: +* Other parameters are directly used for the metric calculation. The following keyword arguments are supported: - - `weighted_rmse` and `rmse`: none. - - `weighted_pearsonr` and `pearsonr`: - - ``mdtol`` (:obj:`float`, default: 1.0): Tolerance of missing data. + * `weighted_rmse` and `rmse`: none. + * `weighted_pearsonr` and `pearsonr`: + + * ``mdtol`` (:obj:`float`, default: 1.0): Tolerance of missing data. The missing data fraction is calculated based on the number of grid cells masked in both cubes. If this fraction exceed ``mdtol``, the returned value in the @@ -2525,7 +2526,7 @@ recipe: ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` means the resulting element will be masked if and only if all contributing elements are masked in both cubes. - - ``common_mask`` (:obj:`bool`, default: ``False``): If ``True``, applies a + * ``common_mask`` (:obj:`bool`, default: ``False``): If ``True``, applies a common mask to both cubes so only cells which are unmasked in both cubes contribute to the calculation. If ``False``, the variance for each cube is calculated from all available diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 48c108a2d8..a9072c9ea0 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -274,9 +274,10 @@ def distance_metric( Additional options for the metric calculation. The following keyword arguments are supported: - - `weighted_rmse` and `rmse`: none. - - `weighted_pearsonr` and `pearsonr`: - - ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. + * `weighted_rmse` and `rmse`: none. + * `weighted_pearsonr` and `pearsonr`: + + * ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. The missing data fraction is calculated based on the number of grid cells masked in both cubes. If this fraction exceed ``mdtol``, the returned value in the @@ -284,7 +285,7 @@ def distance_metric( ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` means the resulting element will be masked if and only if all contributing elements are masked in both cubes. - - ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, + * ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, applies a common mask to both cubes so only cells which are unmasked in both cubes contribute to the calculation. If ``False``, the variance for each cube is calculated from all From 30a576a223281d57c716f59b5a7d0b227d64d3e7 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Tue, 6 Feb 2024 13:11:36 +0100 Subject: [PATCH 12/51] Simplify doc --- doc/recipe/preprocessor.rst | 18 +++--------------- esmvalcore/preprocessor/_compare_with_refs.py | 19 +++---------------- 2 files changed, 6 insertions(+), 31 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 730d4cba3e..29df996f4f 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2566,21 +2566,9 @@ recipe: The following keyword arguments are supported: * `weighted_rmse` and `rmse`: none. 
- * `weighted_pearsonr` and `pearsonr`: - - * ``mdtol`` (:obj:`float`, default: 1.0): Tolerance of missing data. - The missing data fraction is calculated based on the number of grid cells - masked in both cubes. - If this fraction exceed ``mdtol``, the returned value in the - corresponding cell is masked. - ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` means - the resulting element will be masked if and only if all contributing - elements are masked in both cubes. - * ``common_mask`` (:obj:`bool`, default: ``False``): If ``True``, applies a - common mask to both cubes so only cells which are unmasked in both cubes - contribute to the calculation. - If ``False``, the variance for each cube is calculated from all available - cells. + * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword + arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link + for more details on these arguments). Example: diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index a9072c9ea0..a14f2f56ee 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -275,22 +275,9 @@ def distance_metric( arguments are supported: * `weighted_rmse` and `rmse`: none. - * `weighted_pearsonr` and `pearsonr`: - - * ``mdtol`` (:obj:`float`, default=1.0): Tolerance of missing data. - The missing data fraction is calculated based on the number of grid - cells masked in both cubes. - If this fraction exceed ``mdtol``, the returned value in the - corresponding cell is masked. - ``mdtol=0`` means no missing data is tolerated while ``mdtol=1`` - means the resulting element will be masked if and only if all - contributing elements are masked in both cubes. - * ``common_mask`` (:obj:`bool`, default=``False``): If ``True``, - applies a common mask to both cubes so only cells which are - unmasked in both cubes contribute to the calculation. - If ``False``, the variance for each cube is calculated from all - available cells. - + * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all + keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, + see that link for more details on these arguments). Returns ------- From e260846459ab02e9d98dcb6847c3d29a353c1c32 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Tue, 6 Feb 2024 18:35:34 +0100 Subject: [PATCH 13/51] Added first working version of EMD calculation --- doc/recipe/preprocessor.rst | 5 +- esmvalcore/preprocessor/_compare_with_refs.py | 123 +++++++++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 29df996f4f..04e87d5121 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2535,7 +2535,8 @@ recipe: Must be one of ``'weighted_rmse'`` (weighted root mean square error), ``'rmse'`` (unweighted root mean square error), ``'weighted_pearsonr'`` (weighted Pearson correlation coefficient), ``'pearsonr'`` (unweighted - Pearson correlation coefficient). + Pearson correlation coefficient, ``'emd'`` (Earth mover's distance, also + known as first Wasserstein metric W1). .. note:: Metrics starting with `weighted_` will calculate weighted distance metrics @@ -2569,6 +2570,8 @@ recipe: * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link for more details on these arguments). 
+ * `emd`: ``nbins`` = number of bins used to create discrete probability + mass function of data before calculating the EMD (:obj:`int`, default: 30). Example: diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index a14f2f56ee..4b7b3accc3 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -6,6 +6,7 @@ from functools import partial from typing import TYPE_CHECKING, Literal, Optional +import dask import dask.array as da import iris.analysis import iris.analysis.stats @@ -15,6 +16,7 @@ from iris.cube import Cube, CubeList from iris.exceptions import CoordinateNotFoundError from iris.util import broadcast_to_shape +from scipy.stats import wasserstein_distance from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._io import concatenate @@ -206,6 +208,7 @@ def _calculate_bias(cube: Cube, ref_cube: Cube, bias_type: BiasType) -> Cube: 'rmse', 'weighted_pearsonr', 'pearsonr', + 'emd', ] @@ -243,7 +246,8 @@ def distance_metric( (weighted root mean square error), ``'rmse'`` (unweighted root mean square error), ``'weighted_pearsonr'`` (weighted Pearson correlation coefficient), ``'pearsonr'`` (unweighted Pearson correlation - coefficient). + coefficient), ``'emd'`` (Earth mover's distance, also known as first + Wasserstein metric W1). .. note:: Metrics starting with `weighted_` will calculate weighted distance @@ -278,6 +282,9 @@ def distance_metric( * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link for more details on these arguments). + * `emd`: ``nbins`` = number of bins used to create discrete probability + mass function of data before calculating the EMD (:obj:`int`, + default: 30). 
Returns ------- @@ -423,6 +430,7 @@ def _calculate_metric( _calculate_pearsonr, weighted=True, **kwargs ), 'pearsonr': partial(_calculate_pearsonr, weighted=False, **kwargs), + 'emd': partial(_calculate_emd, **kwargs), } if metric not in metrics_funcs: raise ValueError( @@ -517,10 +525,13 @@ def _calculate_pearsonr( **kwargs, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Pearson correlation coefficient.""" + # Data weights = _get_weights(cube, coords) if weighted else None res_cube = iris.analysis.stats.pearsonr( cube, ref_cube, corr_coords=coords, weights=weights, **kwargs ) + + # Metadata metadata = CubeMetadata( None, ( @@ -532,4 +543,114 @@ def _calculate_pearsonr( cube.attributes, cube.cell_methods, ) + return (res_cube.core_data(), metadata) + + +def _calculate_emd( + cube: Cube, + ref_cube: Cube, + coords: Iterable[Coord] | Iterable[str], + *, + n_bins: int = 30, +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate Earth mover's distance.""" + # Data + axis = _get_all_coord_dims(cube, coords) + (bins, bin_centers) = _get_bins(cube, ref_cube, n_bins) + + emd = _calculate_emd_eager( + cube.data, + ref_cube.data, + axis=axis, + bins=bins, + bin_centers=bin_centers, + ) + + # Metadata + metadata = CubeMetadata( + None, + 'EMD' if cube.long_name is None else f'EMD of {cube.long_name}', + 'emd' if cube.var_name is None else f'emd_{cube.var_name}', + '1', + cube.attributes, + cube.cell_methods, + ) + + return (emd, metadata) + + +def _get_bins( + cube: Cube, + ref_cube: Cube, + n_bins: int, +) -> tuple[np.ndarray, np.ndarray]: + """Get bins for discretization of data.""" + all_data = da.stack([cube.core_data(), ref_cube.core_data()]) + (min_, max_) = dask.compute(all_data.min(), all_data.max()) + small_value = (max_ - min_) * 0.01 / n_bins + bins = np.linspace(min_ - small_value, max_ + small_value, n_bins + 1) + bin_centers = np.convolve(bins, np.ones(2), 'valid') / 2.0 + return (bins, bin_centers) + + +def _calculate_emd_eager( + data: np.ndarray, + ref_data: np.ndarray, + *, + axis: Optional[int | tuple[int, ...]], + bins: np.ndarray, + bin_centers: np.ndarray, +) -> np.ndarray: + """Calculate Earth mover's distance along axis (eager version).""" + pmf = _get_pmf(data, axis, bins) + ref_pmf = _get_pmf(ref_data, axis, bins) + + # Get vectorized version of scipy.stats.wasserstein_distance and calculate + # EMD metric (= Wasserstein distance) + v_wasserstein_distance = np.vectorize( + wasserstein_distance, signature='(n),(n),(n),(n)->()' + ) + emd = v_wasserstein_distance(bin_centers, bin_centers, pmf, ref_pmf) + + return emd + + +def _get_pmf( + data: np.ndarray, + axis: Optional[int | tuple[int, ...]], + bins: np.ndarray, +) -> np.ndarray: + """Get probaility mass function (PMF) of data along axis. + + This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are + the dimensions of `data` not appearing in `axis` and `n_bins` is the number + of bins. 
+ + """ + if axis is None: + axis = tuple(range(data.ndim)) + elif isinstance(axis, int): + axis = tuple([axis]) + + # Create array with shape (x1, x2, ..., y) where the `xi` are the + # dimensions of `data` not in `axis` and `y` is the product of the + # remaining dimensions + remaining_dims = tuple(a for a in range(data.ndim) if a not in axis) + shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) + reshaped_data = data.reshape(*shape_rem_dims, -1) + + # Use vectorized version of np.histogram to get PMF (which has been + # normalized by number of samples that entered the histogram calculation, + # i.e., `y` from above) + def _get_hist_values(*args, **kwargs): + return np.histogram(*args, **kwargs)[0] + + v_histogram = np.vectorize( + _get_hist_values, excluded=('bins', 'range'), signature='(n)->(m)' + ) + pmf = v_histogram( + reshaped_data, bins=bins, range=(bins[0], bins[-1]) + ) / reshaped_data.shape[-1] + + return pmf From 73c51ad93c1a576f00e62797fd8c7538c47ea261 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 7 Feb 2024 16:47:42 +0100 Subject: [PATCH 14/51] Implemented lazy EMD --- doc/recipe/preprocessor.rst | 18 +- esmvalcore/preprocessor/_compare_with_refs.py | 155 ++++++++++++------ 2 files changed, 115 insertions(+), 58 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 04e87d5121..b3f3614941 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2532,11 +2532,14 @@ The ``distance_metric`` preprocessor supports the following arguments in the recipe: * ``metric`` (:obj:`str`): Distance metric that is calculated. - Must be one of ``'weighted_rmse'`` (weighted root mean square error), - ``'rmse'`` (unweighted root mean square error), ``'weighted_pearsonr'`` - (weighted Pearson correlation coefficient), ``'pearsonr'`` (unweighted - Pearson correlation coefficient, ``'emd'`` (Earth mover's distance, also - known as first Wasserstein metric W1). + Must be one of + + * ``'weighted_rmse'``: Weighted root mean square error. + * ``'rmse'``: Unweighted root mean square error. + * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient. + * ``'pearsonr'``: Unweighted Pearson correlation coefficient. + * ``'emd'``: Earth mover's distance, also known as first Wasserstein metric + `W`$_1$. .. note:: Metrics starting with `weighted_` will calculate weighted distance metrics @@ -2570,8 +2573,9 @@ recipe: * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link for more details on these arguments). - * `emd`: ``nbins`` = number of bins used to create discrete probability - mass function of data before calculating the EMD (:obj:`int`, default: 30). + * `emd`: ``n_bins`` = number of bins used to create discrete probability + mass function of data before calculating the EMD (:obj:`int`, default: + 100). 
Example: diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 4b7b3accc3..d44ef71e29 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -2,6 +2,7 @@ from __future__ import annotations import logging +import string from collections.abc import Iterable from functools import partial from typing import TYPE_CHECKING, Literal, Optional @@ -18,6 +19,7 @@ from iris.util import broadcast_to_shape from scipy.stats import wasserstein_distance +from esmvalcore.iris_helpers import rechunk_cube from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._io import concatenate from esmvalcore.preprocessor._other import get_array_module @@ -242,12 +244,14 @@ def distance_metric( Input datasets/cubes for which the distance metric is calculated relative to a reference dataset/cube. metric: - Distance metric that is calculated. Must be one of ``'weighted_rmse'`` - (weighted root mean square error), ``'rmse'`` (unweighted root mean - square error), ``'weighted_pearsonr'`` (weighted Pearson correlation - coefficient), ``'pearsonr'`` (unweighted Pearson correlation - coefficient), ``'emd'`` (Earth mover's distance, also known as first - Wasserstein metric W1). + Distance metric that is calculated. Must be one of + + * ``'weighted_rmse'``: Weighted root mean square error. + * ``'rmse'``: Unweighted root mean square error. + * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient. + * ``'pearsonr'``: Unweighted Pearson correlation coefficient. + * ``'emd'``: Earth mover's distance, also known as first Wasserstein + metric `W`$_1$. .. note:: Metrics starting with `weighted_` will calculate weighted distance @@ -282,9 +286,9 @@ def distance_metric( * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link for more details on these arguments). - * `emd`: ``nbins`` = number of bins used to create discrete probability - mass function of data before calculating the EMD (:obj:`int`, - default: 30). + * `emd`: ``n_bins`` = number of bins used to create discrete + probability mass function of data before calculating the EMD + (:obj:`int`, default: 100). 
Returns ------- @@ -552,20 +556,26 @@ def _calculate_emd( ref_cube: Cube, coords: Iterable[Coord] | Iterable[str], *, - n_bins: int = 30, + n_bins: int = 100, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Earth mover's distance.""" + # Make sure that data is not chunked along `coords` + cube = rechunk_cube(cube, coords) + ref_cube = rechunk_cube(ref_cube, coords) + # Data - axis = _get_all_coord_dims(cube, coords) + axes = _get_all_coord_dims(cube, coords) (bins, bin_centers) = _get_bins(cube, ref_cube, n_bins) - emd = _calculate_emd_eager( - cube.data, - ref_cube.data, - axis=axis, - bins=bins, - bin_centers=bin_centers, - ) + if cube.has_lazy_data() and ref_cube.has_lazy_data(): + func = partial( + _calculate_emd_lazy, axes=axes, bins=bins, bin_centers=bin_centers + ) + else: + func = partial( + _calculate_emd_eager, axes=axes, bins=bins, bin_centers=bin_centers + ) + emd = func(cube.core_data(), ref_cube.core_data()) # Metadata metadata = CubeMetadata( @@ -594,63 +604,106 @@ def _get_bins( return (bins, bin_centers) +def _calculate_emd_lazy( + data: da.Array, + ref_data: da.Array, + *, + axes: tuple[int, ...], + bins: np.ndarray, + bin_centers: np.ndarray, +) -> np.ndarray: + """Calculate Earth mover's distance along axes (eager version).""" + n_axes = len(axes) + + # da.apply_gufunc transposes the input array so that the axes given by the + # `axes` argument to this function are the rightmost dimensions. Thus, we + # need to use `axes=(ndim-n_axes, ..., ndim-2, ndim-1)` for + # _calculate_emd_eager here. + axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) + + # The call signature depends also on the number of axes in `axes`, and will + # be (a,b,...)->() + input_signature = f"({','.join(list(string.ascii_lowercase)[:len(axes)])})" + signature = f"{input_signature},{input_signature}->()" + + _calculate_emd_for_chunk = partial( + _calculate_emd_eager, + axes=axes_in_chunk, + bins=bins, + bin_centers=bin_centers, + ) + emd = da.apply_gufunc( + _calculate_emd_for_chunk, + signature, + data, + ref_data, + axes=[axes, axes, ()], + output_dtypes=data.dtype, + ) + + return emd + + def _calculate_emd_eager( data: np.ndarray, ref_data: np.ndarray, *, - axis: Optional[int | tuple[int, ...]], + axes: tuple[int, ...], bins: np.ndarray, bin_centers: np.ndarray, ) -> np.ndarray: - """Calculate Earth mover's distance along axis (eager version).""" - pmf = _get_pmf(data, axis, bins) - ref_pmf = _get_pmf(ref_data, axis, bins) - - # Get vectorized version of scipy.stats.wasserstein_distance and calculate - # EMD metric (= Wasserstein distance) - v_wasserstein_distance = np.vectorize( - wasserstein_distance, signature='(n),(n),(n),(n)->()' - ) - emd = v_wasserstein_distance(bin_centers, bin_centers, pmf, ref_pmf) + """Calculate Earth mover's distance along axes (eager version).""" + pmf = _get_pmf(data, axes, bins) + ref_pmf = _get_pmf(ref_data, axes, bins) - return emd + # Get vectorized version of scipy.stats.wasserstein_distance that also + # handles masks properly and calculate EMD metric (= First Wasserstein + # distance) + def calc_emd(arr, ref_arr): + if np.ma.is_masked(arr) or np.ma.is_masked(ref_arr): + return np.nan + return wasserstein_distance(bin_centers, bin_centers, arr, ref_arr) + + v_calc_emd = np.vectorize(calc_emd, signature='(n),(n)->()') + emd = v_calc_emd(pmf, ref_pmf) + + return np.ma.masked_invalid(emd) def _get_pmf( data: np.ndarray, - axis: Optional[int | tuple[int, ...]], + axes: tuple[int, ...], bins: np.ndarray, ) -> np.ndarray: - """Get probaility 
mass function (PMF) of data along axis. + """Get probaility mass function (PMF) of data along axes. This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are - the dimensions of `data` not appearing in `axis` and `n_bins` is the number + the dimensions of `data` not appearing in `axes` and `n_bins` is the number of bins. """ - if axis is None: - axis = tuple(range(data.ndim)) - elif isinstance(axis, int): - axis = tuple([axis]) - # Create array with shape (x1, x2, ..., y) where the `xi` are the - # dimensions of `data` not in `axis` and `y` is the product of the + # dimensions of `data` not in `axes` and `y` is the product of the # remaining dimensions - remaining_dims = tuple(a for a in range(data.ndim) if a not in axis) + remaining_dims = tuple(a for a in range(data.ndim) if a not in axes) shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) reshaped_data = data.reshape(*shape_rem_dims, -1) # Use vectorized version of np.histogram to get PMF (which has been - # normalized by number of samples that entered the histogram calculation, - # i.e., `y` from above) - def _get_hist_values(*args, **kwargs): - return np.histogram(*args, **kwargs)[0] - - v_histogram = np.vectorize( - _get_hist_values, excluded=('bins', 'range'), signature='(n)->(m)' - ) - pmf = v_histogram( - reshaped_data, bins=bins, range=(bins[0], bins[-1]) - ) / reshaped_data.shape[-1] + # normalized by number of samples that entered the histogram calculation) + def _get_hist_values(arr): + mask = np.ma.getmaskarray(arr) + arr = arr[~mask] + return np.histogram(arr, bins=bins, range=(bins[0], bins[-1]))[0] + + v_histogram = np.vectorize(_get_hist_values, signature='(n)->(m)') + pmf = v_histogram(reshaped_data) + + # Mask points where all input data was masked (these are the ones where the + # PMF sums to 0) and normalize + norm = pmf.sum(axis=-1, keepdims=True) + mask = np.isclose(norm, 0.0) + mask_broadcast = np.broadcast_to(mask, pmf.shape) + pmf = np.ma.array(pmf, mask=mask_broadcast) / np.ma.array(norm, mask=mask) return pmf From 2a0bf29ed4c2050cdcb1b99f3b42f50d49b014f1 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 7 Feb 2024 16:51:20 +0100 Subject: [PATCH 15/51] Fixed test --- .../preprocessor/_compare_with_refs/test_compare_with_refs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 814d599665..b20d28f0c7 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -669,7 +669,7 @@ def test_invalid_metric(regular_cubes, ref_cubes): } msg = ( r"Expected one of \['weighted_rmse', 'rmse', 'weighted_pearsonr', " - r"'pearsonr'\] for metric, got 'invalid'" + r"'pearsonr', 'emd'\] for metric, got 'invalid'" ) with pytest.raises(ValueError, match=msg): distance_metric(products, 'invalid') From 55ffb12c69ec5e6e4050985026fba332e1fdaf81 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 7 Feb 2024 18:25:58 +0100 Subject: [PATCH 16/51] Add detailed descriptions of all metrics --- doc/recipe/preprocessor.rst | 67 +++++++++++++++++-- esmvalcore/preprocessor/_compare_with_refs.py | 5 +- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index b3f3614941..99abd124e2 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2531,15 +2531,59 @@ To 
ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or The ``distance_metric`` preprocessor supports the following arguments in the recipe: +.. _list_of_distance_metrics: + * ``metric`` (:obj:`str`): Distance metric that is calculated. Must be one of - * ``'weighted_rmse'``: Weighted root mean square error. - * ``'rmse'``: Unweighted root mean square error. - * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient. - * ``'pearsonr'``: Unweighted Pearson correlation coefficient. - * ``'emd'``: Earth mover's distance, also known as first Wasserstein metric - `W`$_1$. + * ``'weighted_rmse'``: `Weighted root mean square error`_. + + .. math:: + + WRMSE = \sqrt{\sum_{i=1}^N w_i \left( x_i - r_i \right)^2} + + * ``'rmse'``: `Unweighted root mean square error`_. + + .. math:: + + RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N \left( x_i - r_i \right)^2} + + * ``'weighted_pearsonr'``: `Weighted Pearson correlation coefficient`_. + + .. math:: + + r = \frac{ + \sum_{i=1}^N + w_i \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) + }{ + \sqrt{\sum_{i=1}^N w_i \left( x_i - \bar{x} \right)^2} + \sqrt{\sum_{i=1}^N w_i \left( r_i - \bar{r} \right)^2} + } + + * ``'pearsonr'``: `Unweighted Pearson correlation coefficient`_. + + .. math:: + + r = \frac{ + \sum_{i=1}^N + \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) + }{ + \sqrt{\sum_{i=1}^N \left( x_i - \bar{x} \right)^2} + \sqrt{\sum_{i=1}^N \left( r_i - \bar{r} \right)^2} + } + + * ``'emd'``: `Earth mover's distance`_, also known as first Wasserstein + metric `W`\ :sub:`1`. + + .. math:: + + W_1 = ... + + Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest + and a corresponding reference, respectively (a bar over a variable denotes + its arithmetic/weighted mean [the latter for weighted metrics]). `w`\ + :sub:`i` are weights that sum to one (see note below) and `N` is the total + number of samples. .. note:: Metrics starting with `weighted_` will calculate weighted distance metrics @@ -2592,6 +2636,17 @@ Example: See also :func:`esmvalcore.preprocessor.distance_metric`. +.. _Weighted root mean square error: https://en.wikipedia.org/wiki/ + Root-mean-square_deviation +.. _Unweighted root mean square error: https://en.wikipedia.org/wiki/ + Root-mean-square_deviation +.. _Unweighted Pearson correlation coefficient: https://en.wikipedia.org/ + wiki/Pearson_correlation_coefficient +.. _Weighted Pearson correlation coefficient: https://en.wikipedia.org/ + wiki/Pearson_correlation_coefficient +.. _Earth mover's distance: https://en.wikipedia.org/wiki/ + Earth_mover%27s_distance + .. _Memory use: diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index d44ef71e29..04da8b75f4 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -251,7 +251,10 @@ def distance_metric( * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient. * ``'pearsonr'``: Unweighted Pearson correlation coefficient. * ``'emd'``: Earth mover's distance, also known as first Wasserstein - metric `W`$_1$. + metric `W`\ :sub:`1`. + + A detailed description of these metrics can be found :ref:`here + `. .. 
note::
            Metrics starting with `weighted_` will calculate weighted distance

From 17019b066b91d08a086040f52797fb6236d00750 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 8 Feb 2024 13:36:48 +0100
Subject: [PATCH 17/51] More detailed description of EMD

---
 doc/recipe/preprocessor.rst                   | 32 ++++++++++++++-----
 esmvalcore/preprocessor/_compare_with_refs.py |  4 +--
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 99abd124e2..0f137f6926 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2574,16 +2574,33 @@ recipe:
   * ``'emd'``: `Earth mover's distance`_, also known as first Wasserstein
     metric `W`\ :sub:`1`.
+    The Wasserstein metric measures distances between two probability
+    distributions.
+    Here, we first create discrete probability distributions of the input data
+    through binning, which are then used as input for the Wasserstein metric.
+    The metric is also known as `Earth mover's distance` since, intuitively, it
+    can be seen as the minimum "cost" of turning one pile of earth into another
+    one (pile of earth = probability distribution).
+    This is also known as the `optimal transport` problem.
+    Formally, this can be described with a joint probability distribution (or
+    `optimal transport matrix`) γ (whose marginals are the input distributions)
+    that minimizes the "transportation cost":

     .. math::

-       W_1 = ...
+       W_1 = \min_{\gamma \in \mathbb{R}^{n \times n}_{+}} \sum_{i,j}^{n}
+       \gamma_{ij} \lvert X_i - R_j \rvert \\
+       \textrm{with} ~~ \gamma 1 = p(X);~ \gamma^T 1 = p(R);~ \gamma \ge 0

   Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest
   and a corresponding reference, respectively (a bar over a variable denotes
-  its arithmetic/weighted mean [the latter for weighted metrics]).
+  its arithmetic/weighted mean [the latter for weighted metrics]).
+  Capital letters (`X`\ :sub:`i` and `R`\ :sub:`i`) refer to bin centers of a
+  discrete probability distribution with values `p`\ (`X`\ :sub:`i`) or `p`\
+  (`R`\ :sub:`i`) and a number of bins `n` (see the argument ``n_bins`` below)
+  that has been derived for the variables `x` and `r` through binning.
   `w`\ :sub:`i` are weights that sum to one (see note below) and `N` is the
   total number of samples.

@@ -2618,8 +2635,7 @@ recipe:
   arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link
   for more details on these arguments).
 * `emd`: ``n_bins`` = number of bins used to create discrete probability
-  mass function of data before calculating the EMD (:obj:`int`, default:
-  100).
+  distribution of data before calculating the EMD (:obj:`int`, default: 100).

 Example:

@@ -2644,8 +2660,8 @@ recipe:
    wiki/Pearson_correlation_coefficient
-.. _Earth mover's distance: https://en.wikipedia.org/wiki/
-   Earth_mover%27s_distance
+.. _Earth mover's distance: https://pythonot.github.io/
+   quickstart.html#computing-wasserstein-distance
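For two binned one-dimensional distributions, the optimal transport
formulation above reduces to a single call of
:func:`scipy.stats.wasserstein_distance`, the same function the
implementation uses internally. The following is a self-contained sketch with
toy data (illustrative only, not ESMValCore code):

.. code-block:: python

    import numpy as np
    from scipy.stats import wasserstein_distance

    rng = np.random.default_rng(0)
    data = rng.normal(1.0, 1.0, 10_000)      # "model" samples x
    ref_data = rng.normal(0.0, 1.0, 10_000)  # "reference" samples r

    # Shared bins over the combined data range (cf. the ``n_bins`` option)
    n_bins = 100
    all_data = np.concatenate([data, ref_data])
    bins = np.linspace(all_data.min(), all_data.max(), n_bins + 1)
    bin_centers = 0.5 * (bins[:-1] + bins[1:])

    # Discrete probability distributions p(X) and p(R) through binning
    pmf = np.histogram(data, bins=bins)[0] / data.size
    ref_pmf = np.histogram(ref_data, bins=bins)[0] / ref_data.size

    # W1: both distributions live on the bin centers, the PMFs act as weights
    w1 = wasserstein_distance(bin_centers, bin_centers, pmf, ref_pmf)
    print(w1)  # close to 1.0, the shift between the two normals

For two distributions that differ only by a translation, `W`\ :sub:`1` equals
the size of the shift, which makes such toy examples easy to verify.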
.. _Memory use:

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 04da8b75f4..31aa04d8d9 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -222,7 +222,7 @@ def distance_metric(
     keep_reference_dataset: bool = True,
     **kwargs,
 ) -> set[PreprocessorFile] | CubeList:
-    """Calculate distance metrics.
+    r"""Calculate distance metrics.

     All input datasets need to have identical dimensional coordinates. This can
     for example be ensured with the preprocessors
@@ -290,7 +290,7 @@ def distance_metric(
           keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`,
           see that link for more details on these arguments).
         * `emd`: ``n_bins`` = number of bins used to create discrete
-          probability mass function of data before calculating the EMD
+          probability distribution of data before calculating the EMD
           (:obj:`int`, default: 100).

From d60ba8671f2107ede8d41f5139e83b6093abb5bf Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 8 Feb 2024 15:35:39 +0100
Subject: [PATCH 18/51] Fixed bug in EMD calculation for masked input

---
 esmvalcore/preprocessor/_compare_with_refs.py | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 31aa04d8d9..778f88d29f 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -398,10 +398,12 @@ def _get_all_coord_dims(
     cube: Cube,
     coords: Iterable[Coord] | Iterable[str],
 ) -> tuple[int, ...]:
+    """Get sorted list of all coordinate dimensions from coordinates."""
     all_coord_dims = []
     for coord in coords:
         all_coord_dims.extend(cube.coord_dims(coord))
-    return tuple(set(all_coord_dims))
+    sorted_all_coord_dims = sorted(list(set(all_coord_dims)))
+    return tuple(sorted_all_coord_dims)


 def _calculate_metric(
@@ -685,28 +687,28 @@ def _get_pmf(
     of bins.
""" - # Create array with shape (x1, x2, ..., y) where the `xi` are the - # dimensions of `data` not in `axes` and `y` is the product of the - # remaining dimensions + # Create array with shape (x1, x2, ..., y) where `y` is the product of all + # dimensions in `axes` and the `xi` are the remaining dimensions remaining_dims = tuple(a for a in range(data.ndim) if a not in axes) + reshaped_data = np.transpose(data, axes=(*remaining_dims, *axes)) shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) - reshaped_data = data.reshape(*shape_rem_dims, -1) + reshaped_data = reshaped_data.reshape(*shape_rem_dims, -1) - # Use vectorized version of np.histogram to get PMF (which has been - # normalized by number of samples that entered the histogram calculation) + # Apply vectorized version of np.histogram def _get_hist_values(arr): mask = np.ma.getmaskarray(arr) arr = arr[~mask] return np.histogram(arr, bins=bins, range=(bins[0], bins[-1]))[0] v_histogram = np.vectorize(_get_hist_values, signature='(n)->(m)') - pmf = v_histogram(reshaped_data) + hist = v_histogram(reshaped_data) - # Mask points where all input data was masked (these are the ones where the - # PMF sums to 0) and normalize - norm = pmf.sum(axis=-1, keepdims=True) + # Mask points where all input data were masked (these are the ones where + # the histograms sums to 0) and normalize histrogram by number of samples + # that entered the calculation to get PMF + norm = hist.sum(axis=-1, keepdims=True) mask = np.isclose(norm, 0.0) - mask_broadcast = np.broadcast_to(mask, pmf.shape) - pmf = np.ma.array(pmf, mask=mask_broadcast) / np.ma.array(norm, mask=mask) + mask_broadcast = np.broadcast_to(mask, hist.shape) + pmf = np.ma.array(hist, mask=mask_broadcast) / np.ma.array(norm, mask=mask) return pmf From 494f328906100692fd736e1d4ab5d752d4f98bef Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Feb 2024 15:50:03 +0100 Subject: [PATCH 19/51] Added tests for EMD calculation --- .../test_compare_with_refs.py | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index b20d28f0c7..a0f3bef8d2 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -15,15 +15,15 @@ from tests import PreprocessorFile -def assert_array_equal(array_1, array_2): - """Assert that (masked) array 1 equals (masked) array 2.""" +def assert_allclose(array_1, array_2): + """Assert that (masked) array 1 is close to (masked) array 2.""" if np.ma.is_masked(array_1) or np.ma.is_masked(array_2): np.testing.assert_array_equal(np.ma.getmaskarray(array_1), np.ma.getmaskarray(array_2)) mask = np.ma.getmaskarray(array_1) - np.testing.assert_array_equal(array_1[~mask], array_2[~mask]) + np.testing.assert_allclose(array_1[~mask], array_2[~mask]) else: - np.testing.assert_array_equal(array_1, array_2) + np.testing.assert_allclose(array_1, array_2) def products_set_to_dict(products): @@ -101,7 +101,7 @@ def test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, data) + assert_allclose(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == units @@ -116,7 +116,7 @@ def 
test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): assert len(product_b.cubes) == 1 out_cube = product_b.cubes[0] assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, data) + assert_allclose(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == units @@ -137,7 +137,7 @@ def test_bias_cubes(regular_cubes, ref_cubes, bias_type, data, units): out_cube = out_cubes[0] assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, data) + assert_allclose(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == units @@ -164,7 +164,7 @@ def test_bias_cubes_broadcastable( out_cube = out_cubes[0] assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, data) + assert_allclose(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == units @@ -198,7 +198,7 @@ def test_denominator_mask_threshold_products(regular_cubes, ref_cubes): [42.0, 42.0]], [[42.0, 42.0], [42.0, 0.75]]], 42.0) - assert_array_equal(out_cube.data, expected_data) + assert_allclose(out_cube.data, expected_data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == '1' @@ -226,7 +226,7 @@ def test_denominator_mask_threshold_cubes(regular_cubes, ref_cubes): [42.0, 42.0]], [[42.0, 42.0], [42.0, 0.75]]], 42.0) - assert_array_equal(out_cube.data, expected_data) + assert_allclose(out_cube.data, expected_data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == '1' @@ -256,7 +256,7 @@ def test_keep_reference_dataset(regular_cubes, ref_cubes, bias_type): out_cube = product_ref.cubes[0] assert out_cube.dtype == np.float32 expected_data = [[[2.0, 2.0], [2.0, 2.0]], [[2.0, 2.0], [2.0, 4.0]]] - assert_array_equal(out_cube.data, expected_data) + assert_allclose(out_cube.data, expected_data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == 'K' @@ -290,7 +290,7 @@ def test_bias_products_and_ref_cube( assert len(product_a.cubes) == 1 out_cube = product_a.cubes[0] assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, data) + assert_allclose(out_cube.data, data) assert out_cube.var_name == 'tas' assert out_cube.standard_name == 'air_temperature' assert out_cube.units == units @@ -352,6 +352,7 @@ def test_ref_cube_non_cubes(regular_cubes): ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'), ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'), + ('emd', 1.9866472482681274, 0.0, 'EMD', 'emd_tas', '1'), ] AREA_WEIGHTS = CellMeasure( np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), @@ -403,7 +404,7 @@ def test_distance_metric( out_cube = product_a.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -421,7 +422,7 @@ def test_distance_metric( out_cube = product_b.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert_allclose(out_cube.data, 
np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -441,7 +442,7 @@ def test_distance_metric( out_cube = product_ref.cubes[0] assert out_cube.shape == () assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, ref_data) + assert_allclose(out_cube.data, ref_data) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -464,6 +465,7 @@ def test_distance_metric( '1', ), ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), + ('emd', [0.980196, 2.9930985], 'EMD', 'emd_tas', '1'), ] @@ -509,9 +511,8 @@ def test_distance_metric_lazy( assert out_cube.shape == (2,) assert out_cube.dtype == np.float32 assert out_cube.has_lazy_data() - assert_array_equal( - out_cube.data, np.array(data, dtype=np.float32), - ) + print(out_cube.data) + assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.coord('time') == regular_cubes[0].coord('time') assert out_cube.var_name == var_name assert out_cube.long_name == long_name @@ -540,7 +541,7 @@ def test_distance_metric_cubes( assert out_cube.shape == () assert out_cube.dtype == np.float32 - assert_array_equal(out_cube.data, np.array(data, dtype=np.float32)) + assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -606,13 +607,14 @@ def test_distance_metric_masked_data( assert out_cube.has_lazy_data() else: assert not out_cube.has_lazy_data() + assert out_cube.dtype == np.float32 # Mask handling differs for dask and numpy if np.isnan(data) and not lazy: expected_data = np.ma.masked_invalid(data) else: expected_data = np.array(data, dtype=np.float32) - assert_array_equal(out_cube.data, expected_data) + assert_allclose(out_cube.data, expected_data) assert out_cube.var_name == var_name assert out_cube.long_name == long_name assert out_cube.standard_name is None @@ -627,6 +629,7 @@ def test_distance_metric_masked_data( 'rmse', 'weighted_pearsonr', 'pearsonr', + 'emd', ] @@ -730,6 +733,7 @@ def test_distance_metric_non_matching_dims(regular_cubes, metric): ('rmse', False), ('weighted_pearsonr', True), ('pearsonr', False), + ('emd', False), ] ) def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error): From 3713a7b537f7d07698cdbf5f2bbcf17750a4b441 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 8 Feb 2024 16:29:00 +0100 Subject: [PATCH 20/51] Test distance_metric settings early when running recipes --- esmvalcore/_recipe/check.py | 68 ++++++++--- esmvalcore/_recipe/recipe.py | 2 + tests/integration/recipe/test_recipe.py | 106 +++++++++++++++++- .../test_compare_with_refs.py | 1 - 4 files changed, 160 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py index 9b4b2abb37..54c99422aa 100644 --- a/esmvalcore/_recipe/check.py +++ b/esmvalcore/_recipe/check.py @@ -4,6 +4,7 @@ import logging import os import subprocess +from functools import partial from inspect import getfullargspec from pprint import pformat from shutil import which @@ -391,31 +392,56 @@ def differing_timeranges(timeranges, required_vars): "Set `timerange` to a common value.") -def bias_type(settings: dict) -> None: - """Check that bias_type for bias preprocessor is valid.""" - if 'bias' not in settings: +def _check_literal( + settings: dict, + *, + step: str, + option: str, + 
allowed_values: Iterable[str], +) -> None: + """Check that an option for a preprocessor has a valid value.""" + if step not in settings: return - valid_options = ('absolute', 'relative') - user_bias_type = settings['bias'].get('bias_type', 'absolute') - if user_bias_type not in valid_options: + user_value = settings[step].get(option, allowed_values[0]) + if user_value not in allowed_values: raise RecipeError( - f"Expected one of {valid_options} for `bias_type`, got " - f"'{user_bias_type}'" + f"Expected one of {allowed_values} for `{option}`, got " + f"'{user_value}'" ) -def reference_for_bias_preproc(products): - """Check that exactly one reference dataset for bias preproc is given.""" - step = 'bias' +bias_type = partial( + _check_literal, + step='bias', + option='bias_type', + allowed_values=('absolute', 'relative'), +) + + +metric_type = partial( + _check_literal, + step='distance_metric', + option='metric', + allowed_values=( + 'weighted_rmse', + 'rmse', + 'weighted_pearsonr', + 'pearsonr', + 'emd', + ), +) + + +def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None: + """Check that exactly one reference dataset is given.""" products = {p for p in products if step in p.settings} if not products: return - # Check that exactly one dataset contains the facet ``reference_for_bias: - # true`` + # Check that exactly one dataset contains the specified facet reference_products = [] for product in products: - if product.attributes.get('reference_for_bias', False): + if product.attributes.get(attr_name, False): reference_products.append(product) if len(reference_products) != 1: products_str = [p.filename for p in products] @@ -425,13 +451,25 @@ def reference_for_bias_preproc(products): ref_products_str = [p.filename for p in reference_products] ref_products_str = f":\n{pformat(ref_products_str)}.\n" raise RecipeError( - f"Expected exactly 1 dataset with 'reference_for_bias: true' in " + f"Expected exactly 1 dataset with '{attr_name}: true' in " f"products\n{pformat(products_str)},\nfound " f"{len(reference_products):d}{ref_products_str}Please also " f"ensure that the reference dataset is not excluded with the " f"'exclude' option") +reference_for_bias_preproc = partial( + _check_ref_attributes, step='bias', attr_name='reference_for_bias' +) + + +reference_for_distance_metric_preproc = partial( + _check_ref_attributes, + step='distance_metric', + attr_name='reference_for_metric', +) + + def statistics_preprocessors(settings: dict) -> None: """Check options of statistics preprocessors.""" mm_stats = ( diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 4369a7d4ac..8d77724a83 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -557,6 +557,7 @@ def _get_preprocessor_products( f'{separator.join(sorted(missing_vars))}') check.reference_for_bias_preproc(products) + check.reference_for_distance_metric_preproc(products) _configure_multi_product_preprocessor( products=products, @@ -658,6 +659,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars): check.statistics_preprocessors(settings) check.regridding_schemes(settings) check.bias_type(settings) + check.metric_type(settings) def _get_preprocessor_task(datasets, profiles, task_name): diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index f8c6c3535f..63913da2d9 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -2841,7 +2841,7 @@ def 
test_bias_two_refs(tmp_path, patched_datafinder, session): assert "found 2" in exc.value.failed_tasks[0].message -def test_inlvaid_bias_type(tmp_path, patched_datafinder, session): +def test_invalid_bias_type(tmp_path, patched_datafinder, session): content = dedent(""" preprocessors: test_bias: @@ -2991,3 +2991,107 @@ def test_deprecated_regridding_scheme(tmp_path, patched_datafinder, session): scripts: null """) get_recipe(tmp_path, content, session) + + +def test_distance_metric_no_ref(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: emd + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + - {dataset: CESM2} + + scripts: null + """) + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true' in " + "products" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert msg in exc.value.failed_tasks[0].message + assert "found 0" in exc.value.failed_tasks[0].message + + +def test_distance_metric_two_refs(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: emd + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5, reference_for_metric: true} + - {dataset: CESM2, reference_for_metric: true} + + scripts: null + """) + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true' in " + "products" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert msg in exc.value.failed_tasks[0].message + assert "found 2" in exc.value.failed_tasks[0].message + + +def test_invalid_metric(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: INVALID + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + - {dataset: CESM2, reference_for_metric: true} + + scripts: null + """) + msg = ( + "Expected one of ('weighted_rmse', 'rmse', 'weighted_pearsonr', " + "'pearsonr', 'emd') for `metric`, got 'INVALID'" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert exc.value.failed_tasks[0].message == msg diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index a0f3bef8d2..2bf350b2e3 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -511,7 +511,6 @@ def test_distance_metric_lazy( assert out_cube.shape == (2,) assert out_cube.dtype == np.float32 assert out_cube.has_lazy_data() - print(out_cube.data) assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) assert out_cube.coord('time') == 
regular_cubes[0].coord('time')
     assert out_cube.var_name == var_name

From 2b2b43e131258fd58da9dbeb605e9a79b43a364e Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 8 Feb 2024 16:29:12 +0100
Subject: [PATCH 21/51] Fix mypy

---
 esmvalcore/_recipe/check.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py
index 54c99422aa..a6099fcd44 100644
--- a/esmvalcore/_recipe/check.py
+++ b/esmvalcore/_recipe/check.py
@@ -397,7 +397,7 @@ def _check_literal(
     *,
     step: str,
     option: str,
-    allowed_values: Iterable[str],
+    allowed_values: tuple[str, ...],
 ) -> None:
     """Check that an option for a preprocessor has a valid value."""
     if step not in settings:
@@ -448,8 +448,9 @@ def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
     if not reference_products:
         ref_products_str = ". "
     else:
-        ref_products_str = [p.filename for p in reference_products]
-        ref_products_str = f":\n{pformat(ref_products_str)}.\n"
+        ref_products_str = (
+            f":\n{pformat([p.filename for p in reference_products])}.\n"
+        )
     raise RecipeError(
         f"Expected exactly 1 dataset with '{attr_name}: true' in "

From 1b9f9d770d1887f5a2a1d3872fc78301afdb1255 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 8 Feb 2024 16:52:59 +0100
Subject: [PATCH 22/51] Added tests for fully masked data

---
 .../test_compare_with_refs.py | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index 2bf350b2e3..5b299ce4c9 100644
--- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -623,6 +623,47 @@ def test_distance_metric_masked_data(
     )


+@pytest.mark.parametrize('lazy', [True, False])
+@pytest.mark.parametrize(
+    'metric,_,__,long_name,var_name,units', TEST_DISTANCE_METRICS
+)
+def test_distance_metric_fully_masked_data(
+    regular_cubes, ref_cubes, metric, _, __, long_name, var_name, units, lazy
+):
+    """Test `distance_metric` with fully masked data."""
+    cube = regular_cubes[0]
+    cube.data = np.ma.masked_invalid(np.full(cube.shape, np.nan))
+    cube.add_cell_measure(AREA_WEIGHTS, (1, 2))
+    ref_cube = ref_cubes[0]
+
+    if lazy:
+        cube.data = da.array(cube.data)
+        ref_cube.data = da.array(ref_cube.data)
+
+    out_cubes = distance_metric([cube], metric, ref_cube=ref_cube)
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    assert out_cube.shape == ()
+    if lazy:
+        assert out_cube.has_lazy_data()
+    else:
+        assert not out_cube.has_lazy_data()
+    assert out_cube.dtype == np.float64
+
+    expected_data = np.ma.masked_invalid(np.nan)
+    assert_allclose(out_cube.data, expected_data)
+    assert out_cube.var_name == var_name
+    assert out_cube.long_name == long_name
+    assert out_cube.standard_name is None
+    assert out_cube.units == units
+    assert out_cube.cell_methods == (
+        CellMethod(metric, ['time', 'latitude', 'longitude']),
+    )
+
+
 TEST_METRICS = [
     'weighted_rmse',
     'rmse',
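The `functools.partial` pattern used for these checks keeps each option
validator declarative: one generic check, specialized per preprocessor step.
A standalone sketch of the pattern (plain ``ValueError`` instead of
``RecipeError``; otherwise mirroring the patch; illustrative only):

    from functools import partial

    def _check_literal(settings, *, step, option, allowed_values):
        """Check that an option of a preprocessor step has a valid value."""
        if step not in settings:
            return
        value = settings[step].get(option, allowed_values[0])
        if value not in allowed_values:
            raise ValueError(
                f"Expected one of {allowed_values} for `{option}`, got "
                f"'{value}'"
            )

    # Specialized validators are just partial applications of the generic one
    metric_type = partial(
        _check_literal,
        step='distance_metric',
        option='metric',
        allowed_values=(
            'weighted_rmse', 'rmse', 'weighted_pearsonr', 'pearsonr', 'emd'
        ),
    )

    metric_type({'distance_metric': {'metric': 'rmse'}})  # passes silently
    metric_type({'other_step': {}})                       # no-op
    # metric_type({'distance_metric': {'metric': 'foo'}})  -> ValueError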
From a6989d6a14e240e5dd35cf5c5ca4f59b27b43a7d Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 15 Feb 2024 15:17:01 +0100
Subject: [PATCH 23/51] Optimize formula for EMD

---
 doc/recipe/preprocessor.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 0f137f6926..5c5855a847 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2590,15 +2590,16 @@ recipe:

         W_1 = \min_{\gamma \in \mathbb{R}^{n \times n}_{+}} \sum_{i,j}^{n}
         \gamma_{ij} \lvert X_i - R_j \rvert \\
-        \textrm{with} ~~ \gamma 1 = p(X);~ \gamma^T 1 = p(R);~ \gamma \ge 0
+        \textrm{with} ~~ \gamma 1 = p_X(X);~ \gamma^T 1 = p_R(R)

  Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest
  and a corresponding reference, respectively (a bar over a variable denotes
  its arithmetic/weighted mean [the latter for weighted metrics]).
  Capital letters (`X`\ :sub:`i` and `R`\ :sub:`i`) refer to bin centers of a
-  discrete probability distribution with values `p`\ (`X`\ :sub:`i`) or `p`\
-  (`R`\ :sub:`i`) and a number of bins `n` (see the argument ``n_bins`` below)
-  that has been derived for the variables `x` and `r` through binning.
+  discrete probability distribution with values `p`\ :sub:`X`(`X`\ :sub:`i`) or
+  `p`\ :sub:`X` (`R`\ :sub:`i`) and a number of bins `n` (see the argument
+  ``n_bins`` below) that has been derived for the variables `x` and `r` through
+  binning.
  `w`\ :sub:`i` are weights that sum to one (see note below) and `N` is the
  total number of samples.
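For one-dimensional distributions this minimization has a closed form: W1 is
the integral of the absolute difference between the two CDFs, which is how
:func:`scipy.stats.wasserstein_distance` evaluates it. A small worked example
(hypothetical PMFs on four bin centers; all values chosen so the result can be
checked by hand):

    import numpy as np

    bin_centers = np.array([0.0, 1.0, 2.0, 3.0])
    p_x = np.array([0.5, 0.5, 0.0, 0.0])  # mass on the two left bins
    p_r = np.array([0.0, 0.0, 0.5, 0.5])  # mass on the two right bins

    # W1 = sum over gaps of |CDF_X - CDF_R| * gap width
    widths = np.diff(bin_centers)
    cdf_diff = np.abs(np.cumsum(p_x) - np.cumsum(p_r))[:-1]
    print(np.sum(cdf_diff * widths))  # 2.0: every bit of mass moves 2 bins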
From 58dd3c53bffad63fad037c02c3a9df8f28114a2d Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 15 Feb 2024 15:21:06 +0100
Subject: [PATCH 24/51] ref_cube -> reference in distance_metrics

---
 esmvalcore/preprocessor/_compare_with_refs.py | 52 +++++++++----------
 .../test_compare_with_refs.py | 26 +++++-----
 2 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 778f88d29f..5c9874e2fa 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -217,7 +217,7 @@ def distance_metric(
     products: set[PreprocessorFile] | Iterable[Cube],
     metric: MetricType,
-    ref_cube: Optional[Cube] = None,
+    reference: Optional[Cube] = None,
     coords: Iterable[Coord] | Iterable[str] | None = None,
     keep_reference_dataset: bool = True,
     **kwargs,
 ) -> set[PreprocessorFile] | CubeList:
@@ -232,10 +232,10 @@ def distance_metric(
     Notes
     -----
     This preprocessor requires a reference dataset, which can be specified
     with
-    the `ref_cube` argument. If `ref_cube` is ``None``, exactly one input
+    the `reference` argument. If `reference` is ``None``, exactly one input
     dataset in the `products` set needs to have the facet
     ``reference_for_metric: true`` defined in the recipe. Please do **not**
-    specify the option `ref_cube` when using this preprocessor function in a
+    specify the option `reference` when using this preprocessor function in a
     recipe.

     Parameters
     ----------
@@ -268,7 +268,7 @@ def distance_metric(
         (this only works for regular grids). By default, **NO** supplementary
         variables will be used; they need to be explicitly requested in the
         recipe.
-    ref_cube:
+    reference:
         Cube which is used as reference for the distance metric calculation.
         If ``None``, `products` needs to be a :obj:`set` of
         :class:`~esmvalcore.preprocessor.PreprocessorFile` objects and exactly
@@ -305,7 +305,7 @@ def distance_metric(
     ValueError
         Shape and coordinates of products and reference data does not match;
         not exactly one input datasets contains the facet
-        ``reference_for_metric: true`` if ``ref_cube=None`; ``ref_cube=None``
+        ``reference_for_metric: true`` if ``reference=None``; ``reference=None``
         and the input products are given as iterable of
         :class:`~iris.cube.Cube` objects; an invalid ``metric`` has been
         given.
     iris.exceptions.CoordinateNotFoundError
@@ -318,11 +318,11 @@ def distance_metric(
     all_cubes_given = all(isinstance(p, Cube) for p in products)

     # Get reference cube if not explicitly given
-    if ref_cube is None:
+    if reference is None:
         if all_cubes_given:
             raise ValueError(
                 "A list of Cubes is given to this preprocessor; please "
-                "specify a `ref_cube`"
+                "specify a `reference`"
             )
         reference_products = []
         for product in products:
@@ -342,13 +342,13 @@ def distance_metric(
         # earlier in the preprocessing chain of ESMValTool. To make sure that
         # this preprocessor can also be used outside the ESMValTool
         # preprocessing chain, an additional concatenate call is added here.
-        ref_cube = concatenate(reference_product.cubes)
+        reference = concatenate(reference_product.cubes)

     # If input is an Iterable of Cube objects, calculate distance metric for
     # each element
     if all_cubes_given:
         cubes = [
-            _calculate_metric(c, ref_cube, metric, coords, **kwargs)
+            _calculate_metric(c, reference, metric, coords, **kwargs)
             for c in products
         ]
         return CubeList(cubes)
@@ -362,7 +362,7 @@ def distance_metric(
         cube = concatenate(product.cubes)

         # Calculate distance metric
-        cube = _calculate_metric(cube, ref_cube, metric, coords, **kwargs)
+        cube = _calculate_metric(cube, reference, metric, coords, **kwargs)

         # Adapt metadata and provenance information
         product.attributes['standard_name'] = cube.standard_name
@@ -408,21 +408,21 @@ def _calculate_metric(
 def _calculate_metric(
     cube: Cube,
-    ref_cube: Cube,
+    reference: Cube,
     metric: MetricType,
     coords: Iterable[Coord] | Iterable[str] | None,
     **kwargs,
 ) -> Cube:
     """Calculate metric for a single cube relative to a reference cube."""
     # Make sure that dimensional metadata of data and ref data is compatible
-    if cube.shape != ref_cube.shape:
+    if cube.shape != reference.shape:
         raise ValueError(
             f"Expected identical shapes of cube and reference cube for "
             f"distance metric calculation, got {cube.shape} and "
-            f"{ref_cube.shape}, respectively"
+            f"{reference.shape}, respectively"
         )
     try:
-        cube + ref_cube  # dummy operation to check if cubes are compatible
+        cube + reference  # dummy operation to check if cubes are compatible
     except Exception as exc:
         raise ValueError(
             "Cannot calculate distance metric between cube and reference cube "
@@ -445,7 +445,7 @@ def _calculate_metric(
         raise ValueError(
             f"Expected one of {list(metrics_funcs)} for metric, got '{metric}'"
         )
-    (res_data, res_metadata) = metrics_funcs[metric](cube, ref_cube, coords)
+    (res_data, res_metadata) = metrics_funcs[metric](cube, reference, coords)

     # Get result cube with correct dimensional metadata by using dummy
     # operation (max)
@@ -499,7 +499,7 @@ def _calculate_rmse(
     cube: Cube,
-    ref_cube: Cube,
+    reference: Cube,
     coords: Iterable[Coord] | Iterable[str],
     *,
     weighted: bool,
@@ -508,7 +508,7 @@ def _calculate_rmse(
     # Data
     axis = _get_all_coord_dims(cube, coords)
     weights = _get_weights(cube, coords) if weighted else None
-    squared_error = 
(cube.core_data() - ref_cube.core_data())**2 + squared_error = (cube.core_data() - reference.core_data())**2 npx = get_array_module(squared_error) rmse = npx.sqrt(npx.ma.average(squared_error, axis=axis, weights=weights)) @@ -527,7 +527,7 @@ def _calculate_rmse( def _calculate_pearsonr( cube: Cube, - ref_cube: Cube, + reference: Cube, coords: Iterable[Coord] | Iterable[str], *, weighted: bool, @@ -537,7 +537,7 @@ def _calculate_pearsonr( # Data weights = _get_weights(cube, coords) if weighted else None res_cube = iris.analysis.stats.pearsonr( - cube, ref_cube, corr_coords=coords, weights=weights, **kwargs + cube, reference, corr_coords=coords, weights=weights, **kwargs ) # Metadata @@ -558,7 +558,7 @@ def _calculate_pearsonr( def _calculate_emd( cube: Cube, - ref_cube: Cube, + reference: Cube, coords: Iterable[Coord] | Iterable[str], *, n_bins: int = 100, @@ -566,13 +566,13 @@ def _calculate_emd( """Calculate Earth mover's distance.""" # Make sure that data is not chunked along `coords` cube = rechunk_cube(cube, coords) - ref_cube = rechunk_cube(ref_cube, coords) + reference = rechunk_cube(reference, coords) # Data axes = _get_all_coord_dims(cube, coords) - (bins, bin_centers) = _get_bins(cube, ref_cube, n_bins) + (bins, bin_centers) = _get_bins(cube, reference, n_bins) - if cube.has_lazy_data() and ref_cube.has_lazy_data(): + if cube.has_lazy_data() and reference.has_lazy_data(): func = partial( _calculate_emd_lazy, axes=axes, bins=bins, bin_centers=bin_centers ) @@ -580,7 +580,7 @@ def _calculate_emd( func = partial( _calculate_emd_eager, axes=axes, bins=bins, bin_centers=bin_centers ) - emd = func(cube.core_data(), ref_cube.core_data()) + emd = func(cube.core_data(), reference.core_data()) # Metadata metadata = CubeMetadata( @@ -597,11 +597,11 @@ def _calculate_emd( def _get_bins( cube: Cube, - ref_cube: Cube, + reference: Cube, n_bins: int, ) -> tuple[np.ndarray, np.ndarray]: """Get bins for discretization of data.""" - all_data = da.stack([cube.core_data(), ref_cube.core_data()]) + all_data = da.stack([cube.core_data(), reference.core_data()]) (min_, max_) = dask.compute(all_data.min(), all_data.max()) small_value = (max_ - min_) * 0.01 / n_bins bins = np.linspace(min_ - small_value, max_ + small_value, n_bins + 1) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 5b299ce4c9..b03075bab3 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -532,7 +532,7 @@ def test_distance_metric_cubes( ): """Test `distance_metric` with cubes.""" regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) - out_cubes = distance_metric(regular_cubes, metric, ref_cube=ref_cubes[0]) + out_cubes = distance_metric(regular_cubes, metric, reference=ref_cubes[0]) assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 @@ -595,7 +595,7 @@ def test_distance_metric_masked_data( cube.data = da.array(cube.data) ref_cube.data = da.array(ref_cube.data) - out_cubes = distance_metric([cube], metric, ref_cube=ref_cube) + out_cubes = distance_metric([cube], metric, reference=ref_cube) assert isinstance(out_cubes, CubeList) assert len(out_cubes) == 1 @@ -640,7 +640,7 @@ def test_distance_metric_fully_masked_data( cube.data = da.array(cube.data) ref_cube.data = da.array(ref_cube.data) - out_cubes = distance_metric([cube], metric, ref_cube=ref_cube) + out_cubes = distance_metric([cube], metric, 
reference=ref_cube)

     assert isinstance(out_cubes, CubeList)
     assert len(out_cubes) == 1
@@ -719,11 +719,11 @@ def test_invalid_metric(regular_cubes, ref_cubes):


 @pytest.mark.parametrize('metric', TEST_METRICS)
-def test_distance_metric_ref_cube_non_cubes(regular_cubes, metric):
-    """Test distance metric with ref_cube=None with with cubes."""
+def test_distance_metric_reference_non_cubes(regular_cubes, metric):
+    """Test distance metric with reference=None with cubes."""
     msg = (
         "A list of Cubes is given to this preprocessor; please specify a "
-        "`ref_cube`"
+        "`reference`"
     )
     with pytest.raises(ValueError, match=msg):
         distance_metric(regular_cubes, metric)


 @pytest.mark.parametrize('metric', TEST_METRICS)
 def test_distance_metric_no_named_dimensions(metric):
-    """Test distance metric with ref_cube=None with with cubes."""
+    """Test distance metric with reference=None with cubes."""
     ref_cube = Cube([0, 1])
     cubes = CubeList([ref_cube])
     msg = (
         "dimensions"
     )
     with pytest.raises(ValueError, match=msg):
-        distance_metric(cubes, metric, ref_cube=ref_cube)
+        distance_metric(cubes, metric, reference=ref_cube)


 @pytest.mark.parametrize('metric', TEST_METRICS)
 def test_distance_metric_non_matching_shapes(regular_cubes, metric):
-    """Test distance metric with ref_cube=None with with cubes."""
+    """Test distance metric with cubes of different shapes."""
     ref_cube = Cube(0)
     msg = (
         r"Expected identical shapes of cube and reference cube for distance "
         r"metric calculation, got \(2, 2, 2\) and \(\), respectively"
     )
     with pytest.raises(ValueError, match=msg):
-        distance_metric(regular_cubes, metric, ref_cube=ref_cube)
+        distance_metric(regular_cubes, metric, reference=ref_cube)


 @pytest.mark.parametrize('metric', TEST_METRICS)
 def test_distance_metric_non_matching_dims(regular_cubes, metric):
-    """Test distance metric with ref_cube=None with with cubes."""
+    """Test distance metric with cubes with different dimensions."""
     ref_cube = regular_cubes[0].copy()
     ref_cube.remove_coord('time')
     new_coord = iris.coords.DimCoord([0.0, 1.0], var_name='not_time')
     ref_cube.add_dim_coord(new_coord, 0)
     msg = "Cannot calculate distance metric between cube and reference cube"
     with pytest.raises(ValueError, match=msg):
-        distance_metric(regular_cubes, metric, ref_cube=ref_cube)
+        distance_metric(regular_cubes, metric, reference=ref_cube)


 @pytest.mark.parametrize(
@@ -794,6 +794,6 @@ def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error):
         distance_metric(
             regular_cubes,
             metric,
-            ref_cube=ref_cube,
+            reference=ref_cube,
             coords=['time', 'latitude']
         )
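After this rename, calling the preprocessor on plain cubes looks as follows.
A minimal sketch (synthetic one-dimensional cubes; values chosen so the RMSE
is easy to verify by hand; not part of the patch series):

    import numpy as np
    from iris.coords import DimCoord
    from iris.cube import Cube
    from esmvalcore.preprocessor import distance_metric

    time = DimCoord(
        [0.0, 1.0, 2.0, 3.0], standard_name='time',
        units='days since 2000-01-01',
    )
    cube = Cube(
        np.array([1.0, 2.0, 3.0, 4.0]), var_name='tas', units='K',
        dim_coords_and_dims=[(time, 0)],
    )
    ref = cube.copy(np.array([1.0, 2.0, 3.0, 5.0]))

    # With plain cubes, the reference must be passed explicitly
    (result,) = distance_metric([cube], 'rmse', reference=ref)
    print(result.data)  # sqrt(mean([0, 0, 0, 1])) = 0.5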
From b36af0153ad6d790658c6aadf0c3cc17afa99374 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 15 Feb 2024 15:24:16 +0100
Subject: [PATCH 25/51] ref_cube -> reference in bias preproc

---
 esmvalcore/preprocessor/_compare_with_refs.py | 49 +++++++++----------
 .../test_compare_with_refs.py | 10 ++--
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 5c9874e2fa..8f1831baa7 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -36,7 +36,7 @@ def bias(
     products: set[PreprocessorFile] | Iterable[Cube],
-    ref_cube: Optional[Cube] = None,
+    reference: Optional[Cube] = None,
     bias_type: BiasType = 'absolute',
     denominator_mask_threshold: float = 1e-3,
     keep_reference_dataset: bool = False,
@@ -52,10 +52,10 @@ def bias(
     Notes
     -----
-    The reference dataset can be specified with the `ref_cube` argument. If
-    `ref_cube` is ``None``, exactly one input dataset in the `products` set
+    The reference dataset can be specified with the `reference` argument. If
+    `reference` is ``None``, exactly one input dataset in the `products` set
     needs to have the facet ``reference_for_bias: true`` defined in the recipe.
-    Please do **not** specify the option `ref_cube` when using this
+    Please do **not** specify the option `reference` when using this
     preprocessor function in a recipe.

     Parameters
     ----------
     products:
         Input datasets/cubes for which the bias is calculated relative to a
         reference dataset/cube.
-    ref_cube:
+    reference:
         Cube which is used as reference for the bias calculation. If ``None``,
         `products` needs to be a :obj:`set` of
         `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one
@@ -84,7 +84,7 @@ def bias(
         results.
     keep_reference_dataset:
         If ``True``, keep the reference dataset in the output. If ``False``,
-        drop the reference dataset. Ignored if `ref_cube` is given.
+        drop the reference dataset. Ignored if `reference` is given.

     Returns
     -------
@@ -96,40 +96,39 @@ def bias(
     Raises
     ------
     ValueError
-        Not exactly one input datasets contains the facet
-        ``reference_for_bias: true`` if ``ref_cube=None``; ``ref_cube=None``
-        and the input products are given as iterable of
-        :class:`~iris.cube.Cube` objects; ``bias_type`` is not one of
-        ``'absolute'`` or ``'relative'``.
+        Not exactly one input dataset contains the facet
+        ``reference_for_bias: true`` if ``reference=None``; ``reference=None``
+        and the input products are given as iterable of
+        :class:`~iris.cube.Cube` objects; ``bias_type`` is not one of
+        ``'absolute'`` or ``'relative'``.

     """
     ref_product = None
     all_cubes_given = all(isinstance(p, Cube) for p in products)

     # Get reference cube if not explicitly given
-    if ref_cube is None:
+    if reference is None:
         if all_cubes_given:
             raise ValueError(
                 "A list of Cubes is given to this preprocessor; please "
-                "specify a `ref_cube`"
+                "specify a `reference`"
             )
-        (ref_cube, ref_product) = _get_ref(products, 'reference_for_bias')
+        (reference, ref_product) = _get_ref(products, 'reference_for_bias')
     else:
         ref_product = None

     # Mask reference cube appropriately for relative biases
     if bias_type == 'relative':
-        ref_cube = ref_cube.copy()
-        npx = get_array_module(ref_cube.core_data())
-        ref_cube.data = npx.ma.masked_inside(
-            ref_cube.core_data(),
+        reference = reference.copy()
+        npx = get_array_module(reference.core_data())
+        reference.data = npx.ma.masked_inside(
+            reference.core_data(),
             -denominator_mask_threshold,
             denominator_mask_threshold,
         )

     # If input is an Iterable of Cube objects, calculate bias for each element
     if all_cubes_given:
-        cubes = [_calculate_bias(c, ref_cube, bias_type) for c in products]
+        cubes = [_calculate_bias(c, reference, bias_type) for c in products]
         return CubeList(cubes)

     # Otherwise, iterate over all input products, calculate bias and adapt
@@ -141,7 +140,7 @@ def bias(
         cube = concatenate(product.cubes)

         # Calculate bias
-        cube = _calculate_bias(cube, ref_cube, bias_type)
+        cube = _calculate_bias(cube, reference, bias_type)

         # Adapt metadata and provenance information
         product.attributes['units'] = str(cube.units)
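The masking step above is what keeps relative biases finite: reference values
inside ±``denominator_mask_threshold`` are masked before the division. A small
sketch of the effect on plain arrays (illustrative values; not part of the
patch series):

    import numpy as np

    threshold = 1e-3
    ref = np.array([2.0, 1e-05, -4.0])   # second value is close to zero
    data = np.array([3.0, 1.0, -3.0])

    ref_masked = np.ma.masked_inside(ref, -threshold, threshold)
    rel_bias = (data - ref_masked) / ref_masked
    print(rel_bias)  # [0.5 -- -0.25]: the near-zero cell is masked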
@@ -178,20 +177,20 @@ def _get_ref(products, ref_tag: str) -> tuple[Cube, PreprocessorFile]:
     # the preprocessing chain of ESMValTool. To make sure that this
     # preprocessor can also be used outside the ESMValTool preprocessing chain,
     # an additional concatenate call is added here.
-    ref_cube = concatenate(ref_product.cubes)
+    reference = concatenate(ref_product.cubes)

-    return (ref_cube, ref_product)
+    return (reference, ref_product)


-def _calculate_bias(cube: Cube, ref_cube: Cube, bias_type: BiasType) -> Cube:
+def _calculate_bias(cube: Cube, reference: Cube, bias_type: BiasType) -> Cube:
     """Calculate bias for a single cube relative to a reference cube."""
     cube_metadata = cube.metadata

     if bias_type == 'absolute':
-        cube = cube - ref_cube
+        cube = cube - reference
         new_units = cube.units
     elif bias_type == 'relative':
-        cube = (cube - ref_cube) / ref_cube
+        cube = (cube - reference) / reference
         new_units = '1'
     else:
         raise ValueError(
diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index b03075bab3..6cd96cdbec 100644
--- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -275,7 +275,7 @@ def test_bias_products_and_ref_cube(
     out_products = bias(
         products,
-        ref_cube=ref_cube,
+        reference=ref_cube,
         bias_type=bias_type,
         keep_reference_dataset=keep_ref,
     )
@@ -337,11 +337,11 @@ def test_invalid_bias_type(regular_cubes, ref_cubes):
         bias(products, bias_type='invalid_bias_type')


-def test_ref_cube_non_cubes(regular_cubes):
-    """Test ref_cube=None with with cubes."""
+def test_reference_none_cubes(regular_cubes):
+    """Test reference=None with cubes."""
     msg = (
         "A list of Cubes is given to this preprocessor; please specify a "
-        "`ref_cube`"
+        "`reference`"
     )
     with pytest.raises(ValueError, match=msg):
         bias(regular_cubes)
@@ -719,7 +719,7 @@ def test_invalid_metric(regular_cubes, ref_cubes):

 @pytest.mark.parametrize('metric', TEST_METRICS)
-def test_distance_metric_reference_non_cubes(regular_cubes, metric):
+def test_distance_metric_reference_none_cubes(regular_cubes, metric):
     """Test distance metric with reference=None with cubes."""

From 0d9f4b291efa4b77f6c65034c7b01170d40cde3f Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Thu, 15 Feb 2024 15:44:09 +0100
Subject: [PATCH 26/51] Fix typo in doc

---
 doc/recipe/preprocessor.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 5c5855a847..71e12748e0 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2596,8 +2596,8 @@ recipe:
  and a corresponding reference, respectively (a bar over a variable denotes
  its arithmetic/weighted mean [the latter for weighted metrics]).
  Capital letters (`X`\ :sub:`i` and `R`\ :sub:`i`) refer to bin centers of a
-  discrete probability distribution with values `p`\ :sub:`X`(`X`\ :sub:`i`) or
-  `p`\ :sub:`X` (`R`\ :sub:`i`) and a number of bins `n` (see the argument
+  discrete probability distribution with values `p`\ :sub:`X`\ (`X`\ :sub:`i`)
+  or `p`\ :sub:`R`\ (`R`\ :sub:`i`) and a number of bins `n` (see the argument
  ``n_bins`` below) that has been derived for the variables `x` and `r` through
  binning.
`w`\ :sub:`i` are weights that sum to one (see note below) and `N` is the From b825426908661343aa98da955707d703648272af Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 1 Mar 2024 12:13:50 +0100 Subject: [PATCH 27/51] Avoid potential memory leak --- esmvalcore/preprocessor/_compare_with_refs.py | 55 ++++++++++--------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 8f1831baa7..5200de85c3 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -572,14 +572,16 @@ def _calculate_emd( (bins, bin_centers) = _get_bins(cube, reference, n_bins) if cube.has_lazy_data() and reference.has_lazy_data(): - func = partial( - _calculate_emd_lazy, axes=axes, bins=bins, bin_centers=bin_centers - ) + func = _calculate_emd_lazy # type: ignore else: - func = partial( - _calculate_emd_eager, axes=axes, bins=bins, bin_centers=bin_centers - ) - emd = func(cube.core_data(), reference.core_data()) + func = _calculate_emd_eager # type: ignore + emd = func( + cube.core_data(), + reference.core_data(), + bins, + bin_centers, + along_axes=axes, + ) # Metadata metadata = CubeMetadata( @@ -611,38 +613,37 @@ def _get_bins( def _calculate_emd_lazy( data: da.Array, ref_data: da.Array, - *, - axes: tuple[int, ...], bins: np.ndarray, bin_centers: np.ndarray, + *, + along_axes: tuple[int, ...], ) -> np.ndarray: """Calculate Earth mover's distance along axes (eager version).""" - n_axes = len(axes) + n_axes = len(along_axes) # da.apply_gufunc transposes the input array so that the axes given by the - # `axes` argument to this function are the rightmost dimensions. Thus, we - # need to use `axes=(ndim-n_axes, ..., ndim-2, ndim-1)` for + # `axes` argument to da.apply_gufunc are the rightmost dimensions. Thus, we + # need to use `along_axes=(ndim-n_axes, ..., ndim-2, ndim-1)` for # _calculate_emd_eager here. axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) # The call signature depends also on the number of axes in `axes`, and will - # be (a,b,...)->() - input_signature = f"({','.join(list(string.ascii_lowercase)[:len(axes)])})" - signature = f"{input_signature},{input_signature}->()" + # be (a,b,...),(a,b,...),(z),(y)->() where a,b,... are the data dimensions + # that are collapsed, z is the number of bin edges, and y the number of bin + # centers. 
+ input_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" + signature = f"{input_signature},{input_signature},(z),(y)->()" - _calculate_emd_for_chunk = partial( - _calculate_emd_eager, - axes=axes_in_chunk, - bins=bins, - bin_centers=bin_centers, - ) emd = da.apply_gufunc( - _calculate_emd_for_chunk, + _calculate_emd_eager, signature, data, ref_data, - axes=[axes, axes, ()], + bins, + bin_centers, + axes=[along_axes, along_axes, (0,), (0,), ()], output_dtypes=data.dtype, + along_axes=axes_in_chunk, ) return emd @@ -651,14 +652,14 @@ def _calculate_emd_lazy( def _calculate_emd_eager( data: np.ndarray, ref_data: np.ndarray, - *, - axes: tuple[int, ...], bins: np.ndarray, bin_centers: np.ndarray, + *, + along_axes: tuple[int, ...], ) -> np.ndarray: """Calculate Earth mover's distance along axes (eager version).""" - pmf = _get_pmf(data, axes, bins) - ref_pmf = _get_pmf(ref_data, axes, bins) + pmf = _get_pmf(data, along_axes, bins) + ref_pmf = _get_pmf(ref_data, along_axes, bins) # Get vectorized version of scipy.stats.wasserstein_distance that also # handles masks properly and calculate EMD metric (= First Wasserstein From 5823b1d69ef4e57247e786701b687703cbacabbb Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 27 Mar 2024 16:51:42 +0100 Subject: [PATCH 28/51] Fixed units of EMD --- esmvalcore/preprocessor/_compare_with_refs.py | 2 +- .../test_compare_with_refs.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 5200de85c3..6756a42c82 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -588,7 +588,7 @@ def _calculate_emd( None, 'EMD' if cube.long_name is None else f'EMD of {cube.long_name}', 'emd' if cube.var_name is None else f'emd_{cube.var_name}', - '1', + cube.units, cube.attributes, cube.cell_methods, ) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 6cd96cdbec..bc18c1f465 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -56,20 +56,22 @@ def get_3d_cube(data, **cube_kwargs): @pytest.fixture def regular_cubes(): - """Regular cube.""" + """Regular cubes.""" cube_data = np.arange(8.0).reshape(2, 2, 2) - cube = get_3d_cube(cube_data, standard_name='air_temperature', - var_name='tas', units='K') + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) return CubeList([cube]) @pytest.fixture def ref_cubes(): - """Reference cube.""" + """Reference cubes.""" cube_data = np.full((2, 2, 2), 2.0) cube_data[1, 1, 1] = 4.0 - cube = get_3d_cube(cube_data, standard_name='air_temperature', - var_name='tas', units='K') + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) return CubeList([cube]) @@ -352,7 +354,7 @@ def test_reference_none_cubes(regular_cubes): ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'), ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'), - ('emd', 1.9866472482681274, 0.0, 'EMD', 'emd_tas', '1'), + ('emd', 1.9866472482681274, 0.0, 'EMD', 'emd_tas', 'K'), ] AREA_WEIGHTS = CellMeasure( np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), @@ -465,7 +467,7 @@ def test_distance_metric( 
'1', ), ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), - ('emd', [0.980196, 2.9930985], 'EMD', 'emd_tas', '1'), + ('emd', [0.980196, 2.9930985], 'EMD', 'emd_tas', 'K'), ] From 8a22f5af005225213fbced13cfcdc1a6aec5137e Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 27 Mar 2024 17:09:30 +0100 Subject: [PATCH 29/51] Make rechunk_cube work with any coords --- esmvalcore/iris_helpers.py | 12 +++--------- tests/unit/test_iris_helpers.py | 14 +++----------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/esmvalcore/iris_helpers.py b/esmvalcore/iris_helpers.py index 9b5fddbfe1..ea61d8f903 100644 --- a/esmvalcore/iris_helpers.py +++ b/esmvalcore/iris_helpers.py @@ -236,7 +236,7 @@ def rechunk_cube( Input cube. complete_coords: (Names of) coordinates along which the output cubes should not be - chunked. The given coordinates must span exactly 1 dimension. + chunked. remaining_dims: Chunksize of the remaining dimensions. @@ -248,17 +248,11 @@ def rechunk_cube( """ cube = cube.copy() # do not modify input cube - # Make sure that complete_coords span exactly 1 dimension complete_dims = [] for coord in complete_coords: coord = cube.coord(coord) - dims = cube.coord_dims(coord) - if len(dims) != 1: - raise CoordinateMultiDimError( - f"Complete coordinates must be 1D coordinates, got " - f"{len(dims):d}D coordinate '{coord.name()}'" - ) - complete_dims.append(dims[0]) + complete_dims.extend(cube.coord_dims(coord)) + complete_dims = list(set(complete_dims)) # Rechunk data if cube.has_lazy_data(): diff --git a/tests/unit/test_iris_helpers.py b/tests/unit/test_iris_helpers.py index 0b742ffe30..15d1a2bc01 100644 --- a/tests/unit/test_iris_helpers.py +++ b/tests/unit/test_iris_helpers.py @@ -302,7 +302,8 @@ def test_rechunk_cube_fully_lazy(cube_3d): assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) -def test_rechunk_cube_partly_lazy(cube_3d): +@pytest.mark.parametrize('complete_dims', [['x', 'y'], ['xy']]) +def test_rechunk_cube_partly_lazy(cube_3d, complete_dims): """Test ``rechunk_cube``.""" input_cube = cube_3d.copy() @@ -312,7 +313,7 @@ def test_rechunk_cube_partly_lazy(cube_3d): input_cube.coord('xyz').bounds input_cube.cell_measure('cell_measure').data - result = rechunk_cube(input_cube, ['x', 'y'], remaining_dims=2) + result = rechunk_cube(input_cube, complete_dims, remaining_dims=2) assert input_cube == cube_3d assert result == cube_3d @@ -333,15 +334,6 @@ def test_rechunk_cube_partly_lazy(cube_3d): assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) -def test_rechunk_cube_invalid_coord_fail(cube_3d): - """Test ``rechunk_cube``.""" - msg = ( - "Complete coordinates must be 1D coordinates, got 2D coordinate 'xy'" - ) - with pytest.raises(CoordinateMultiDimError, match=msg): - rechunk_cube(cube_3d, ['xy']) - - @pytest.fixture def lat_coord_1d(): """1D latitude coordinate.""" From 6f0e2a0b9af23f174eb6c86fc772e623efdabe99 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 27 Mar 2024 17:50:21 +0100 Subject: [PATCH 30/51] Added histogram preprocessor --- esmvalcore/preprocessor/_compare_with_refs.py | 48 +-- esmvalcore/preprocessor/_other.py | 302 ++++++++++++++++++ tests/unit/preprocessor/_other/test_other.py | 75 +++++ 3 files changed, 389 insertions(+), 36 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 6756a42c82..3578f9d3a1 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ 
b/esmvalcore/preprocessor/_compare_with_refs.py @@ -22,7 +22,11 @@ from esmvalcore.iris_helpers import rechunk_cube from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._io import concatenate -from esmvalcore.preprocessor._other import get_array_module +from esmvalcore.preprocessor._other import ( + get_all_coord_dims, + get_all_coords, + get_array_module, +) from esmvalcore.preprocessor._time import get_time_weights if TYPE_CHECKING: @@ -377,34 +381,6 @@ def distance_metric( return output_products -def _get_coords( - cube: Cube, - coords: Iterable[Coord] | Iterable[str] | None, -) -> Iterable[Coord] | Iterable[str]: - """Get coordinates over which distance metric is calculated.""" - if coords is None: - coords = [c.name() for c in cube.dim_coords] - if len(coords) != cube.ndim: - raise ValueError( - f"If coords=None is specified, the cube " - f"{cube.summary(shorten=True)} must not have unnamed " - f"dimensions" - ) - return coords - - -def _get_all_coord_dims( - cube: Cube, - coords: Iterable[Coord] | Iterable[str], -) -> tuple[int, ...]: - """Get sorted list of all coordinate dimensions from coordinates.""" - all_coord_dims = [] - for coord in coords: - all_coord_dims.extend(cube.coord_dims(coord)) - sorted_all_coord_dims = sorted(list(set(all_coord_dims))) - return tuple(sorted_all_coord_dims) - - def _calculate_metric( cube: Cube, reference: Cube, @@ -430,7 +406,7 @@ def _calculate_metric( # Perform the actual calculation of the distance metric # Note: we work on arrays here instead of cube to stay as flexible as # possible since some operations (e.g., sqrt()) are not available for cubes - coords = _get_coords(cube, coords) + coords = get_all_coords(cube, coords) metrics_funcs = { 'weighted_rmse': partial(_calculate_rmse, weighted=True, **kwargs), 'rmse': partial(_calculate_rmse, weighted=False, **kwargs), @@ -459,7 +435,7 @@ def _calculate_metric( def _get_weights( cube: Cube, coords: Iterable[Coord] | Iterable[str], -) -> da.Array: +) -> np.ndarray | da.Array: """Calculate weights for weighted distance metrics.""" npx = get_array_module(cube.core_data()) weights = npx.ones(cube.shape, dtype=cube.dtype) @@ -505,7 +481,7 @@ def _calculate_rmse( ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate root mean square error.""" # Data - axis = _get_all_coord_dims(cube, coords) + axis = get_all_coord_dims(cube, coords) weights = _get_weights(cube, coords) if weighted else None squared_error = (cube.core_data() - reference.core_data())**2 npx = get_array_module(squared_error) @@ -568,7 +544,7 @@ def _calculate_emd( reference = rechunk_cube(reference, coords) # Data - axes = _get_all_coord_dims(cube, coords) + axes = get_all_coord_dims(cube, coords) (bins, bin_centers) = _get_bins(cube, reference, n_bins) if cube.has_lazy_data() and reference.has_lazy_data(): @@ -617,8 +593,8 @@ def _calculate_emd_lazy( bin_centers: np.ndarray, *, along_axes: tuple[int, ...], -) -> np.ndarray: - """Calculate Earth mover's distance along axes (eager version).""" +) -> da.Array: + """Calculate Earth mover's distance along axes (lazy version).""" n_axes = len(along_axes) # da.apply_gufunc transposes the input array so that the axes given by the @@ -680,7 +656,7 @@ def _get_pmf( axes: tuple[int, ...], bins: np.ndarray, ) -> np.ndarray: - """Get probaility mass function (PMF) of data along axes. + """Get probability mass function (PMF) of data along axes. 
This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are the dimensions of `data` not appearing in `axes` and `n_bins` is the number diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 14594ba7cf..6e6140c0cd 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -1,10 +1,20 @@ """Preprocessor functions that do not fit into any of the categories.""" +from __future__ import annotations import logging +import string from collections import defaultdict +from collections.abc import Iterable, Sequence +from typing import Literal +import dask import dask.array as da +import iris.analysis import numpy as np +from iris.coords import CellMethod, Coord, DimCoord +from iris.cube import Cube + +from esmvalcore.iris_helpers import add_leading_dim_to_cube, rechunk_cube logger = logging.getLogger(__name__) @@ -85,3 +95,295 @@ def get_array_module(*args): if isinstance(arg, da.Array): return da return np + + +def get_all_coords( + cube: Cube, + coords: Iterable[Coord] | Iterable[str] | None, +) -> Iterable[Coord] | Iterable[str]: + """Get all desired coordinates in a cube.""" + if coords is None: + coords = [c.name() for c in cube.dim_coords] + if len(coords) != cube.ndim: + raise ValueError( + f"If coords=None is specified, the cube " + f"{cube.summary(shorten=True)} must not have unnamed " + f"dimensions" + ) + return coords + + +def get_all_coord_dims( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> tuple[int, ...]: + """Get sorted list of all coordinate dimensions from coordinates.""" + all_coord_dims = [] + for coord in coords: + all_coord_dims.extend(cube.coord_dims(coord)) + sorted_all_coord_dims = sorted(list(set(all_coord_dims))) + return tuple(sorted_all_coord_dims) + + +def histogram( + cube: Cube, + coords: Iterable[Coord] | Iterable[str] | None = None, + bins: int | Sequence[float] = 10, + bin_range: tuple[float, float] | None = None, + normalization: Literal['sum', 'integral'] | None = None, +) -> Cube: + """Calculate histogram. + + Very similar to :func:`numpy.histogram`, but calculates histogram only over + the given coordinates. + + Handles lazy data and masked data. + + Parameters + ---------- + cube: + Input cube. + coords: + Coordinates over which the histogram is calculated. If ``None``, + calculate the histogram over all coordinates, which results in a scalar + cube. + bins: + If `bins` is an :obj:`int`, it defines the number of equal-width bins + in the given `bin_range`. If `bins` is a sequence, it defines a + monotonically increasing array of bin edges, including the rightmost + edge, allowing for non-uniform bin widths. + bin_range: + The lower and upper range of the bins. If not provided, `bin_range` is + simply (``cube.core_data().min(), cube.core_data().max()``). Values + outside the range are ignored. The first element of the range must be + less than or equal to the second. `bin_range` affects the automatic bin + computation as well if `bins` is an :obj:`int` (see description for + `bins` above). + normalization: + If ``None``, the result will contain the number of samples in each bin. + If ``'integral'``, the result is the value of the probability `density` + function at the bin, normalized such that the integral over the range + is 1. If ``'sum'``, the result is the value of the probability + `mass` function at the bin, normalized such that the sum over + the range is 1. Normalization will be applied across `coords`, not the + entire cube. 
+ + Returns + ------- + Cube + Histogram cube. The shape of this cube will be `(x1, x2, ..., n_bins)`, + where `xi` are the dimensions of the input cube not appearing in + `coords` and `n_bins` is the number of bins. + + Raises + ------ + TypeError + Invalid `bin` type given + ValueError + Invalid `normalization` given. + + """ + # Check arguments + if isinstance(bins, str): + raise TypeError("bins cannot be a str, must be int or Sequence of int") + allowed_norms = (None, 'sum', 'integral') + if normalization is not None and normalization not in allowed_norms: + raise ValueError( + f"Expected one of {allowed_norms} for normalization, got " + f"'{normalization}'" + ) + + # Calculate bin edges + if bin_range is None: + bin_range = dask.compute( + cube.core_data().min(), cube.core_data().max() + ) + if isinstance(bins, int): + bin_edges = np.linspace( + bin_range[0], bin_range[1], bins + 1, dtype=np.float64 + ) + else: + bin_edges = np.array(bins, dtype=np.float64) + + # If histogram is calculated over all coordinates, we can use + # dask.array.histogram and do not need to worry about chunks; otherwise, + # make sure that the cube is not chunked along the given coordinates + coords = get_all_coords(cube, coords) + axes = get_all_coord_dims(cube, coords) + if cube.has_lazy_data() and len(axes) == cube.ndim: + cube = rechunk_cube(cube, coords) + + # Calculate histogram + if cube.has_lazy_data(): + func = _calculate_histogram_lazy # type: ignore + else: + func = _calculate_histogram_eager # type: ignore + hist_data = func( + cube.core_data(), + along_axes=axes, + bin_edges=bin_edges, + bin_range=bin_range, + normalization=normalization, + ) + + # Get final cube with correct metadata and data + hist_cube = _get_histogram_cube(cube, coords, bin_edges, normalization) + hist_cube.data = hist_data.astype(cube.dtype) + + return hist_cube + + +def _calculate_histogram_lazy( + data: da.Array, + *, + along_axes: tuple[int, ...], + bin_edges: np.ndarray, + bin_range: tuple[float, float], + normalization: Literal['sum', 'integral'] | None = None, +) -> da.Array: + """Calculate histogram over data along axes (lazy version). + + This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are + the dimensions of `data` not appearing in `axes` and `n_bins` is the number + of bins. + + """ + n_axes = len(along_axes) + + # If histogram is calculated over all axes, use the efficient da.histogram + # function + if n_axes == data.ndim: + data = data.ravel() + data = data[~da.ma.getmaskarray(data)] + hist = da.histogram(data, bins=bin_edges, range=bin_range)[0] + if normalization == 'sum': + hist = hist / hist.sum() + elif normalization == 'integral': + diffs = np.array(np.diff(bin_edges), dtype=data.dtype) + hist = hist / hist.sum() / diffs + hist = da.ma.masked_invalid(hist) + + # (2) Otherwise, use da.apply_gufunc with the eager version + # _calculate_histogram_eager + else: + # da.apply_gufunc transposes the input array so that the axes given by + # the `axes` argument to da.apply_gufunc are the rightmost dimensions. + # Thus, we need to use `along_axes=(ndim-n_axes, ..., ndim-2, ndim-1)` + # for _calculate_histogram_eager here. + axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) + + # The call signature depends also on the number of axes in `axes`, and + # will be (a,b,...),(a,b,...),(z),(y)->() where a,b,... are the data + # dimensions that are collapsed, z is the number of bin edges, and y + # the number of bin centers. 
+ input_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" + hist = da.apply_gufunc( + _calculate_histogram_eager, + f"{input_signature}->(nbins)", + data, + axes=[along_axes, (0,)], + output_sizes={'nbins': len(bin_edges) - 1}, + along_axes=axes_in_chunk, + bin_edges=bin_edges, + bin_range=bin_range, + normalization=normalization, + ) + + return hist + + +def _calculate_histogram_eager( + data: np.ndarray, + *, + along_axes: tuple[int, ...], + bin_edges: np.ndarray, + bin_range: tuple[float, float], + normalization: Literal['sum', 'integral'] | None = None, +) -> np.ndarray: + """Calculate histogram over data along axes (eager version). + + This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are + the dimensions of `data` not appearing in `axes` and `n_bins` is the number + of bins. + + """ + # Create array with shape (x1, x2, ..., y) where `y` is the product of all + # dimensions in `axes` and the `xi` are the remaining dimensions + remaining_dims = tuple(a for a in range(data.ndim) if a not in along_axes) + reshaped_data = np.transpose(data, axes=(*remaining_dims, *along_axes)) + shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) + reshaped_data = reshaped_data.reshape(*shape_rem_dims, -1) + + # Apply vectorized version of np.histogram + def _get_hist_values(arr): + mask = np.ma.getmaskarray(arr) + arr = arr[~mask] + return np.histogram(arr, bins=bin_edges, range=bin_range)[0] + + v_histogram = np.vectorize(_get_hist_values, signature='(n)->(m)') + hist = v_histogram(reshaped_data) + + # Mask points where all input data were masked (these are the ones where + # the histograms sums to 0) + hist_sum = hist.sum(axis=-1, keepdims=True) + mask = np.isclose(hist_sum, 0.0) + mask_broadcast = np.broadcast_to(mask, hist.shape) + hist = np.ma.array(hist, mask=mask_broadcast) + + # Apply normalization + if normalization == 'sum': + hist = hist / np.ma.array(hist_sum, mask=mask) + elif normalization == 'integral': + diffs = np.ma.array(np.diff(bin_edges), dtype=data.dtype) + hist = hist / np.ma.array(hist_sum, mask=mask) / diffs + + return hist + + +def _get_histogram_cube( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], + bin_edges: np.ndarray, + normalization: Literal['sum', 'integral'] | None, +): + """Get cube with correct metadata for histogram.""" + # Calculate bin centers using 2-window running mean and get corresponding + # coordinate + bin_centers = np.convolve(bin_edges, np.ones(2), 'valid') / 2.0 + bin_coord = DimCoord( + bin_centers, + bounds=np.stack((bin_edges[:-1], bin_edges[1:]), axis=-1), + var_name='bin', + long_name='Histogram Bin', + units=cube.units, + ) + + # Get result cube with correct dimensional metadata by using dummy + # operation (max) + hist_cube = cube.collapsed(coords, iris.analysis.MAX) + hist_cube.cell_methods = [ + *cube.cell_methods, CellMethod('histogram', coords) + ] + hist_cube = add_leading_dim_to_cube(hist_cube, bin_coord) + new_order = list(range(hist_cube.ndim)) + new_order[0] = hist_cube.ndim - 1 + new_order[-1] = 0 + hist_cube.transpose(new_order) + + # Adapt other metadata + hist_cube.var_name = ( + 'histogram' if hist_cube.var_name is None else + f'histogram_{hist_cube.var_name}' + ) + hist_cube.long_name = ( + 'Histogram' if hist_cube.long_name is None else + f'Histogram of {hist_cube.long_name}' + ) + if normalization == 'integral': + hist_cube.units = cube.units**-1 + else: + hist_cube.units = '1' + new_units = '1' + hist_cube.units = new_units + + return hist_cube diff --git 
a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 803b66d997..19d6ae6edd 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -6,7 +6,9 @@ import iris.coord_categorisation import iris.coords import numpy as np +import pytest from cf_units import Unit +from iris.coords import CellMethod from iris.cube import Cube from numpy.testing import assert_array_equal @@ -15,6 +17,10 @@ _group_products, clip, get_array_module, + histogram, +) +from tests.unit.preprocessor._compare_with_refs.test_compare_with_refs import ( + get_3d_cube, ) @@ -91,5 +97,74 @@ def test_get_array_module_mixed(): assert npx is da +@pytest.fixture +def cube(): + """Regular cube.""" + cube_data = np.ma.masked_inside( + np.arange(8.0, dtype=np.float32).reshape(2, 2, 2), 1.5, 3.5 + ) + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) + return cube + + +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_defaults(cube, lazy): + """Test `histogram`.""" + if lazy: + cube.data = cube.lazy_data() + input_cube = cube.copy() + + result = histogram(input_cube) + + assert input_cube == cube + assert result.shape == (10,) + if lazy: + assert result.has_lazy_data() + else: + assert not result.has_lazy_data() + np.testing.assert_allclose( + result.data, [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0] + ) + assert result.dtype == np.float32 + assert result.var_name == 'histogram_tas' + assert result.long_name == 'Histogram' + assert result.standard_name == 'air_temperature' + assert result.units == '1' + assert result.cell_methods == ( + CellMethod('histogram', ('time', 'latitude', 'longitude')), + ) + assert result.coords('Histogram Bin') + bin_coord = result.coord('Histogram Bin') + bin_coord.shape == (10,) + bin_coord.dtype == np.float64 + bin_coord.bounds_dtype == np.float64 + np.testing.assert_allclose( + bin_coord.points, + [0.35, 1.05, 1.75, 2.45, 3.15, 3.85, 4.55, 5.25, 5.95, 6.65], + ) + np.testing.assert_allclose( + bin_coord.bounds, + [ + [0.0, 0.7], + [0.7, 1.4], + [1.4, 2.1], + [2.1, 2.8], + [2.8, 3.5], + [3.5, 4.2], + [4.2, 4.9], + [4.9, 5.6], + [5.6, 6.3], + [6.3, 7.0], + ], + ) + assert bin_coord.var_name == 'bin' + assert bin_coord.long_name == 'Histogram Bin' + assert bin_coord.standard_name is None + assert bin_coord.units == 'K' + assert bin_coord.attributes == {} + + if __name__ == '__main__': unittest.main() From 60597b728cadfbf7033fa400e3ea2913eba993eb Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 27 Mar 2024 17:57:06 +0100 Subject: [PATCH 31/51] Fix flake8 --- esmvalcore/preprocessor/_other.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 6e6140c0cd..485d91fa80 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -273,13 +273,11 @@ def _calculate_histogram_lazy( axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) # The call signature depends also on the number of axes in `axes`, and - # will be (a,b,...),(a,b,...),(z),(y)->() where a,b,... are the data - # dimensions that are collapsed, z is the number of bin edges, and y - # the number of bin centers. - input_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" + # will be (a,b,...)->(nbins) where a,b,... 
are the data dimensions that + # are collapsed, and nbins the number of bin centers hist = da.apply_gufunc( _calculate_histogram_eager, - f"{input_signature}->(nbins)", + f"({','.join(list(string.ascii_lowercase)[:n_axes])})->(nbins)", data, axes=[along_axes, (0,)], output_sizes={'nbins': len(bin_edges) - 1}, From 478ae11e859f4c8c0ebe1a3e29a0719572e38630 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 10:32:49 +0200 Subject: [PATCH 32/51] Use correct metadata for histogram cube and coordinates --- esmvalcore/preprocessor/_other.py | 13 +++++++++---- tests/unit/preprocessor/_other/test_other.py | 13 +++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 485d91fa80..e7d2db904d 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -351,8 +351,9 @@ def _get_histogram_cube( bin_coord = DimCoord( bin_centers, bounds=np.stack((bin_edges[:-1], bin_edges[1:]), axis=-1), - var_name='bin', - long_name='Histogram Bin', + standard_name=cube.standard_name, + var_name=cube.var_name, + long_name=cube.long_name, units=cube.units, ) @@ -369,6 +370,7 @@ def _get_histogram_cube( hist_cube.transpose(new_order) # Adapt other metadata + hist_cube.standard_name = None hist_cube.var_name = ( 'histogram' if hist_cube.var_name is None else f'histogram_{hist_cube.var_name}' @@ -379,9 +381,12 @@ def _get_histogram_cube( ) if normalization == 'integral': hist_cube.units = cube.units**-1 + hist_cube.attributes['normalization'] = 'integral' + if normalization == 'sum': + hist_cube.units = '1' + hist_cube.attributes['normalization'] = 'sum' else: hist_cube.units = '1' - new_units = '1' - hist_cube.units = new_units + hist_cube.attributes['normalization'] = 'none' return hist_cube diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 19d6ae6edd..d35dc885a7 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -128,15 +128,16 @@ def test_histogram_defaults(cube, lazy): result.data, [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0] ) assert result.dtype == np.float32 + assert result.standard_name is None assert result.var_name == 'histogram_tas' assert result.long_name == 'Histogram' - assert result.standard_name == 'air_temperature' assert result.units == '1' assert result.cell_methods == ( CellMethod('histogram', ('time', 'latitude', 'longitude')), ) - assert result.coords('Histogram Bin') - bin_coord = result.coord('Histogram Bin') + assert result.attributes == {'normalization': 'none'} + assert result.coords('air_temperature') + bin_coord = result.coord('air_temperature') bin_coord.shape == (10,) bin_coord.dtype == np.float64 bin_coord.bounds_dtype == np.float64 @@ -159,9 +160,9 @@ def test_histogram_defaults(cube, lazy): [6.3, 7.0], ], ) - assert bin_coord.var_name == 'bin' - assert bin_coord.long_name == 'Histogram Bin' - assert bin_coord.standard_name is None + assert bin_coord.standard_name == 'air_temperature' + assert bin_coord.var_name == 'tas' + assert bin_coord.long_name is None assert bin_coord.units == 'K' assert bin_coord.attributes == {} From f052bc495e879b02e7acf0c72e10c43e64455869 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 12:31:59 +0200 Subject: [PATCH 33/51] Add test for fully masked data --- esmvalcore/preprocessor/_other.py | 21 ++-- tests/unit/preprocessor/_other/test_other.py | 103 ++++++++++++++++--- 2 files 
changed, 103 insertions(+), 21 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index e7d2db904d..d0fa8f55c1 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -180,7 +180,8 @@ def histogram( TypeError Invalid `bin` type given ValueError - Invalid `normalization` given. + Invalid `normalization` or `bin_range` given or `bin_range` is ``None`` + and data is fully masked. """ # Check arguments @@ -204,6 +205,12 @@ def histogram( ) else: bin_edges = np.array(bins, dtype=np.float64) + finite_bin_range = [bool(np.isfinite(r)) for r in bin_range] + if not all(finite_bin_range): + raise ValueError( + f"Cannot calculate histogram for bin_range={bin_range} (or for " + f"fully masked data when `bin_range` is not given)" + ) # If histogram is calculated over all coordinates, we can use # dask.array.histogram and do not need to worry about chunks; otherwise, @@ -250,17 +257,19 @@ def _calculate_histogram_lazy( """ n_axes = len(along_axes) - # If histogram is calculated over all axes, use the efficient da.histogram - # function + # (1) If histogram is calculated over all axes, use the efficient + # da.histogram function if n_axes == data.ndim: data = data.ravel() data = data[~da.ma.getmaskarray(data)] hist = da.histogram(data, bins=bin_edges, range=bin_range)[0] + hist_sum = hist.sum() + hist = da.ma.masked_array(hist, mask=da.allclose(hist_sum, 0.0)) if normalization == 'sum': - hist = hist / hist.sum() + hist = hist / hist_sum elif normalization == 'integral': diffs = np.array(np.diff(bin_edges), dtype=data.dtype) - hist = hist / hist.sum() / diffs + hist = hist / hist_sum / diffs hist = da.ma.masked_invalid(hist) # (2) Otherwise, use da.apply_gufunc with the eager version @@ -382,7 +391,7 @@ def _get_histogram_cube( if normalization == 'integral': hist_cube.units = cube.units**-1 hist_cube.attributes['normalization'] = 'integral' - if normalization == 'sum': + elif normalization == 'sum': hist_cube.units = '1' hist_cube.attributes['normalization'] = 'sum' else: diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index d35dc885a7..cf18dfb89e 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -109,6 +109,31 @@ def cube(): return cube +def assert_metadata_all_coords(cube, normalization=None): + """Assert correct metadata.""" + assert cube.standard_name is None + assert cube.var_name == 'histogram_tas' + assert cube.long_name == 'Histogram' + if normalization == 'integral': + assert cube.units == 'K-1' + else: + assert cube.units == '1' + assert cube.cell_methods == ( + CellMethod('histogram', ('time', 'latitude', 'longitude')), + ) + if normalization is None: + assert cube.attributes == {'normalization': 'none'} + else: + assert cube.attributes == {'normalization': normalization} + assert cube.coords('air_temperature') + bin_coord = cube.coord('air_temperature') + assert bin_coord.standard_name == 'air_temperature' + assert bin_coord.var_name == 'tas' + assert bin_coord.long_name is None + assert bin_coord.units == 'K' + assert bin_coord.attributes == {} + + @pytest.mark.parametrize('lazy', [False, True]) def test_histogram_defaults(cube, lazy): """Test `histogram`.""" @@ -118,25 +143,18 @@ def test_histogram_defaults(cube, lazy): result = histogram(input_cube) + assert_metadata_all_coords(result) assert input_cube == cube assert result.shape == (10,) if lazy: assert result.has_lazy_data() else: assert not 
result.has_lazy_data() + assert result.dtype == np.float32 np.testing.assert_allclose( result.data, [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0] ) - assert result.dtype == np.float32 - assert result.standard_name is None - assert result.var_name == 'histogram_tas' - assert result.long_name == 'Histogram' - assert result.units == '1' - assert result.cell_methods == ( - CellMethod('histogram', ('time', 'latitude', 'longitude')), - ) - assert result.attributes == {'normalization': 'none'} - assert result.coords('air_temperature') + np.testing.assert_allclose(result.data.mask, [False] * 10) bin_coord = result.coord('air_temperature') bin_coord.shape == (10,) bin_coord.dtype == np.float64 @@ -160,11 +178,66 @@ def test_histogram_defaults(cube, lazy): [6.3, 7.0], ], ) - assert bin_coord.standard_name == 'air_temperature' - assert bin_coord.var_name == 'tas' - assert bin_coord.long_name is None - assert bin_coord.units == 'K' - assert bin_coord.attributes == {} + + +@pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_fully_masked(cube, lazy, normalization): + """Test `histogram`.""" + cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32) + if lazy: + cube.data = cube.lazy_data() + + result = histogram(cube, bin_range=(0, 10), normalization=normalization) + + assert_metadata_all_coords(result, normalization=normalization) + assert result.shape == (10,) + if lazy: + assert result.has_lazy_data() + else: + assert not result.has_lazy_data() + assert result.dtype == np.float32 + print(result.data) + np.testing.assert_allclose(result.data, np.ma.masked_all(10,)) + np.testing.assert_equal(result.data.mask, [True] * 10) + bin_coord = result.coord('air_temperature') + bin_coord.shape == (10,) + bin_coord.dtype == np.float64 + bin_coord.bounds_dtype == np.float64 + np.testing.assert_allclose( + bin_coord.points, + [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5], + ) + np.testing.assert_allclose( + bin_coord.bounds, + [ + [0.0, 1.0], + [1.0, 2.0], + [2.0, 3.0], + [3.0, 4.0], + [4.0, 5.0], + [5.0, 6.0], + [6.0, 7.0], + [7.0, 8.0], + [8.0, 9.0], + [9.0, 10.0], + ], + ) + + +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_fully_masked_no_bin_range(cube, lazy): + """Test `histogram`.""" + cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32) + if lazy: + cube.data = cube.lazy_data() + + msg = ( + r"Cannot calculate histogram for bin_range=\(masked, masked\) \(or " + r"for fully masked data when `bin_range` is not given\)" + ) + with pytest.raises(ValueError, match=msg): + histogram(cube) if __name__ == '__main__': From 8360274a964f595ba0e9750091844c46efaf2cb8 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 15:51:18 +0200 Subject: [PATCH 34/51] Add test for coords=time --- esmvalcore/preprocessor/_other.py | 2 +- tests/unit/preprocessor/_other/test_other.py | 71 ++++++++++++++++++-- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index d0fa8f55c1..7744e177a4 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -288,7 +288,7 @@ def _calculate_histogram_lazy( _calculate_histogram_eager, f"({','.join(list(string.ascii_lowercase)[:n_axes])})->(nbins)", data, - axes=[along_axes, (0,)], + axes=[along_axes, (-1,)], output_sizes={'nbins': len(bin_edges) - 1}, along_axes=axes_in_chunk, bin_edges=bin_edges, diff --git 
a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index cf18dfb89e..6ade112497 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -103,13 +103,14 @@ def cube(): cube_data = np.ma.masked_inside( np.arange(8.0, dtype=np.float32).reshape(2, 2, 2), 1.5, 3.5 ) + cube_data = np.swapaxes(cube_data, 0, -1) cube = get_3d_cube( cube_data, standard_name='air_temperature', var_name='tas', units='K' ) return cube -def assert_metadata_all_coords(cube, normalization=None): +def assert_metadata(cube, normalization=None): """Assert correct metadata.""" assert cube.standard_name is None assert cube.var_name == 'histogram_tas' @@ -118,9 +119,6 @@ def assert_metadata_all_coords(cube, normalization=None): assert cube.units == 'K-1' else: assert cube.units == '1' - assert cube.cell_methods == ( - CellMethod('histogram', ('time', 'latitude', 'longitude')), - ) if normalization is None: assert cube.attributes == {'normalization': 'none'} else: @@ -143,8 +141,11 @@ def test_histogram_defaults(cube, lazy): result = histogram(input_cube) - assert_metadata_all_coords(result) assert input_cube == cube + assert_metadata(result) + assert result.cell_methods == ( + CellMethod('histogram', ('time', 'latitude', 'longitude')), + ) assert result.shape == (10,) if lazy: assert result.has_lazy_data() @@ -180,6 +181,60 @@ def test_histogram_defaults(cube, lazy): ) +@pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_over_time(cube, lazy, normalization): + """Test `histogram`.""" + if lazy: + cube.data = cube.lazy_data() + input_cube = cube.copy() + + result = histogram( + input_cube, + coords=['time'], + bins=[4.5, 6.5, 8.5, 10.5], + bin_range=(4.5, 10.5), + normalization=normalization, + ) + + assert input_cube == cube + assert_metadata(result, normalization=normalization) + assert result.cell_methods == (CellMethod('histogram', ('time',)),) + assert result.shape == (2, 2, 3) + if lazy: + assert result.has_lazy_data() + else: + assert not result.has_lazy_data() + assert result.dtype == np.float32 + print(cube.data) + print(result.data) + if normalization == 'integral': + expected_data = np.ma.masked_invalid([ + [[np.nan, np.nan, np.nan], [0.5, 0.0, 0.0]], + [[np.nan, np.nan, np.nan], [0.25, 0.25, 0.0]], + ]) + elif normalization == 'sum': + expected_data = np.ma.masked_invalid([ + [[np.nan, np.nan, np.nan], [1.0, 0.0, 0.0]], + [[np.nan, np.nan, np.nan], [0.5, 0.5, 0.0]], + ]) + else: + expected_data = np.ma.masked_invalid([ + [[np.nan, np.nan, np.nan], [1.0, 0.0, 0.0]], + [[np.nan, np.nan, np.nan], [1.0, 1.0, 0.0]], + ]) + np.testing.assert_allclose(result.data, expected_data) + np.testing.assert_allclose(result.data.mask, expected_data.mask) + bin_coord = result.coord('air_temperature') + bin_coord.shape == (10,) + bin_coord.dtype == np.float64 + bin_coord.bounds_dtype == np.float64 + np.testing.assert_allclose(bin_coord.points, [5.5, 7.5, 9.5]) + np.testing.assert_allclose( + bin_coord.bounds, [[4.5, 6.5], [6.5, 8.5], [8.5, 10.5]], + ) + + @pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) @pytest.mark.parametrize('lazy', [False, True]) def test_histogram_fully_masked(cube, lazy, normalization): @@ -190,14 +245,16 @@ def test_histogram_fully_masked(cube, lazy, normalization): result = histogram(cube, bin_range=(0, 10), normalization=normalization) - assert_metadata_all_coords(result, normalization=normalization) + 
assert_metadata(result, normalization=normalization) + assert result.cell_methods == ( + CellMethod('histogram', ('time', 'latitude', 'longitude')), + ) assert result.shape == (10,) if lazy: assert result.has_lazy_data() else: assert not result.has_lazy_data() assert result.dtype == np.float32 - print(result.data) np.testing.assert_allclose(result.data, np.ma.masked_all(10,)) np.testing.assert_equal(result.data.mask, [True] * 10) bin_coord = result.coord('air_temperature') From d6f6ac11c88441aaa7d56877771bfb7ddb74c199 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 15:57:34 +0200 Subject: [PATCH 35/51] Moved get_weights to _other module --- esmvalcore/preprocessor/_compare_with_refs.py | 49 ++----------------- esmvalcore/preprocessor/_other.py | 44 +++++++++++++++++ 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 3578f9d3a1..81c7b7d101 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -15,19 +15,16 @@ from iris.common.metadata import CubeMetadata from iris.coords import CellMethod, Coord from iris.cube import Cube, CubeList -from iris.exceptions import CoordinateNotFoundError -from iris.util import broadcast_to_shape from scipy.stats import wasserstein_distance from esmvalcore.iris_helpers import rechunk_cube -from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._io import concatenate from esmvalcore.preprocessor._other import ( get_all_coord_dims, get_all_coords, get_array_module, + get_weights, ) -from esmvalcore.preprocessor._time import get_time_weights if TYPE_CHECKING: from esmvalcore.preprocessor import PreprocessorFile @@ -432,46 +429,6 @@ def _calculate_metric( return res_cube -def _get_weights( - cube: Cube, - coords: Iterable[Coord] | Iterable[str], -) -> np.ndarray | da.Array: - """Calculate weights for weighted distance metrics.""" - npx = get_array_module(cube.core_data()) - weights = npx.ones(cube.shape, dtype=cube.dtype) - - # Time weights: lengths of time interval - if 'time' in coords: - weights *= broadcast_to_shape( - npx.array(get_time_weights(cube)), - cube.shape, - cube.coord_dims('time'), - ) - - # Latitude weights: cell areas - if 'latitude' in coords: - cube = cube.copy() # avoid overwriting input cube - if ( - not cube.cell_measures('cell_area') and - not cube.coords('longitude') - ): - raise CoordinateNotFoundError( - f"Cube {cube.summary(shorten=True)} needs a `longitude` " - f"coordinate to calculate cell area weights for weighted " - f"distance metric over coordinates {coords} (alternatively, " - f"a `cell_area` can be given to the cube as supplementary " - f"variable)" - ) - _try_adding_calculated_cell_area(cube) - weights *= broadcast_to_shape( - cube.cell_measure('cell_area').core_data(), - cube.shape, - cube.cell_measure_dims('cell_area'), - ) - - return weights - - def _calculate_rmse( cube: Cube, reference: Cube, @@ -482,7 +439,7 @@ def _calculate_rmse( """Calculate root mean square error.""" # Data axis = get_all_coord_dims(cube, coords) - weights = _get_weights(cube, coords) if weighted else None + weights = get_weights(cube, coords) if weighted else None squared_error = (cube.core_data() - reference.core_data())**2 npx = get_array_module(squared_error) rmse = npx.sqrt(npx.ma.average(squared_error, axis=axis, weights=weights)) @@ -510,7 +467,7 @@ def _calculate_pearsonr( ) -> tuple[np.ndarray | da.Array, 
CubeMetadata]: """Calculate Pearson correlation coefficient.""" # Data - weights = _get_weights(cube, coords) if weighted else None + weights = get_weights(cube, coords) if weighted else None res_cube = iris.analysis.stats.pearsonr( cube, reference, corr_coords=coords, weights=weights, **kwargs ) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 7744e177a4..8ed899eed6 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -13,8 +13,12 @@ import numpy as np from iris.coords import CellMethod, Coord, DimCoord from iris.cube import Cube +from iris.exceptions import CoordinateNotFoundError +from iris.util import broadcast_to_shape from esmvalcore.iris_helpers import add_leading_dim_to_cube, rechunk_cube +from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area +from esmvalcore.preprocessor._time import get_time_weights logger = logging.getLogger(__name__) @@ -125,6 +129,46 @@ def get_all_coord_dims( return tuple(sorted_all_coord_dims) +def get_weights( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> np.ndarray | da.Array: + """Calculate suitable weights for given coordinates.""" + npx = get_array_module(cube.core_data()) + weights = npx.ones(cube.shape, dtype=cube.dtype) + + # Time weights: lengths of time interval + if 'time' in coords: + weights *= broadcast_to_shape( + npx.array(get_time_weights(cube)), + cube.shape, + cube.coord_dims('time'), + ) + + # Latitude weights: cell areas + if 'latitude' in coords: + cube = cube.copy() # avoid overwriting input cube + if ( + not cube.cell_measures('cell_area') and + not cube.coords('longitude') + ): + raise CoordinateNotFoundError( + f"Cube {cube.summary(shorten=True)} needs a `longitude` " + f"coordinate to calculate cell area weights for weighted " + f"distance metric over coordinates {coords} (alternatively, " + f"a `cell_area` can be given to the cube as supplementary " + f"variable)" + ) + _try_adding_calculated_cell_area(cube) + weights *= broadcast_to_shape( + cube.cell_measure('cell_area').core_data(), + cube.shape, + cube.cell_measure_dims('cell_area'), + ) + + return weights + + def histogram( cube: Cube, coords: Iterable[Coord] | Iterable[str] | None = None, From 3ff677c7329137588c500ba1f9b804d8b2e1a7e8 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 17:52:31 +0200 Subject: [PATCH 36/51] Support weighted histograms --- esmvalcore/preprocessor/_other.py | 74 ++++++++++++++++---- tests/unit/preprocessor/_other/test_other.py | 62 +++++++++++++++- 2 files changed, 120 insertions(+), 16 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 8ed899eed6..d7b4ae5565 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -135,7 +135,7 @@ def get_weights( ) -> np.ndarray | da.Array: """Calculate suitable weights for given coordinates.""" npx = get_array_module(cube.core_data()) - weights = npx.ones(cube.shape, dtype=cube.dtype) + weights = npx.ones_like(cube.core_data()) # Time weights: lengths of time interval if 'time' in coords: @@ -174,6 +174,7 @@ def histogram( coords: Iterable[Coord] | Iterable[str] | None = None, bins: int | Sequence[float] = 10, bin_range: tuple[float, float] | None = None, + weights: np.ndarray | da.Array | bool | None = None, normalization: Literal['sum', 'integral'] | None = None, ) -> Cube: """Calculate histogram. 
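The hunk below documents the new `weights` option. As a sanity check of the intended semantics (each value contributes its weight to its bin instead of 1, and ``normalization='sum'`` rescales the result so the bins add up to 1), here is a minimal NumPy-only sketch; the sample values and weights are made up for illustration and are not part of the patch:

    import numpy as np

    # Made-up sample values and one weight per value.
    values = np.array([0.5, 1.5, 1.5, 2.5])
    weights = np.array([1.0, 2.0, 2.0, 1.0])

    # Each value adds its weight (rather than 1) to its bin.
    hist, _ = np.histogram(values, bins=3, range=(0.0, 3.0), weights=weights)
    print(hist)               # [1. 4. 1.]

    # With normalization='sum', the result is expected to sum to 1,
    # analogous to a probability mass function.
    print(hist / hist.sum())  # [0.16666667 0.66666667 0.16666667]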
@@ -197,12 +198,27 @@ def histogram(
         monotonically increasing array of bin edges, including the rightmost
         edge, allowing for non-uniform bin widths.
     bin_range:
-        The lower and upper range of the bins. If not provided, `bin_range` is
+        The lower and upper range of the bins. If ``None``, `bin_range` is
         simply (``cube.core_data().min(), cube.core_data().max()``). Values
         outside the range are ignored. The first element of the range must be
         less than or equal to the second. `bin_range` affects the automatic bin
         computation as well if `bins` is an :obj:`int` (see description for
         `bins` above).
+    weights:
+        Weights for the histogram calculation. Each value in the input data
+        only contributes its associated weight towards the bin count (instead
+        of 1). Weights are normalized before entering the calculation if
+        `normalization` is ``'integral'`` or ``'sum'``. Can be an array of the
+        same shape as the input data, ``False`` or ``None`` (no weighting), or
+        ``True``. In the latter case, weighting will depend on `coords`, and
+        the following coordinates will trigger weighting: `time` (will use
+        lengths of time intervals as weights) and `latitude` (will use cell
+        area weights). Time weights are always calculated from the input data.
+        Area weights can be given as supplementary variables to the recipe
+        (`areacella` or `areacello`, see :ref:`supplementary_variables`) or
+        calculated from the input data (this only works for regular grids). By
+        default, **NO** supplementary variables will be used; they need to be
+        explicitly requested in the recipe.
     normalization:
         If ``None``, the result will contain the number of samples in each bin.
         If ``'integral'``, the result is the value of the probability `density`
@@ -226,6 +242,10 @@
     ValueError
         Invalid `normalization` or `bin_range` given or `bin_range` is ``None``
         and data is fully masked.
+    iris.exceptions.CoordinateNotFoundError
+        ``longitude`` is not found in cube if `weights=True`, `latitude` is in
+        `coords`, and no `cell_area` is given as
+        :ref:`supplementary_variables`.
""" # Check arguments @@ -256,13 +276,24 @@ def histogram( f"fully masked data when `bin_range` is not given)" ) + # Get (normalized) weights + coords = get_all_coords(cube, coords) + axes = get_all_coord_dims(cube, coords) + npx = get_array_module(cube.core_data()) + if not weights: + weights = npx.ones_like(cube.core_data()) + elif weights is True: + weights = get_weights(cube, coords) + if normalization is not None: + weights = weights / npx.sum(weights, axis=axes, keepdims=True) + # If histogram is calculated over all coordinates, we can use # dask.array.histogram and do not need to worry about chunks; otherwise, # make sure that the cube is not chunked along the given coordinates - coords = get_all_coords(cube, coords) - axes = get_all_coord_dims(cube, coords) - if cube.has_lazy_data() and len(axes) == cube.ndim: + if cube.has_lazy_data() and len(axes) != cube.ndim: cube = rechunk_cube(cube, coords) + if isinstance(weights, da.Array): + weights = weights.rechunk(cube.lazy_data().chunks) # Calculate histogram if cube.has_lazy_data(): @@ -271,6 +302,7 @@ def histogram( func = _calculate_histogram_eager # type: ignore hist_data = func( cube.core_data(), + weights, # type: ignore along_axes=axes, bin_edges=bin_edges, bin_range=bin_range, @@ -286,6 +318,7 @@ def histogram( def _calculate_histogram_lazy( data: da.Array, + weights: da.Array, *, along_axes: tuple[int, ...], bin_edges: np.ndarray, @@ -305,8 +338,13 @@ def _calculate_histogram_lazy( # da.histogram function if n_axes == data.ndim: data = data.ravel() - data = data[~da.ma.getmaskarray(data)] - hist = da.histogram(data, bins=bin_edges, range=bin_range)[0] + weights = weights.ravel() + mask = da.ma.getmaskarray(data) + data = data[~mask] + weights = weights[~mask] + hist = da.histogram( + data, bins=bin_edges, range=bin_range, weights=weights + )[0] hist_sum = hist.sum() hist = da.ma.masked_array(hist, mask=da.allclose(hist_sum, 0.0)) if normalization == 'sum': @@ -328,11 +366,13 @@ def _calculate_histogram_lazy( # The call signature depends also on the number of axes in `axes`, and # will be (a,b,...)->(nbins) where a,b,... 
are the data dimensions that # are collapsed, and nbins the number of bin centers + in_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" hist = da.apply_gufunc( _calculate_histogram_eager, - f"({','.join(list(string.ascii_lowercase)[:n_axes])})->(nbins)", + f"{in_signature},{in_signature}->(nbins)", data, - axes=[along_axes, (-1,)], + weights, + axes=[along_axes, along_axes, (-1,)], output_sizes={'nbins': len(bin_edges) - 1}, along_axes=axes_in_chunk, bin_edges=bin_edges, @@ -345,6 +385,7 @@ def _calculate_histogram_lazy( def _calculate_histogram_eager( data: np.ndarray, + weights: np.ndarray, *, along_axes: tuple[int, ...], bin_edges: np.ndarray, @@ -362,17 +403,24 @@ def _calculate_histogram_eager( # dimensions in `axes` and the `xi` are the remaining dimensions remaining_dims = tuple(a for a in range(data.ndim) if a not in along_axes) reshaped_data = np.transpose(data, axes=(*remaining_dims, *along_axes)) + reshaped_weights = np.transpose( + weights, axes=(*remaining_dims, *along_axes) + ) shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) reshaped_data = reshaped_data.reshape(*shape_rem_dims, -1) + reshaped_weights = reshaped_weights.reshape(*shape_rem_dims, -1) # Apply vectorized version of np.histogram - def _get_hist_values(arr): + def _get_hist_values(arr, weights): mask = np.ma.getmaskarray(arr) arr = arr[~mask] - return np.histogram(arr, bins=bin_edges, range=bin_range)[0] + weights = weights[~mask] + return np.histogram( + arr, bins=bin_edges, range=bin_range, weights=weights + )[0] - v_histogram = np.vectorize(_get_hist_values, signature='(n)->(m)') - hist = v_histogram(reshaped_data) + v_histogram = np.vectorize(_get_hist_values, signature='(n),(n)->(m)') + hist = v_histogram(reshaped_data, reshaped_weights) # Mask points where all input data were masked (these are the ones where # the histograms sums to 0) diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 6ade112497..1748a98b70 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -182,8 +182,9 @@ def test_histogram_defaults(cube, lazy): @pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) +@pytest.mark.parametrize('weights', [False, None]) @pytest.mark.parametrize('lazy', [False, True]) -def test_histogram_over_time(cube, lazy, normalization): +def test_histogram_over_time(cube, lazy, weights, normalization): """Test `histogram`.""" if lazy: cube.data = cube.lazy_data() @@ -194,11 +195,14 @@ def test_histogram_over_time(cube, lazy, normalization): coords=['time'], bins=[4.5, 6.5, 8.5, 10.5], bin_range=(4.5, 10.5), + weights=weights, normalization=normalization, ) assert input_cube == cube assert_metadata(result, normalization=normalization) + assert result.coord('latitude') == input_cube.coord('latitude') + assert result.coord('longitude') == input_cube.coord('longitude') assert result.cell_methods == (CellMethod('histogram', ('time',)),) assert result.shape == (2, 2, 3) if lazy: @@ -206,8 +210,6 @@ def test_histogram_over_time(cube, lazy, normalization): else: assert not result.has_lazy_data() assert result.dtype == np.float32 - print(cube.data) - print(result.data) if normalization == 'integral': expected_data = np.ma.masked_invalid([ [[np.nan, np.nan, np.nan], [0.5, 0.0, 0.0]], @@ -282,6 +284,60 @@ def test_histogram_fully_masked(cube, lazy, normalization): ) +@pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) 
+@pytest.mark.parametrize('weights', [True]) +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_weights(cube, lazy, weights, normalization): + """Test `histogram`.""" + if lazy: + cube.data = cube.lazy_data() + input_cube = cube.copy() + + result = histogram( + input_cube, + coords=['time', 'longitude'], + bins=[0.0, 2.0, 4.0, 8.0], + weights=weights, + normalization=normalization, + ) + + assert input_cube == cube + assert_metadata(result, normalization=normalization) + assert result.coord('latitude') == input_cube.coord('latitude') + assert result.cell_methods == ( + CellMethod('histogram', ('time', 'longitude')), + ) + assert result.shape == (2, 3) + if lazy: + assert result.has_lazy_data() + else: + assert not result.has_lazy_data() + assert result.dtype == np.float32 + print(result.data) + if normalization == 'integral': + expected_data = np.ma.masked_invalid( + [[0.25, 0.0, 0.125], [0.0, 0.0, 0.25]] + ) + elif normalization == 'sum': + expected_data = np.ma.masked_invalid( + [[0.5, 0.0, 0.5], [0.0, 0.0, 1.0]] + ) + else: + expected_data = np.ma.masked_invalid( + [[8.0, 0.0, 8.0], [0.0, 0.0, 8.0]] + ) + np.testing.assert_allclose(result.data, expected_data) + np.testing.assert_allclose(result.data.mask, expected_data.mask) + bin_coord = result.coord('air_temperature') + bin_coord.shape == (10,) + bin_coord.dtype == np.float64 + bin_coord.bounds_dtype == np.float64 + np.testing.assert_allclose(bin_coord.points, [1.0, 3.0, 6.0]) + np.testing.assert_allclose( + bin_coord.bounds, [[0.0, 2.0], [2.0, 4.0], [4.0, 8.0]], + ) + + @pytest.mark.parametrize('lazy', [False, True]) def test_histogram_fully_masked_no_bin_range(cube, lazy): """Test `histogram`.""" From ca15464fedc6c6e09be9277bb18da80e11254cde Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 3 Apr 2024 17:53:23 +0200 Subject: [PATCH 37/51] Add doc --- doc/recipe/preprocessor.rst | 72 +++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 6ed66f5061..44adb16289 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -310,6 +310,7 @@ Preprocessor Variable s :ref:`volume_statistics` [#f4]_ ``volcello``, ``areacello`` ocean_volume, cell_area :ref:`weighting_landsea_fraction` [#f3]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction :ref:`distance_metric` [#f5]_ ``areacella``, ``areacello`` cell_area +:ref:`histogram` [#f5]_ ``areacella``, ``areacello`` cell_area ===================================================================== ============================== ===================================== .. [#f3] This preprocessor requires at least one of the mentioned supplementary @@ -2734,3 +2735,74 @@ The example below shows how to set all values below zero to zero. clip: minimum: 0 maximum: null + +.. _histogram: + +``histogram`` +------------------- + +This function calculates histograms. + +The ``histogram`` preprocessor supports the following arguments in the +recipe: + +* ``coords`` (:obj:`list` of :obj:`str`, default: ``None``): Coordinates over + which the histogram is calculated. + If ``None``, calculate the histogram over all coordinates. + The shape of the output cube will be `(x1, x2, ..., n_bins)`, where `xi` are + the dimensions of the input cube not appearing in `coords` and `n_bins` is + the number of bins. +* ``bins`` (:obj:`int` or sequence of :obj:`float`, default: 10): If `bins` is + an :obj:`int`, it defines the number of equal-width bins in the given + `bin_range`. 
If `bins` is a sequence, it defines a monotonically increasing array of bin
+  edges, including the rightmost edge, allowing for non-uniform bin widths.
+* ``bin_range`` (:obj:`tuple` of :obj:`float` or ``None``, default: ``None``):
+  The lower and upper range of the bins.
+  If ``None``, `bin_range` is simply (``cube.core_data().min(),
+  cube.core_data().max()``).
+  Values outside the range are ignored.
+  The first element of the range must be less than or equal to the second.
+  `bin_range` affects the automatic bin computation as well if `bins` is an
+  :obj:`int` (see description for `bins` above).
+* ``weights`` (array-like, :obj:`bool`, or ``None``, default: ``None``):
+  Weights for the histogram calculation.
+  Each value in the input data only contributes its associated weight towards
+  the bin count (instead of 1).
+  Weights are normalized before entering the calculation if `normalization` is
+  ``'integral'`` or ``'sum'``.
+  Can be an array of the same shape as the input data, ``False`` or ``None``
+  (no weighting), or ``True``.
+  In the latter case, weighting will depend on `coords`, and the following
+  coordinates will trigger weighting: `time` (will use lengths of time
+  intervals as weights) and `latitude` (will use cell area weights).
+  Time weights are always calculated from the input data.
+  Area weights can be given as supplementary variables to the recipe
+  (`areacella` or `areacello`, see :ref:`supplementary_variables`) or
+  calculated from the input data (this only works for regular grids).
+  By default, **NO** supplementary variables will be used; they need to be
+  explicitly requested in the recipe.
+* ``normalization`` (``None``, ``'sum'``, or ``'integral'``, default:
+  ``None``): If ``None``, the result will contain the number of samples in each
+  bin.
+  If ``'integral'``, the result is the value of the probability `density`
+  function at the bin, normalized such that the integral over the range is 1.
+  If ``'sum'``, the result is the value of the probability `mass` function at
+  the bin, normalized such that the sum over the range is 1.
+  Normalization will be applied across `coords`, not the entire cube.
+
+Example:
+
+.. code-block:: yaml
+
+    preprocessors:
+      preproc_histogram:
+        histogram:
+          coords: [latitude, longitude]
+          bins: 12
+          bin_range: [100.0, 150.0]
+          weights: true
+          normalization: sum
+
+See also :func:`esmvalcore.preprocessor.histogram`.
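For reference, the recipe above corresponds roughly to the following self-contained Python sketch; the toy cube, its values, and its coordinates are invented for illustration, and weighting is omitted to keep the example minimal:

    import numpy as np
    from iris.coords import DimCoord
    from iris.cube import Cube

    from esmvalcore.preprocessor import histogram

    # Toy (time, latitude, longitude) cube with values spanning 100-150 K.
    time = DimCoord(
        [0.0, 1.0], standard_name='time', units='days since 2000-01-01'
    )
    lat = DimCoord([-45.0, 45.0], standard_name='latitude', units='degrees')
    lon = DimCoord([90.0, 270.0], standard_name='longitude', units='degrees')
    cube = Cube(
        np.linspace(100.0, 150.0, 8, dtype=np.float32).reshape(2, 2, 2),
        var_name='tas',
        units='K',
        dim_coords_and_dims=[(time, 0), (lat, 1), (lon, 2)],
    )

    # Collapse the spatial dimensions into 12 equal-width bins; with
    # normalization='sum', each per-time-step histogram sums to 1.
    hist_cube = histogram(
        cube,
        coords=['latitude', 'longitude'],
        bins=12,
        bin_range=(100.0, 150.0),
        normalization='sum',
    )
    print(hist_cube.shape)  # (2, 12): 'time' is kept, plus one bin dimension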
From 6968e80d0134d1b6549e6749b379ea27f6a5239b Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 5 Apr 2024 13:21:12 +0200 Subject: [PATCH 38/51] Nicer way of setting up histogram cube --- esmvalcore/preprocessor/_other.py | 83 ++++++----- tests/unit/preprocessor/_other/test_other.py | 144 ++++++++++++++++--- 2 files changed, 174 insertions(+), 53 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index d7b4ae5565..e3b9101a92 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -11,12 +11,12 @@ import dask.array as da import iris.analysis import numpy as np -from iris.coords import CellMethod, Coord, DimCoord +from iris.coords import Coord, DimCoord from iris.cube import Cube from iris.exceptions import CoordinateNotFoundError from iris.util import broadcast_to_shape -from esmvalcore.iris_helpers import add_leading_dim_to_cube, rechunk_cube +from esmvalcore.iris_helpers import rechunk_cube from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area from esmvalcore.preprocessor._time import get_time_weights @@ -280,7 +280,7 @@ def histogram( coords = get_all_coords(cube, coords) axes = get_all_coord_dims(cube, coords) npx = get_array_module(cube.core_data()) - if not weights: + if weights is None or weights is False: weights = npx.ones_like(cube.core_data()) elif weights is True: weights = get_weights(cube, coords) @@ -308,10 +308,12 @@ def histogram( bin_range=bin_range, normalization=normalization, ) + hist_data = hist_data.astype(cube.dtype) - # Get final cube with correct metadata and data - hist_cube = _get_histogram_cube(cube, coords, bin_edges, normalization) - hist_cube.data = hist_data.astype(cube.dtype) + # Get final cube + hist_cube = _get_histogram_cube( + cube, hist_data, coords, bin_edges, normalization + ) return hist_cube @@ -441,6 +443,7 @@ def _get_hist_values(arr, weights): def _get_histogram_cube( cube: Cube, + data: np.ndarray | da.Array, coords: Iterable[Coord] | Iterable[str], bin_edges: np.ndarray, normalization: Literal['sum', 'integral'] | None, @@ -453,41 +456,55 @@ def _get_histogram_cube( bin_centers, bounds=np.stack((bin_edges[:-1], bin_edges[1:]), axis=-1), standard_name=cube.standard_name, - var_name=cube.var_name, long_name=cube.long_name, + var_name=cube.var_name, units=cube.units, ) # Get result cube with correct dimensional metadata by using dummy # operation (max) - hist_cube = cube.collapsed(coords, iris.analysis.MAX) - hist_cube.cell_methods = [ - *cube.cell_methods, CellMethod('histogram', coords) - ] - hist_cube = add_leading_dim_to_cube(hist_cube, bin_coord) - new_order = list(range(hist_cube.ndim)) - new_order[0] = hist_cube.ndim - 1 - new_order[-1] = 0 - hist_cube.transpose(new_order) - - # Adapt other metadata - hist_cube.standard_name = None - hist_cube.var_name = ( - 'histogram' if hist_cube.var_name is None else - f'histogram_{hist_cube.var_name}' + cell_methods = cube.cell_methods + cube = cube.collapsed(coords, iris.analysis.MAX) + + # Get histogram cube + long_name_suffix = ( + '' if cube.long_name is None else f' of {cube.long_name}' ) - hist_cube.long_name = ( - 'Histogram' if hist_cube.long_name is None else - f'Histogram of {hist_cube.long_name}' + var_name_suffix = '' if cube.var_name is None else f'_{cube.var_name}' + dim_spec = ( + [(d, cube.coord_dims(d)) for d in cube.dim_coords] + + [(bin_coord, cube.ndim)] ) - if normalization == 'integral': - hist_cube.units = cube.units**-1 - hist_cube.attributes['normalization'] = 'integral' 
- elif normalization == 'sum': - hist_cube.units = '1' - hist_cube.attributes['normalization'] = 'sum' + if normalization == 'sum': + long_name = f"Relative Frequency{long_name_suffix}" + var_name = f"relative_frequency{var_name_suffix}" + units = '1' + elif normalization == 'integral': + long_name = f"Density{long_name_suffix}" + var_name = f"density{var_name_suffix}" + units = cube.units**-1 else: - hist_cube.units = '1' - hist_cube.attributes['normalization'] = 'none' + long_name = f"Frequency{long_name_suffix}" + var_name = f"frequency{var_name_suffix}" + units = '1' + hist_cube = Cube( + data, + standard_name=None, + long_name=long_name, + var_name=var_name, + units=units, + attributes = cube.attributes, + cell_methods=cell_methods, + dim_coords_and_dims=dim_spec, + aux_coords_and_dims=[(a, cube.coord_dims(a)) for a in cube.aux_coords], + aux_factories=cube.aux_factories, + ancillary_variables_and_dims=[ + (a, cube.ancillary_variable_dims(a)) for a in + cube.ancillary_variables() + ], + cell_measures_and_dims=[ + (c, cube.cell_measure_dims(c)) for c in cube.cell_measures() + ], + ) return hist_cube diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 1748a98b70..e362645c52 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -8,7 +8,14 @@ import numpy as np import pytest from cf_units import Unit -from iris.coords import CellMethod +from iris.aux_factory import AtmosphereSigmaFactory +from iris.coords import ( + AncillaryVariable, + AuxCoord, + CellMeasure, + CellMethod, + DimCoord, +) from iris.cube import Cube from numpy.testing import assert_array_equal @@ -113,16 +120,20 @@ def cube(): def assert_metadata(cube, normalization=None): """Assert correct metadata.""" assert cube.standard_name is None - assert cube.var_name == 'histogram_tas' - assert cube.long_name == 'Histogram' - if normalization == 'integral': + if normalization == 'sum': + assert cube.long_name == 'Relative Frequency' + assert cube.var_name == 'relative_frequency_tas' + assert cube.units == '1' + elif normalization == 'integral': + assert cube.long_name == 'Density' + assert cube.var_name == 'density_tas' assert cube.units == 'K-1' else: + assert cube.long_name == 'Frequency' + assert cube.var_name == 'frequency_tas' assert cube.units == '1' - if normalization is None: - assert cube.attributes == {'normalization': 'none'} - else: - assert cube.attributes == {'normalization': normalization} + assert cube.attributes == {} + assert cube.cell_methods == () assert cube.coords('air_temperature') bin_coord = cube.coord('air_temperature') assert bin_coord.standard_name == 'air_temperature' @@ -143,9 +154,6 @@ def test_histogram_defaults(cube, lazy): assert input_cube == cube assert_metadata(result) - assert result.cell_methods == ( - CellMethod('histogram', ('time', 'latitude', 'longitude')), - ) assert result.shape == (10,) if lazy: assert result.has_lazy_data() @@ -203,7 +211,6 @@ def test_histogram_over_time(cube, lazy, weights, normalization): assert_metadata(result, normalization=normalization) assert result.coord('latitude') == input_cube.coord('latitude') assert result.coord('longitude') == input_cube.coord('longitude') - assert result.cell_methods == (CellMethod('histogram', ('time',)),) assert result.shape == (2, 2, 3) if lazy: assert result.has_lazy_data() @@ -248,9 +255,6 @@ def test_histogram_fully_masked(cube, lazy, normalization): result = histogram(cube, bin_range=(0, 10), 
normalization=normalization) assert_metadata(result, normalization=normalization) - assert result.cell_methods == ( - CellMethod('histogram', ('time', 'latitude', 'longitude')), - ) assert result.shape == (10,) if lazy: assert result.has_lazy_data() @@ -285,7 +289,14 @@ def test_histogram_fully_masked(cube, lazy, normalization): @pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) -@pytest.mark.parametrize('weights', [True]) +@pytest.mark.parametrize( + 'weights', + [ + True, + np.array([[[6, 6], [6, 6]], [[2, 2], [2, 2]]]), + da.array([[[6, 6], [6, 6]], [[2, 2], [2, 2]]]), + ] +) @pytest.mark.parametrize('lazy', [False, True]) def test_histogram_weights(cube, lazy, weights, normalization): """Test `histogram`.""" @@ -304,16 +315,12 @@ def test_histogram_weights(cube, lazy, weights, normalization): assert input_cube == cube assert_metadata(result, normalization=normalization) assert result.coord('latitude') == input_cube.coord('latitude') - assert result.cell_methods == ( - CellMethod('histogram', ('time', 'longitude')), - ) assert result.shape == (2, 3) if lazy: assert result.has_lazy_data() else: assert not result.has_lazy_data() assert result.dtype == np.float32 - print(result.data) if normalization == 'integral': expected_data = np.ma.masked_invalid( [[0.25, 0.0, 0.125], [0.0, 0.0, 0.25]] @@ -353,5 +360,102 @@ def test_histogram_fully_masked_no_bin_range(cube, lazy): histogram(cube) +@pytest.fixture +def cube_with_rich_metadata(): + """Cube with rich metadata.""" + time = DimCoord([0], bounds=[[-1, 1]], var_name='time', units='s') + sigma = DimCoord([0], var_name='sigma', units='1') + lat = DimCoord([0], var_name='lat', units='degrees') + lon = DimCoord([0], var_name='lon', units='degrees') + ptop = AuxCoord(0, var_name='ptop', units='Pa') + psur = AuxCoord([[0]], var_name='ps', units='Pa') + sigma_factory = AtmosphereSigmaFactory(ptop, sigma, psur) + cell_area = CellMeasure([[1]], var_name='area', units='m2', measure='area') + anc = AncillaryVariable([0], var_name='anc') + cube = Cube( + np.ones((1, 1, 1, 1), dtype=np.float32), + standard_name=None, + long_name='Air Temperature', + var_name=None, + units='K', + attributes={'test': '1'}, + cell_methods=(CellMethod('point', 'sigma'),), + dim_coords_and_dims=[(time, 0), (sigma, 1), (lat, 2), (lon, 3)], + aux_coords_and_dims=[(ptop, ()), (psur, (2, 3))], + aux_factories=[sigma_factory], + ancillary_variables_and_dims=[(anc, 1)], + cell_measures_and_dims=[(cell_area, (2, 3))], + ) + return cube + + +@pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) +@pytest.mark.parametrize('weights', [True, False, None]) +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_metadata( + cube_with_rich_metadata, lazy, weights, normalization +): + """Test `histogram`.""" + if lazy: + cube_with_rich_metadata.data = cube_with_rich_metadata.lazy_data() + input_cube = cube_with_rich_metadata.copy() + + result = histogram( + input_cube, + coords=['time'], + bins=[0.0, 1.0, 2.0], + bin_range=(0.0, 2.0), + weights=weights, + normalization=normalization, + ) + + assert input_cube == cube_with_rich_metadata + assert result.shape == (1, 1, 1, 2) + + assert result.standard_name is None + if normalization == 'sum': + assert result.long_name == 'Relative Frequency of Air Temperature' + assert result.var_name == 'relative_frequency' + assert result.units == '1' + elif normalization == 'integral': + assert result.long_name == 'Density of Air Temperature' + assert result.var_name == 'density' + assert result.units == 'K-1' + 
else: + assert result.long_name == 'Frequency of Air Temperature' + assert result.var_name == 'frequency' + assert result.units == '1' + assert result.attributes == {'test': '1'} + assert result.cell_methods == (CellMethod('point', 'sigma'),) + + assert not result.coords('time', dim_coords=True) + for dim_coord in ('sigma', 'lat', 'lon'): + assert ( + result.coord(dim_coord, dim_coords=True) == + input_cube.coord(dim_coord, dim_coords=True) + ) + assert ( + result.coord_dims(dim_coord) == + (input_cube.coord_dims(dim_coord)[0] - 1,) + ) + assert result.coords('Air Temperature', dim_coords=True) + bin_coord = result.coord('Air Temperature') + assert result.coord_dims(bin_coord) == (3,) + assert bin_coord.standard_name is None + assert bin_coord.long_name == 'Air Temperature' + assert bin_coord.var_name is None + assert bin_coord.units == 'K' + assert bin_coord.attributes == {} + + assert result.coords('time', dim_coords=False) + assert result.coord_dims('time') == () + assert result.coord('ptop') == input_cube.coord('ptop') + assert result.coord('ps') == input_cube.coord('ps') + assert len(result.aux_factories) == 1 + assert isinstance(result.aux_factories[0], AtmosphereSigmaFactory) + assert result.ancillary_variables() == input_cube.ancillary_variables() + assert result.cell_measures() == input_cube.cell_measures() + + if __name__ == '__main__': unittest.main() From 07807c125e0bbdbcc5ca22a23110ac5b37c22171 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 5 Apr 2024 14:17:38 +0200 Subject: [PATCH 39/51] Fix tests --- esmvalcore/preprocessor/_other.py | 94 ++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index e3b9101a92..2a457ed640 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -258,44 +258,17 @@ def histogram( f"'{normalization}'" ) - # Calculate bin edges - if bin_range is None: - bin_range = dask.compute( - cube.core_data().min(), cube.core_data().max() - ) - if isinstance(bins, int): - bin_edges = np.linspace( - bin_range[0], bin_range[1], bins + 1, dtype=np.float64 - ) - else: - bin_edges = np.array(bins, dtype=np.float64) - finite_bin_range = [bool(np.isfinite(r)) for r in bin_range] - if not all(finite_bin_range): - raise ValueError( - f"Cannot calculate histogram for bin_range={bin_range} (or for " - f"fully masked data when `bin_range` is not given)" - ) - - # Get (normalized) weights - coords = get_all_coords(cube, coords) - axes = get_all_coord_dims(cube, coords) - npx = get_array_module(cube.core_data()) - if weights is None or weights is False: - weights = npx.ones_like(cube.core_data()) - elif weights is True: - weights = get_weights(cube, coords) - if normalization is not None: - weights = weights / npx.sum(weights, axis=axes, keepdims=True) - # If histogram is calculated over all coordinates, we can use # dask.array.histogram and do not need to worry about chunks; otherwise, # make sure that the cube is not chunked along the given coordinates + coords = get_all_coords(cube, coords) + axes = get_all_coord_dims(cube, coords) if cube.has_lazy_data() and len(axes) != cube.ndim: cube = rechunk_cube(cube, coords) - if isinstance(weights, da.Array): - weights = weights.rechunk(cube.lazy_data().chunks) # Calculate histogram + weights = _get_histogram_weights(cube, coords, weights, normalization) + (bin_range, bin_edges) = _get_bins(cube, bins, bin_range) if cube.has_lazy_data(): func = _calculate_histogram_lazy # 
type: ignore else: @@ -318,6 +291,63 @@ def histogram( return hist_cube +def _get_bins( + cube: Cube, + bins: int | Sequence[float], + bin_range: tuple[float, float] | None, +) -> tuple[tuple[float, float], np.ndarray]: + """Calculate bin range and edges.""" + if bin_range is None: + bin_range = dask.compute( + cube.core_data().min(), cube.core_data().max() + ) + if isinstance(bins, int): + bin_edges = np.linspace( + bin_range[0], bin_range[1], bins + 1, dtype=np.float64 + ) + else: + bin_edges = np.array(bins, dtype=np.float64) + + finite_bin_range = [bool(np.isfinite(r)) for r in bin_range] + if not all(finite_bin_range): + raise ValueError( + f"Cannot calculate histogram for bin_range={bin_range} (or for " + f"fully masked data when `bin_range` is not given)" + ) + + return (bin_range, bin_edges) + + +def _get_histogram_weights( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], + weights: np.ndarray | da.Array | bool | None, + normalization: Literal['sum', 'integral'] | None, +) -> np.ndarray | da.Array: + """Get histogram weights.""" + axes = get_all_coord_dims(cube, coords) + npx = get_array_module(cube.core_data()) + + weights_array: np.ndarray | da.Array + if weights is None or weights is False: + weights_array = npx.ones_like(cube.core_data()) + elif weights is True: + weights_array = get_weights(cube, coords) + else: + weights_array = weights + + if normalization is not None: + norm = npx.sum(weights_array, axis=axes, keepdims=True) + weights_array = weights_array / norm + + # For lazy arrays, make sure that the chunks of the cube data and weights + # match + if isinstance(weights_array, da.Array): + weights_array = weights_array.rechunk(cube.lazy_data().chunks) + + return weights_array + + def _calculate_histogram_lazy( data: da.Array, weights: da.Array, @@ -493,7 +523,7 @@ def _get_histogram_cube( long_name=long_name, var_name=var_name, units=units, - attributes = cube.attributes, + attributes=cube.attributes, cell_methods=cell_methods, dim_coords_and_dims=dim_spec, aux_coords_and_dims=[(a, cube.coord_dims(a)) for a in cube.aux_coords], From be33379e3bdd1e1f6ec54abc21c17bf9e86d59a7 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 5 Apr 2024 18:06:38 +0200 Subject: [PATCH 40/51] Fix codacy and codecov --- esmvalcore/preprocessor/_other.py | 21 +++++---- tests/unit/preprocessor/_other/test_other.py | 49 ++++++++++++++------ 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 2a457ed640..c123ed7f07 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -250,7 +250,10 @@ def histogram( """ # Check arguments if isinstance(bins, str): - raise TypeError("bins cannot be a str, must be int or Sequence of int") + raise TypeError( + f"bins cannot be a str (got '{bins}'), must be int or Sequence of " + f"int" + ) allowed_norms = (None, 'sum', 'integral') if normalization is not None and normalization not in allowed_norms: raise ValueError( @@ -443,12 +446,12 @@ def _calculate_histogram_eager( reshaped_weights = reshaped_weights.reshape(*shape_rem_dims, -1) # Apply vectorized version of np.histogram - def _get_hist_values(arr, weights): + def _get_hist_values(arr, wgts): mask = np.ma.getmaskarray(arr) arr = arr[~mask] - weights = weights[~mask] + wgts = wgts[~mask] return np.histogram( - arr, bins=bin_edges, range=bin_range, weights=weights + arr, bins=bin_edges, range=bin_range, weights=wgts )[0] v_histogram = np.vectorize(_get_hist_values, 
signature='(n),(n)->(m)') @@ -458,15 +461,17 @@ def _get_hist_values(arr, weights): # the histograms sums to 0) hist_sum = hist.sum(axis=-1, keepdims=True) mask = np.isclose(hist_sum, 0.0) - mask_broadcast = np.broadcast_to(mask, hist.shape) - hist = np.ma.array(hist, mask=mask_broadcast) + hist = np.ma.array(hist, mask=np.broadcast_to(mask, hist.shape)) # Apply normalization if normalization == 'sum': hist = hist / np.ma.array(hist_sum, mask=mask) elif normalization == 'integral': - diffs = np.ma.array(np.diff(bin_edges), dtype=data.dtype) - hist = hist / np.ma.array(hist_sum, mask=mask) / diffs + hist = ( + hist / + np.ma.array(hist_sum, mask=mask) / + np.ma.array(np.diff(bin_edges), dtype=data.dtype) + ) return hist diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index e362645c52..af56d593e0 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -345,21 +345,6 @@ def test_histogram_weights(cube, lazy, weights, normalization): ) -@pytest.mark.parametrize('lazy', [False, True]) -def test_histogram_fully_masked_no_bin_range(cube, lazy): - """Test `histogram`.""" - cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32) - if lazy: - cube.data = cube.lazy_data() - - msg = ( - r"Cannot calculate histogram for bin_range=\(masked, masked\) \(or " - r"for fully masked data when `bin_range` is not given\)" - ) - with pytest.raises(ValueError, match=msg): - histogram(cube) - - @pytest.fixture def cube_with_rich_metadata(): """Cube with rich metadata.""" @@ -457,5 +442,39 @@ def test_histogram_metadata( assert result.cell_measures() == input_cube.cell_measures() +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_fully_masked_no_bin_range(cube, lazy): + """Test `histogram`.""" + cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32) + if lazy: + cube.data = cube.lazy_data() + + msg = ( + r"Cannot calculate histogram for bin_range=\(masked, masked\) \(or " + r"for fully masked data when `bin_range` is not given\)" + ) + with pytest.raises(ValueError, match=msg): + histogram(cube) + + +def test_histogram_invalid_bins(cube): + """Test `histogram`.""" + msg = ( + r"bins cannot be a str \(got 'auto'\), must be int or Sequence of int" + ) + with pytest.raises(TypeError, match=msg): + histogram(cube, bins='auto') + + +def test_histogram_invalid_normalization(cube): + """Test `histogram`.""" + msg = ( + r"Expected one of \(None, 'sum', 'integral'\) for normalization, got " + r"'invalid'" + ) + with pytest.raises(ValueError, match=msg): + histogram(cube, normalization='invalid') + + if __name__ == '__main__': unittest.main() From ae70e53b359d0209c7a71f46b0054850b6079586 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 8 Apr 2024 12:53:35 +0200 Subject: [PATCH 41/51] Use histogram() in EMD calculation --- esmvalcore/preprocessor/_compare_with_refs.py | 170 ++++-------------- .../test_compare_with_refs.py | 14 +- 2 files changed, 46 insertions(+), 138 deletions(-) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index 81c7b7d101..1c8ccba295 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -2,7 +2,6 @@ from __future__ import annotations import logging -import string from collections.abc import Iterable from functools import partial from typing import TYPE_CHECKING, Literal, Optional @@ -24,6 +23,7 @@ get_all_coords, get_array_module, get_weights, + 
histogram, ) if TYPE_CHECKING: @@ -496,25 +496,44 @@ def _calculate_emd( n_bins: int = 100, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Earth mover's distance.""" + # Get probability mass functions (using histogram preprocessor) + all_data = da.stack([cube.core_data(), reference.core_data()]) + bin_range = dask.compute(all_data.min(), all_data.max()) + pmf = histogram( + cube, + coords=coords, + bins=n_bins, + bin_range=bin_range, + normalization='sum', + ) + pmf_ref = histogram( + reference, + coords=coords, + bins=n_bins, + bin_range=bin_range, + normalization='sum', + ) + bin_centers = pmf.coord(cube.name()).points + # Make sure that data is not chunked along `coords` - cube = rechunk_cube(cube, coords) - reference = rechunk_cube(reference, coords) + pmf = rechunk_cube(pmf, [cube.name()]) + pmf_ref = rechunk_cube(pmf_ref, [reference.name()]) # Data - axes = get_all_coord_dims(cube, coords) - (bins, bin_centers) = _get_bins(cube, reference, n_bins) - if cube.has_lazy_data() and reference.has_lazy_data(): - func = _calculate_emd_lazy # type: ignore + emd = da.apply_gufunc( + _get_emd, + '(i),(i),(i)->()', + pmf.lazy_data(), + pmf_ref.lazy_data(), + bin_centers, + axes=[(-1,), (-1,), (-1,), ()], + output_dtypes=pmf.dtype, + vectorize=True, + ) else: - func = _calculate_emd_eager # type: ignore - emd = func( - cube.core_data(), - reference.core_data(), - bins, - bin_centers, - along_axes=axes, - ) + v_get_emd = np.vectorize(_get_emd, signature='(n),(n),(n)->()') + emd = v_get_emd(pmf.data, pmf_ref.data, bin_centers) # Metadata metadata = CubeMetadata( @@ -529,119 +548,8 @@ def _calculate_emd( return (emd, metadata) -def _get_bins( - cube: Cube, - reference: Cube, - n_bins: int, -) -> tuple[np.ndarray, np.ndarray]: - """Get bins for discretization of data.""" - all_data = da.stack([cube.core_data(), reference.core_data()]) - (min_, max_) = dask.compute(all_data.min(), all_data.max()) - small_value = (max_ - min_) * 0.01 / n_bins - bins = np.linspace(min_ - small_value, max_ + small_value, n_bins + 1) - bin_centers = np.convolve(bins, np.ones(2), 'valid') / 2.0 - return (bins, bin_centers) - - -def _calculate_emd_lazy( - data: da.Array, - ref_data: da.Array, - bins: np.ndarray, - bin_centers: np.ndarray, - *, - along_axes: tuple[int, ...], -) -> da.Array: - """Calculate Earth mover's distance along axes (lazy version).""" - n_axes = len(along_axes) - - # da.apply_gufunc transposes the input array so that the axes given by the - # `axes` argument to da.apply_gufunc are the rightmost dimensions. Thus, we - # need to use `along_axes=(ndim-n_axes, ..., ndim-2, ndim-1)` for - # _calculate_emd_eager here. - axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) - - # The call signature depends also on the number of axes in `axes`, and will - # be (a,b,...),(a,b,...),(z),(y)->() where a,b,... are the data dimensions - # that are collapsed, z is the number of bin edges, and y the number of bin - # centers. 
- input_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" - signature = f"{input_signature},{input_signature},(z),(y)->()" - - emd = da.apply_gufunc( - _calculate_emd_eager, - signature, - data, - ref_data, - bins, - bin_centers, - axes=[along_axes, along_axes, (0,), (0,), ()], - output_dtypes=data.dtype, - along_axes=axes_in_chunk, - ) - - return emd - - -def _calculate_emd_eager( - data: np.ndarray, - ref_data: np.ndarray, - bins: np.ndarray, - bin_centers: np.ndarray, - *, - along_axes: tuple[int, ...], -) -> np.ndarray: - """Calculate Earth mover's distance along axes (eager version).""" - pmf = _get_pmf(data, along_axes, bins) - ref_pmf = _get_pmf(ref_data, along_axes, bins) - - # Get vectorized version of scipy.stats.wasserstein_distance that also - # handles masks properly and calculate EMD metric (= First Wasserstein - # distance) - def calc_emd(arr, ref_arr): - if np.ma.is_masked(arr) or np.ma.is_masked(ref_arr): - return np.nan - return wasserstein_distance(bin_centers, bin_centers, arr, ref_arr) - - v_calc_emd = np.vectorize(calc_emd, signature='(n),(n)->()') - emd = v_calc_emd(pmf, ref_pmf) - - return np.ma.masked_invalid(emd) - - -def _get_pmf( - data: np.ndarray, - axes: tuple[int, ...], - bins: np.ndarray, -) -> np.ndarray: - """Get probability mass function (PMF) of data along axes. - - This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are - the dimensions of `data` not appearing in `axes` and `n_bins` is the number - of bins. - - """ - # Create array with shape (x1, x2, ..., y) where `y` is the product of all - # dimensions in `axes` and the `xi` are the remaining dimensions - remaining_dims = tuple(a for a in range(data.ndim) if a not in axes) - reshaped_data = np.transpose(data, axes=(*remaining_dims, *axes)) - shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) - reshaped_data = reshaped_data.reshape(*shape_rem_dims, -1) - - # Apply vectorized version of np.histogram - def _get_hist_values(arr): - mask = np.ma.getmaskarray(arr) - arr = arr[~mask] - return np.histogram(arr, bins=bins, range=(bins[0], bins[-1]))[0] - - v_histogram = np.vectorize(_get_hist_values, signature='(n)->(m)') - hist = v_histogram(reshaped_data) - - # Mask points where all input data were masked (these are the ones where - # the histograms sums to 0) and normalize histrogram by number of samples - # that entered the calculation to get PMF - norm = hist.sum(axis=-1, keepdims=True) - mask = np.isclose(norm, 0.0) - mask_broadcast = np.broadcast_to(mask, hist.shape) - pmf = np.ma.array(hist, mask=mask_broadcast) / np.ma.array(norm, mask=mask) - - return pmf +def _get_emd(arr, ref_arr, bin_centers): + """Calculate Earth mover's distance (non-lazy).""" + if np.ma.is_masked(arr) or np.ma.is_masked(ref_arr): + return np.ma.masked # this is safe because PMFs will be masked arrays + return wasserstein_distance(bin_centers, bin_centers, arr, ref_arr) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index bc18c1f465..7cff433241 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -18,10 +18,10 @@ def assert_allclose(array_1, array_2): """Assert that (masked) array 1 is close to (masked) array 2.""" if np.ma.is_masked(array_1) or np.ma.is_masked(array_2): - np.testing.assert_array_equal(np.ma.getmaskarray(array_1), - np.ma.getmaskarray(array_2)) - mask = 
np.ma.getmaskarray(array_1) - np.testing.assert_allclose(array_1[~mask], array_2[~mask]) + mask_1 = np.ma.getmaskarray(array_1) + mask_2 = np.ma.getmaskarray(array_2) + np.testing.assert_equal(mask_1, mask_2) + np.testing.assert_allclose(array_1[~mask_1], array_2[~mask_2]) else: np.testing.assert_allclose(array_1, array_2) @@ -354,7 +354,7 @@ def test_reference_none_cubes(regular_cubes): ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'), ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'), - ('emd', 1.9866472482681274, 0.0, 'EMD', 'emd_tas', 'K'), + ('emd', 1.98625, 0.0, 'EMD', 'emd_tas', 'K'), ] AREA_WEIGHTS = CellMeasure( np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), @@ -467,7 +467,7 @@ def test_distance_metric( '1', ), ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), - ('emd', [0.980196, 2.9930985], 'EMD', 'emd_tas', 'K'), + ('emd', [0.98, 2.9925], 'EMD', 'emd_tas', 'K'), ] @@ -655,7 +655,7 @@ def test_distance_metric_fully_masked_data( assert not out_cube.has_lazy_data() assert out_cube.dtype == np.float64 - expected_data = np.ma.masked_invalid(np.nan) + expected_data = np.ma.masked_all(()) assert_allclose(out_cube.data, expected_data) assert out_cube.var_name == var_name assert out_cube.long_name == long_name From 1f8c927920da86400db35fa97ea996788e9e72a8 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 8 Apr 2024 13:37:35 +0200 Subject: [PATCH 42/51] Added weighted EMD --- doc/recipe/preprocessor.rst | 57 +++++++++++-------- esmvalcore/_recipe/check.py | 5 +- esmvalcore/preprocessor/_compare_with_refs.py | 40 ++++++++----- tests/integration/recipe/test_recipe.py | 5 +- .../test_compare_with_refs.py | 25 ++++---- 5 files changed, 79 insertions(+), 53 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 44adb16289..7db8db8737 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -2541,47 +2541,47 @@ recipe: * ``metric`` (:obj:`str`): Distance metric that is calculated. Must be one of - * ``'weighted_rmse'``: `Weighted root mean square error`_. + * ``'rmse'``: `Unweighted root mean square error`_. .. math:: - WRMSE = \sqrt{\sum_{i=1}^N w_i \left( x_i - r_i \right)^2} + RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N \left( x_i - r_i \right)^2} - * ``'rmse'``: `Unweighted root mean square error`_. + * ``'weighted_rmse'``: `Weighted root mean square error`_. .. math:: - RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N \left( x_i - r_i \right)^2} + WRMSE = \sqrt{\sum_{i=1}^N w_i \left( x_i - r_i \right)^2} - * ``'weighted_pearsonr'``: `Weighted Pearson correlation coefficient`_. + * ``'pearsonr'``: `Unweighted Pearson correlation coefficient`_. .. math:: r = \frac{ \sum_{i=1}^N - w_i \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) + \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) }{ - \sqrt{\sum_{i=1}^N w_i \left( x_i - \bar{x} \right)^2} - \sqrt{\sum_{i=1}^N w_i \left( r_i - \bar{r} \right)^2} + \sqrt{\sum_{i=1}^N \left( x_i - \bar{x} \right)^2} + \sqrt{\sum_{i=1}^N \left( r_i - \bar{r} \right)^2} } - * ``'pearsonr'``: `Unweighted Pearson correlation coefficient`_. + * ``'weighted_pearsonr'``: `Weighted Pearson correlation coefficient`_. .. 
math:: r = \frac{ \sum_{i=1}^N - \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) + w_i \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right) }{ - \sqrt{\sum_{i=1}^N \left( x_i - \bar{x} \right)^2} - \sqrt{\sum_{i=1}^N \left( r_i - \bar{r} \right)^2} + \sqrt{\sum_{i=1}^N w_i \left( x_i - \bar{x} \right)^2} + \sqrt{\sum_{i=1}^N w_i \left( r_i - \bar{r} \right)^2} } - * ``'emd'``: `Earth mover's distance`_, also known as first Wasserstein - metric `W`\ :sub:`1`. - The Wasserstein metric measures distances between two probability - distributions. - Here, we first create discrete probability distributions of the input data + + * ``'emd'``: `Unweighted Earth mover's distance`_ (EMD). + The EMD is also known as first Wasserstein metric `W`\ :sub:`1`, which is a + metric that measures distances between two probability distributions. + For this, discrete probability distributions of the input data are created through binning, which are then used as input for the Wasserstein metric. The metric is also known as `Earth mover's distance` since, intuitively, it can be seen as the minimum "cost" of turning one pile of earth into another @@ -2597,6 +2597,12 @@ recipe: \gamma_{ij} \lvert X_i - R_i \rvert \\ \textrm{with} ~~ \gamma 1 = p_X(X);~ \gamma^T 1 = p_R(R) + * ``'weighted_emd'``: `Weighted Earth mover's distance`_. + Similar to the unweighted EMD (see above), but here weights are considered + when calculating the probability distributions (i.e., instead of 1, each + element contributes a given weight in the bin count; see also `weights` + argument of :func:`numpy.histogram`). + Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest and a corresponding reference, respectively (a bar over a variable denotes its arithmetic/weighted mean [the latter for weighted metrics]). @@ -2636,12 +2642,13 @@ recipe: * Other parameters are directly used for the metric calculation. The following keyword arguments are supported: - * `weighted_rmse` and `rmse`: none. - * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all keyword + * `rmse` and `weighted_rmse`: none. + * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link for more details on these arguments). - * `emd`: ``n_bins`` = number of bins used to create discrete probability - distribution of data before calculating the EMD (:obj:`int`, default: 100). + * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create + discrete probability distribution of data before calculating the EMD + (:obj:`int`, default: 100). Example: @@ -2658,15 +2665,17 @@ Example: See also :func:`esmvalcore.preprocessor.distance_metric`. -.. _Weighted root mean square error: https://en.wikipedia.org/wiki/ - Root-mean-square_deviation .. _Unweighted root mean square error: https://en.wikipedia.org/wiki/ Root-mean-square_deviation +.. _Weighted root mean square error: https://en.wikipedia.org/wiki/ + Root-mean-square_deviation .. _Unweighted Pearson correlation coefficient: https://en.wikipedia.org/ wiki/Pearson_correlation_coefficient .. _Weighted Pearson correlation coefficient: https://en.wikipedia.org/ wiki/Pearson_correlation_coefficient -.. _Earth mover's distance: https://pythonot.github.io/ +.. _Unweighted Earth mover's distance: https://pythonot.github.io/ + quickstart.html#computing-wasserstein-distance +.. 
_Weighted Earth mover's distance: https://pythonot.github.io/
+   quickstart.html#computing-wasserstein-distance

diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py
index 829413ff45..c72bcce063 100644
--- a/esmvalcore/_recipe/check.py
+++ b/esmvalcore/_recipe/check.py
@@ -427,11 +427,12 @@ def _check_literal(
         step='distance_metric',
         option='metric',
         allowed_values=(
-            'weighted_rmse',
             'rmse',
-            'weighted_pearsonr',
+            'weighted_rmse',
             'pearsonr',
+            'weighted_pearsonr',
             'emd',
+            'weighted_emd',
         ),
     )
 
diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index 1c8ccba295..e9f98e4319 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -206,11 +206,12 @@ def _calculate_bias(cube: Cube, reference: Cube, bias_type: BiasType) -> Cube:
 
 
 MetricType = Literal[
-    'weighted_rmse',
     'rmse',
-    'weighted_pearsonr',
+    'weighted_rmse',
     'pearsonr',
+    'weighted_pearsonr',
     'emd',
+    'weighted_emd',
 ]
 
 
@@ -246,12 +247,15 @@ def distance_metric(
     metric:
         Distance metric that is calculated. Must be one of
 
-        * ``'weighted_rmse'``: Weighted root mean square error.
-        * ``'rmse'``: Unweighted root mean square error.
-        * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient.
-        * ``'pearsonr'``: Unweighted Pearson correlation coefficient.
-        * ``'emd'``: Earth mover's distance, also known as first Wasserstein
-          metric `W`\ :sub:`1`.
+        * ``'rmse'``: Unweighted root mean square error
+        * ``'weighted_rmse'``: Weighted root mean square error
+        * ``'pearsonr'``: Unweighted Pearson correlation coefficient
+        * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient
+        * ``'emd'``: Unweighted Earth mover's distance
+        * ``'weighted_emd'``: Weighted Earth mover's distance
+
+        The Earth mover's distance is also known as first Wasserstein metric
+        `W`\ :sub:`1`.
 
         A detailed description of these metrics can be found :ref:`here
         `.
@@ -285,12 +289,12 @@ def distance_metric(
         Additional options for the metric calculation. The following keyword
         arguments are supported:
 
-        * `weighted_rmse` and `rmse`: none.
-        * `weighted_pearsonr` and `pearsonr`: ``mdtol``, ``common_mask`` (all
+        * `rmse` and `weighted_rmse`: none.
+        * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all
           keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`,
           see that link for more details on these arguments).
-        * `emd`: ``n_bins`` = number of bins used to create discrete
-          probability distribition of data before calculating the EMD
+        * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create
+          discrete probability distribution of data before calculating the EMD
           (:obj:`int`, default: 100).
 
Returns @@ -405,13 +409,14 @@ def _calculate_metric( # possible since some operations (e.g., sqrt()) are not available for cubes coords = get_all_coords(cube, coords) metrics_funcs = { - 'weighted_rmse': partial(_calculate_rmse, weighted=True, **kwargs), 'rmse': partial(_calculate_rmse, weighted=False, **kwargs), + 'weighted_rmse': partial(_calculate_rmse, weighted=True, **kwargs), + 'pearsonr': partial(_calculate_pearsonr, weighted=False, **kwargs), 'weighted_pearsonr': partial( _calculate_pearsonr, weighted=True, **kwargs ), - 'pearsonr': partial(_calculate_pearsonr, weighted=False, **kwargs), - 'emd': partial(_calculate_emd, **kwargs), + 'emd': partial(_calculate_emd, weighted=False, **kwargs), + 'weighted_emd': partial(_calculate_emd, weighted=True, **kwargs), } if metric not in metrics_funcs: raise ValueError( @@ -494,8 +499,11 @@ def _calculate_emd( coords: Iterable[Coord] | Iterable[str], *, n_bins: int = 100, + weighted: bool, ) -> tuple[np.ndarray | da.Array, CubeMetadata]: """Calculate Earth mover's distance.""" + weights = get_weights(cube, coords) if weighted else None + # Get probability mass functions (using histogram preprocessor) all_data = da.stack([cube.core_data(), reference.core_data()]) bin_range = dask.compute(all_data.min(), all_data.max()) @@ -504,6 +512,7 @@ def _calculate_emd( coords=coords, bins=n_bins, bin_range=bin_range, + weights=weights, normalization='sum', ) pmf_ref = histogram( @@ -511,6 +520,7 @@ def _calculate_emd( coords=coords, bins=n_bins, bin_range=bin_range, + weights=weights, normalization='sum', ) bin_centers = pmf.coord(cube.name()).points diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index 47f3c074e7..385c14cf95 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -3281,8 +3281,9 @@ def test_invalid_metric(tmp_path, patched_datafinder, session): scripts: null """) msg = ( - "Expected one of ('weighted_rmse', 'rmse', 'weighted_pearsonr', " - "'pearsonr', 'emd') for `metric`, got 'INVALID'" + "Expected one of ('rmse', 'weighted_rmse', 'pearsonr', " + "'weighted_pearsonr', 'emd', 'weighted_emd') for `metric`, got " + "'INVALID'" ) with pytest.raises(RecipeError) as exc: get_recipe(tmp_path, content, session) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py index 7cff433241..21a23ae745 100644 --- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -350,11 +350,12 @@ def test_reference_none_cubes(regular_cubes): TEST_DISTANCE_METRICS = [ - ('weighted_rmse', 2.0, 0.0, 'RMSE', 'rmse_tas', 'K'), ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'), - ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), + ('weighted_rmse', 2.0, 0.0, 'RMSE', 'rmse_tas', 'K'), ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'), + ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'), ('emd', 1.98625, 0.0, 'EMD', 'emd_tas', 'K'), + ('weighted_emd', 0.9975, 0.0, 'EMD', 'emd_tas', 'K'), ] AREA_WEIGHTS = CellMeasure( np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2), @@ -457,8 +458,9 @@ def test_distance_metric( TEST_DISTANCE_METRICS_LAZY = [ - ('weighted_rmse', [1.2278657, 3.0784798], 'RMSE', 'rmse_tas', 'K'), ('rmse', [1.224744871, 3.082207001], 'RMSE', 'rmse_tas', 'K'), + ('weighted_rmse', [1.2278657, 3.0784798], 'RMSE', 'rmse_tas', 
'K'), + ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), ( 'weighted_pearsonr', [np.nan, 0.7745946], @@ -466,8 +468,8 @@ def test_distance_metric( 'pearsonr_tas', '1', ), - ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), ('emd', [0.98, 2.9925], 'EMD', 'emd_tas', 'K'), + ('weighted_emd', [0.9837506, 2.9888833], 'EMD', 'emd_tas', 'K'), ] @@ -667,11 +669,12 @@ def test_distance_metric_fully_masked_data( TEST_METRICS = [ - 'weighted_rmse', 'rmse', - 'weighted_pearsonr', + 'weighted_rmse', 'pearsonr', + 'weighted_pearsonr', 'emd', + 'weighted_emd', ] @@ -713,8 +716,9 @@ def test_invalid_metric(regular_cubes, ref_cubes): PreprocessorFile(ref_cubes, 'REF', {'reference_for_metric': True}), } msg = ( - r"Expected one of \['weighted_rmse', 'rmse', 'weighted_pearsonr', " - r"'pearsonr', 'emd'\] for metric, got 'invalid'" + r"Expected one of \['rmse', 'weighted_rmse', 'pearsonr', " + r"'weighted_pearsonr', 'emd', 'weighted_emd'\] for metric, got " + r"'invalid'" ) with pytest.raises(ValueError, match=msg): distance_metric(products, 'invalid') @@ -771,11 +775,12 @@ def test_distance_metric_non_matching_dims(regular_cubes, metric): @pytest.mark.parametrize( 'metric,error', [ - ('weighted_rmse', True), ('rmse', False), - ('weighted_pearsonr', True), + ('weighted_rmse', True), ('pearsonr', False), + ('weighted_pearsonr', True), ('emd', False), + ('weighted_emd', True), ] ) def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error): From 85652fbc3c6991dcd224bd9b271aa3ea6db29337 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 8 Apr 2024 13:50:28 +0200 Subject: [PATCH 43/51] Added histogram to list of preproc --- esmvalcore/preprocessor/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index a27b265b44..7ac7aa556a 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -46,7 +46,7 @@ mask_outside_range, ) from ._multimodel import ensemble_statistics, multi_model_statistics -from ._other import clip +from ._other import clip, histogram from ._regrid import ( extract_coordinate_points, extract_levels, @@ -175,6 +175,8 @@ 'linear_trend_stderr', # Convert units 'convert_units', + # Histograms + 'histogram', # Ensemble statistics 'ensemble_statistics', # Multi model statistics From 2f58f8fd2480e450d44bb9f7717f43c30a30c848 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Tue, 16 Apr 2024 10:23:25 +0200 Subject: [PATCH 44/51] Fix doc build --- esmvalcore/preprocessor/_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index c123ed7f07..d84286bc21 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -243,7 +243,7 @@ def histogram( Invalid `normalization` or `bin_range` given or `bin_range` is ``None`` and data is fully masked. iris.exceptions.CoordinateNotFoundError - ``longitude` is not found in cube if `weights=True`, `latitude` is in + `longitude` is not found in cube if `weights=True`, `latitude` is in `coords`, and no `cell_area` is given as :ref:`supplementary_variables`. 
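
As a brief aside between commits: the `histogram` preprocessor that this
series adds (and exports via `esmvalcore.preprocessor`, see above) can be
called directly on a cube. The following minimal sketch is illustrative only
and is not part of any commit; the synthetic cube mirrors the unit-test
fixtures, and it assumes an environment with these patches applied.

# Bin three time steps of a toy temperature cube into two classes; with
# normalization='sum', the resulting frequencies add up to 1.
import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube

from esmvalcore.preprocessor import histogram

time = DimCoord(
    [0.0, 1.0, 2.0],
    bounds=[[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]],
    standard_name='time',
    units='days since 2000-01-01',
)
cube = Cube(
    np.array([0.2, 1.4, 1.8], dtype=np.float32),
    long_name='Air Temperature',
    units='K',
    dim_coords_and_dims=[(time, 0)],
)

# Collapse the time dimension into two temperature bins
hist = histogram(
    cube,
    coords=['time'],
    bins=[0.0, 1.0, 2.0],
    bin_range=(0.0, 2.0),
    normalization='sum',
)
print(hist.long_name)  # 'Relative Frequency of Air Temperature'
print(hist.data)       # approximately [0.33, 0.67]
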

From 8cf2cf6e99f0929908f4e5d46aafdf3f753bbcf Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Wed, 17 Apr 2024 15:24:54 +0200
Subject: [PATCH 45/51] Use common_mask=True for pearsonr

---
 doc/recipe/preprocessor.rst                        | 2 ++
 esmvalcore/preprocessor/_compare_with_refs.py      | 8 +++++++-
 .../_compare_with_refs/test_compare_with_refs.py   | 5 ++++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index c89e4768c7..08c5c77cdb 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2670,6 +2670,8 @@ recipe:
 * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all keyword
   arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that
   link for more details on these arguments).
+  Note: in contrast to :func:`~iris.analysis.stats.pearsonr`,
+  ``common_mask=True`` by default.
 * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create
   discrete probability distribution of data before calculating the EMD
   (:obj:`int`, default: 100).
diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index e9f98e4319..d1f11294b8 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -292,7 +292,9 @@ def distance_metric(
         * `rmse` and `weighted_rmse`: none.
         * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all
           keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`,
-          see that link for more details on these arguments).
+          see that link for more details on these arguments). Note: in contrast
+          to :func:`~iris.analysis.stats.pearsonr`, ``common_mask=True`` by
+          default).
         * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create
           discrete probability distribution of data before calculating the EMD
           (:obj:`int`, default: 100).
@@ -471,6 +473,10 @@ def _calculate_pearsonr(
     **kwargs,
 ) -> tuple[np.ndarray | da.Array, CubeMetadata]:
     """Calculate Pearson correlation coefficient."""
+    # Here, we want to use common_mask=True in iris.analysis.stats.pearsonr
+    # (iris' default is common_mask=False)
+    kwargs.setdefault('common_mask', True)
+
     # Data
     weights = get_weights(cube, coords) if weighted else None
     res_cube = iris.analysis.stats.pearsonr(
diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
index 21a23ae745..7def9afe1b 100644
--- a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
+++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py
@@ -515,7 +515,10 @@ def test_distance_metric_lazy(
     assert out_cube.shape == (2,)
     assert out_cube.dtype == np.float32
     assert out_cube.has_lazy_data()
-    assert_allclose(out_cube.data, np.array(data, dtype=np.float32))
+    assert_allclose(
+        out_cube.data,
+        np.ma.masked_invalid(np.array(data, dtype=np.float32)),
+    )
     assert out_cube.coord('time') == regular_cubes[0].coord('time')
     assert out_cube.var_name == var_name
     assert out_cube.long_name == long_name

From 9d2c206c8c791e7c873dc0c531dca11705f6a49b Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Wed, 17 Apr 2024 15:29:35 +0200
Subject: [PATCH 46/51] Fixed typo

---
 esmvalcore/preprocessor/_compare_with_refs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index d1f11294b8..f2710adbfb 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -294,7 +294,7 @@ def distance_metric(
           keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`,
           see that link for more details on these arguments). Note: in contrast
           to :func:`~iris.analysis.stats.pearsonr`, ``common_mask=True`` by
-          default).
+          default.
         * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create
           discrete probability distribution of data before calculating the EMD
           (:obj:`int`, default: 100).
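
Before the refactoring commit that follows, the EMD calculation above can be
summarized in isolation. This sketch mirrors the `_get_emd` helper and the
'sum'-normalized histograms used by `_calculate_emd`, but uses plain
NumPy/SciPy with synthetic values; it is illustrative only and not part of
the patch series.

# Discretize both samples onto shared bins, turn the counts into
# probability mass functions, and compute the first Wasserstein distance.
import numpy as np
from scipy.stats import wasserstein_distance

data = np.array([273.0, 273.5, 274.0, 276.0])
ref_data = np.array([274.5, 275.0, 275.5, 276.5])

# Shared bins over the combined data range (cf. _calculate_emd)
n_bins = 4
bin_range = (
    min(data.min(), ref_data.min()),
    max(data.max(), ref_data.max()),
)
bin_edges = np.linspace(bin_range[0], bin_range[1], n_bins + 1)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Probability mass functions (histograms with 'sum' normalization)
pmf = np.histogram(data, bins=bin_edges)[0] / data.size
pmf_ref = np.histogram(ref_data, bins=bin_edges)[0] / ref_data.size

# First Wasserstein distance between the two binned distributions
emd = wasserstein_distance(bin_centers, bin_centers, pmf, pmf_ref)
print(emd)  # 0.875 for these synthetic values
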
From 9a2a2e26de6739b58152f4924d1738398de82ffd Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 26 Apr 2024 16:11:40 +0200 Subject: [PATCH 47/51] Moved shared preprocessor functions to _shared module --- esmvalcore/_recipe/recipe.py | 2 +- esmvalcore/preprocessor/_area.py | 121 +------ esmvalcore/preprocessor/_compare_with_refs.py | 4 +- esmvalcore/preprocessor/_multimodel.py | 7 +- esmvalcore/preprocessor/_other.py | 127 +------ esmvalcore/preprocessor/_regrid.py | 54 +-- esmvalcore/preprocessor/_shared.py | 316 +++++++++++++++++- esmvalcore/preprocessor/_time.py | 35 +- esmvalcore/preprocessor/_volume.py | 4 +- tests/unit/preprocessor/_area/test_area.py | 19 -- tests/unit/preprocessor/_other/test_other.py | 49 +-- tests/unit/preprocessor/test_shared.py | 94 ++++++ 12 files changed, 432 insertions(+), 400 deletions(-) diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index dab27c8597..2c1d01cd20 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -37,13 +37,13 @@ ) from esmvalcore.preprocessor._area import _update_shapefile_path from esmvalcore.preprocessor._multimodel import _get_stat_identifier -from esmvalcore.preprocessor._other import _group_products from esmvalcore.preprocessor._regrid import ( _spec_to_latlonvals, get_cmor_levels, get_reference_levels, parse_cell_spec, ) +from esmvalcore.preprocessor._shared import _group_products from . import check from .from_datasets import datasets_to_recipe diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 4a9ecebdb6..dd2f9b4b4b 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -6,7 +6,6 @@ from __future__ import annotations import logging -import warnings from pathlib import Path from typing import TYPE_CHECKING, Iterable, Literal, Optional @@ -16,16 +15,15 @@ import shapely import shapely.ops from dask import array as da -from iris.coords import AuxCoord, CellMeasure +from iris.coords import AuxCoord from iris.cube import Cube, CubeList -from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError +from iris.exceptions import CoordinateNotFoundError -from esmvalcore.preprocessor._regrid import broadcast_to_shape from esmvalcore.preprocessor._shared import ( get_iris_aggregator, get_normalized_cube, - guess_bounds, preserve_float_dtype, + try_adding_calculated_cell_area, update_weights_kwargs, ) from esmvalcore.preprocessor._supplementary_vars import ( @@ -290,117 +288,6 @@ def meridional_statistics( return result -def compute_area_weights(cube): - """Compute area weights.""" - with warnings.catch_warnings(record=True) as caught_warnings: - warnings.filterwarnings( - 'always', - message="Using DEFAULT_SPHERICAL_EARTH_RADIUS.", - category=UserWarning, - module='iris.analysis.cartography', - ) - # TODO: replace the following line with - # weights = iris.analysis.cartography.area_weights( - # cube, compute=not cube.has_lazy_data() - # ) - # once https://github.com/SciTools/iris/pull/5658 is available - weights = _get_area_weights(cube) - - for warning in caught_warnings: - logger.debug( - "%s while computing area weights of the following cube:\n%s", - warning.message, cube) - return weights - - -def _get_area_weights(cube: Cube) -> np.ndarray | da.Array: - """Get area weights. - - For non-lazy data, simply use the according iris function. For lazy data, - calculate area weights for a single lat-lon slice and broadcast it to the - correct shape. 
- - Note - ---- - This is a temporary workaround to get lazy area weights. Can be removed - once https://github.com/SciTools/iris/pull/5658 is available. - - """ - if not cube.has_lazy_data(): - return iris.analysis.cartography.area_weights(cube) - - lat_lon_dims = sorted( - tuple(set(cube.coord_dims('latitude') + cube.coord_dims('longitude'))) - ) - lat_lon_slice = next(cube.slices(['latitude', 'longitude'], ordered=False)) - weights_2d = iris.analysis.cartography.area_weights(lat_lon_slice) - weights = broadcast_to_shape( - da.array(weights_2d), - cube.shape, - lat_lon_dims, - chunks=cube.lazy_data().chunks, - ) - return weights - - -def _try_adding_calculated_cell_area(cube: Cube) -> None: - """Try to add calculated cell measure 'cell_area' to cube (in-place).""" - if cube.cell_measures('cell_area'): - return - - logger.debug( - "Found no cell measure 'cell_area' in cube %s. Check availability of " - "supplementary variables", - cube.summary(shorten=True), - ) - logger.debug("Attempting to calculate grid cell area") - - regular_grid = all([ - cube.coord('latitude').points.ndim == 1, - cube.coord('longitude').points.ndim == 1, - cube.coord_dims('latitude') != cube.coord_dims('longitude'), - ]) - rotated_pole_grid = all([ - cube.coord('latitude').points.ndim == 2, - cube.coord('longitude').points.ndim == 2, - cube.coords('grid_latitude'), - cube.coords('grid_longitude'), - ]) - - # For regular grids, calculate grid cell areas with iris function - if regular_grid: - cube = guess_bounds(cube, ['latitude', 'longitude']) - logger.debug("Calculating grid cell areas for regular grid") - cell_areas = compute_area_weights(cube) - - # For rotated pole grids, use grid_latitude and grid_longitude to calculate - # grid cell areas - elif rotated_pole_grid: - cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) - cube_tmp = cube.copy() - cube_tmp.remove_coord('latitude') - cube_tmp.coord('grid_latitude').rename('latitude') - cube_tmp.remove_coord('longitude') - cube_tmp.coord('grid_longitude').rename('longitude') - logger.debug("Calculating grid cell areas for rotated pole grid") - cell_areas = compute_area_weights(cube_tmp) - - # For all other cases, grid cell areas cannot be calculated - else: - logger.error( - "Supplementary variables are needed to calculate grid cell " - "areas for irregular or unstructured grid of cube %s", - cube.summary(shorten=True), - ) - raise CoordinateMultiDimError(cube.coord('latitude')) - - # Add new cell measure - cell_measure = CellMeasure( - cell_areas, standard_name='cell_area', units='m2', measure='area', - ) - cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) - - @register_supplementaries( variables=['areacella', 'areacello'], required='prefer_at_least_one', @@ -458,7 +345,7 @@ def area_statistics( # Get aggregator and correct kwargs (incl. 
weights) (agg, agg_kwargs) = get_iris_aggregator(operator, **operator_kwargs) agg_kwargs = update_weights_kwargs( - agg, agg_kwargs, 'cell_area', cube, _try_adding_calculated_cell_area + agg, agg_kwargs, 'cell_area', cube, try_adding_calculated_cell_area ) result = cube.collapsed(['latitude', 'longitude'], agg, **agg_kwargs) diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py index f2710adbfb..df7708306e 100644 --- a/esmvalcore/preprocessor/_compare_with_refs.py +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -18,12 +18,12 @@ from esmvalcore.iris_helpers import rechunk_cube from esmvalcore.preprocessor._io import concatenate -from esmvalcore.preprocessor._other import ( +from esmvalcore.preprocessor._other import histogram +from esmvalcore.preprocessor._shared import ( get_all_coord_dims, get_all_coords, get_array_module, get_weights, - histogram, ) if TYPE_CHECKING: diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 5390517a26..d1e0d90e74 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -26,13 +26,14 @@ from iris.util import equalise_attributes, new_axis from esmvalcore.iris_helpers import date2num +from esmvalcore.preprocessor._shared import ( + _group_products, + get_iris_aggregator, +) from esmvalcore.preprocessor._supplementary_vars import ( remove_supplementary_variables, ) -from ._other import _group_products -from ._shared import get_iris_aggregator - if TYPE_CHECKING: from esmvalcore.preprocessor import PreprocessorFile diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index d84286bc21..fa5a83033d 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -3,7 +3,6 @@ import logging import string -from collections import defaultdict from collections.abc import Iterable, Sequence from typing import Literal @@ -13,12 +12,14 @@ import numpy as np from iris.coords import Coord, DimCoord from iris.cube import Cube -from iris.exceptions import CoordinateNotFoundError -from iris.util import broadcast_to_shape from esmvalcore.iris_helpers import rechunk_cube -from esmvalcore.preprocessor._area import _try_adding_calculated_cell_area -from esmvalcore.preprocessor._time import get_time_weights +from esmvalcore.preprocessor._shared import ( + get_all_coord_dims, + get_all_coords, + get_array_module, + get_weights, +) logger = logging.getLogger(__name__) @@ -53,122 +54,6 @@ def clip(cube, minimum=None, maximum=None): return cube -def _groupby(iterable, keyfunc): - """Group iterable by key function. - - The items are grouped by the value that is returned by the `keyfunc` - - Parameters - ---------- - iterable : list, tuple or iterable - List of items to group - keyfunc : callable - Used to determine the group of each item. These become the keys - of the returned dictionary - - Returns - ------- - dict - Returns a dictionary with the grouped values. - """ - grouped = defaultdict(set) - for item in iterable: - key = keyfunc(item) - grouped[key].add(item) - - return grouped - - -def _group_products(products, by_key): - """Group products by the given list of attributes.""" - def grouper(product): - return product.group(by_key) - - grouped = _groupby(products, keyfunc=grouper) - return grouped.items() - - -def get_array_module(*args): - """Return the best matching array module. 
- - If at least one of the arguments is a :class:`dask.array.Array` object, - the :mod:`dask.array` module is returned. In all other cases the - :mod:`numpy` module is returned. - """ - for arg in args: - if isinstance(arg, da.Array): - return da - return np - - -def get_all_coords( - cube: Cube, - coords: Iterable[Coord] | Iterable[str] | None, -) -> Iterable[Coord] | Iterable[str]: - """Get all desired coordinates in a cube.""" - if coords is None: - coords = [c.name() for c in cube.dim_coords] - if len(coords) != cube.ndim: - raise ValueError( - f"If coords=None is specified, the cube " - f"{cube.summary(shorten=True)} must not have unnamed " - f"dimensions" - ) - return coords - - -def get_all_coord_dims( - cube: Cube, - coords: Iterable[Coord] | Iterable[str], -) -> tuple[int, ...]: - """Get sorted list of all coordinate dimensions from coordinates.""" - all_coord_dims = [] - for coord in coords: - all_coord_dims.extend(cube.coord_dims(coord)) - sorted_all_coord_dims = sorted(list(set(all_coord_dims))) - return tuple(sorted_all_coord_dims) - - -def get_weights( - cube: Cube, - coords: Iterable[Coord] | Iterable[str], -) -> np.ndarray | da.Array: - """Calculate suitable weights for given coordinates.""" - npx = get_array_module(cube.core_data()) - weights = npx.ones_like(cube.core_data()) - - # Time weights: lengths of time interval - if 'time' in coords: - weights *= broadcast_to_shape( - npx.array(get_time_weights(cube)), - cube.shape, - cube.coord_dims('time'), - ) - - # Latitude weights: cell areas - if 'latitude' in coords: - cube = cube.copy() # avoid overwriting input cube - if ( - not cube.cell_measures('cell_area') and - not cube.coords('longitude') - ): - raise CoordinateNotFoundError( - f"Cube {cube.summary(shorten=True)} needs a `longitude` " - f"coordinate to calculate cell area weights for weighted " - f"distance metric over coordinates {coords} (alternatively, " - f"a `cell_area` can be given to the cube as supplementary " - f"variable)" - ) - _try_adding_calculated_cell_area(cube) - weights *= broadcast_to_shape( - cube.cell_measure('cell_area').core_data(), - cube.shape, - cube.cell_measure_dims('cell_area'), - ) - - return weights - - def histogram( cube: Cube, coords: Iterable[Coord] | Iterable[str] | None = None, diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index de43db1427..52d7441492 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -11,7 +11,6 @@ import warnings from copy import deepcopy from decimal import Decimal -from functools import partial from pathlib import Path from typing import TYPE_CHECKING, Any, Optional @@ -30,8 +29,11 @@ from esmvalcore.cmor.table import CMOR_TABLES from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.iris_helpers import has_irregular_grid, has_unstructured_grid -from esmvalcore.preprocessor._other import get_array_module -from esmvalcore.preprocessor._shared import preserve_float_dtype +from esmvalcore.preprocessor._shared import ( + broadcast_to_shape, + get_array_module, + preserve_float_dtype, +) from esmvalcore.preprocessor._supplementary_vars import ( add_ancillary_variable, add_cell_measure, @@ -1024,52 +1026,6 @@ def _create_cube(src_cube, data, src_levels, levels): return result -def is_lazy_masked_data(array): - """Similar to `iris._lazy_data.is_lazy_masked_data`.""" - return isinstance(array, da.Array) and isinstance( - da.utils.meta_from_array(array), np.ma.MaskedArray) - - -def broadcast_to_shape(array, shape, 
dim_map, chunks=None): - """Copy of `iris.util.broadcast_to_shape` that allows specifying chunks.""" - if isinstance(array, da.Array): - if chunks is not None: - chunks = list(chunks) - for src_idx, tgt_idx in enumerate(dim_map): - # Only use the specified chunks along new dimensions or on - # dimensions that have size 1 in the source array. - if array.shape[src_idx] != 1: - chunks[tgt_idx] = array.chunks[src_idx] - broadcast = partial(da.broadcast_to, shape=shape, chunks=chunks) - else: - broadcast = partial(np.broadcast_to, shape=shape) - - n_orig_dims = len(array.shape) - n_new_dims = len(shape) - n_orig_dims - array = array.reshape(array.shape + (1,) * n_new_dims) - - # Get dims in required order. - array = np.moveaxis(array, range(n_orig_dims), dim_map) - new_array = broadcast(array) - - if np.ma.isMA(array): - # broadcast_to strips masks so we need to handle them explicitly. - mask = np.ma.getmask(array) - if mask is np.ma.nomask: - new_mask = np.ma.nomask - else: - new_mask = broadcast(mask) - new_array = np.ma.array(new_array, mask=new_mask) - - elif is_lazy_masked_data(array): - # broadcast_to strips masks so we need to handle them explicitly. - mask = da.ma.getmaskarray(array) - new_mask = broadcast(mask) - new_array = da.ma.masked_array(new_array, new_mask) - - return new_array - - def _vertical_interpolate(cube, src_levels, levels, interpolation, extrapolation): """Perform vertical interpolation.""" diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 0a5f5627dd..773ca59476 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -8,15 +8,17 @@ import logging import re import warnings -from collections.abc import Callable -from functools import wraps +from collections import defaultdict +from collections.abc import Callable, Iterable +from functools import partial, wraps from typing import Any, Literal, Optional import dask.array as da import iris.analysis import numpy as np -from iris.coords import DimCoord +from iris.coords import CellMeasure, Coord, DimCoord from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.typing import DataType @@ -24,7 +26,6 @@ logger = logging.getLogger(__name__) -# guess bounds tool def guess_bounds(cube, coords): """Guess bounds of a cube, or not.""" # check for bounds just in case @@ -266,3 +267,310 @@ def wrapper(data: DataType, *args: Any, **kwargs: Any) -> DataType: return result return wrapper + + +def _groupby(iterable, keyfunc): + """Group iterable by key function. + + The items are grouped by the value that is returned by the `keyfunc` + + Parameters + ---------- + iterable : list, tuple or iterable + List of items to group + keyfunc : callable + Used to determine the group of each item. These become the keys + of the returned dictionary + + Returns + ------- + dict + Returns a dictionary with the grouped values. + """ + grouped = defaultdict(set) + for item in iterable: + key = keyfunc(item) + grouped[key].add(item) + + return grouped + + +def _group_products(products, by_key): + """Group products by the given list of attributes.""" + def grouper(product): + return product.group(by_key) + + grouped = _groupby(products, keyfunc=grouper) + return grouped.items() + + +def get_array_module(*args): + """Return the best matching array module. 
+ + If at least one of the arguments is a :class:`dask.array.Array` object, + the :mod:`dask.array` module is returned. In all other cases the + :mod:`numpy` module is returned. + """ + for arg in args: + if isinstance(arg, da.Array): + return da + return np + + +def broadcast_to_shape(array, shape, dim_map, chunks=None): + """Copy of `iris.util.broadcast_to_shape` that allows specifying chunks.""" + if isinstance(array, da.Array): + if chunks is not None: + chunks = list(chunks) + for src_idx, tgt_idx in enumerate(dim_map): + # Only use the specified chunks along new dimensions or on + # dimensions that have size 1 in the source array. + if array.shape[src_idx] != 1: + chunks[tgt_idx] = array.chunks[src_idx] + broadcast = partial(da.broadcast_to, shape=shape, chunks=chunks) + else: + broadcast = partial(np.broadcast_to, shape=shape) + + n_orig_dims = len(array.shape) + n_new_dims = len(shape) - n_orig_dims + array = array.reshape(array.shape + (1,) * n_new_dims) + + # Get dims in required order. + array = np.moveaxis(array, range(n_orig_dims), dim_map) + new_array = broadcast(array) + + if np.ma.isMA(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = np.ma.getmask(array) + if mask is np.ma.nomask: + new_mask = np.ma.nomask + else: + new_mask = broadcast(mask) + new_array = np.ma.array(new_array, mask=new_mask) + + elif _is_lazy_masked_data(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = da.ma.getmaskarray(array) + new_mask = broadcast(mask) + new_array = da.ma.masked_array(new_array, new_mask) + + return new_array + + +def _is_lazy_masked_data(array): + """Similar to `iris._lazy_data.is_lazy_masked_data`.""" + return isinstance(array, da.Array) and isinstance( + da.utils.meta_from_array(array), np.ma.MaskedArray) + + +def get_weights( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> np.ndarray | da.Array: + """Calculate suitable weights for given coordinates.""" + npx = get_array_module(cube.core_data()) + weights = npx.ones_like(cube.core_data()) + + # Time weights: lengths of time interval + if 'time' in coords: + weights *= broadcast_to_shape( + npx.array(get_time_weights(cube)), + cube.shape, + cube.coord_dims('time'), + ) + + # Latitude weights: cell areas + if 'latitude' in coords: + cube = cube.copy() # avoid overwriting input cube + if ( + not cube.cell_measures('cell_area') and + not cube.coords('longitude') + ): + raise CoordinateNotFoundError( + f"Cube {cube.summary(shorten=True)} needs a `longitude` " + f"coordinate to calculate cell area weights for weighted " + f"distance metric over coordinates {coords} (alternatively, " + f"a `cell_area` can be given to the cube as supplementary " + f"variable)" + ) + try_adding_calculated_cell_area(cube) + weights *= broadcast_to_shape( + cube.cell_measure('cell_area').core_data(), + cube.shape, + cube.cell_measure_dims('cell_area'), + ) + + return weights + + +def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: + """Compute the weighting of the time axis. + + Parameters + ---------- + cube: + Input cube. + + Returns + ------- + np.ndarray or da.Array + Array of time weights for averaging. Returns a + :class:`dask.array.Array` if the input cube has lazy data; a + :class:`numpy.ndarray` otherwise. 
+ + """ + time = cube.coord('time') + coord_dims = cube.coord_dims('time') + + # Multidimensional time coordinates are not supported: In this case, + # weights cannot be simply calculated as difference between the bounds + if len(coord_dims) > 1: + raise ValueError( + f"Weighted statistical operations are not supported for " + f"{len(coord_dims):d}D time coordinates, expected 0D or 1D" + ) + + # Extract 1D time weights (= lengths of time intervals) + time_weights = time.lazy_bounds()[:, 1] - time.lazy_bounds()[:, 0] + if not cube.has_lazy_data(): + time_weights = time_weights.compute() + return time_weights + + +def try_adding_calculated_cell_area(cube: Cube) -> None: + """Try to add calculated cell measure 'cell_area' to cube (in-place).""" + if cube.cell_measures('cell_area'): + return + + logger.debug( + "Found no cell measure 'cell_area' in cube %s. Check availability of " + "supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell area") + + regular_grid = all([ + cube.coord('latitude').points.ndim == 1, + cube.coord('longitude').points.ndim == 1, + cube.coord_dims('latitude') != cube.coord_dims('longitude'), + ]) + rotated_pole_grid = all([ + cube.coord('latitude').points.ndim == 2, + cube.coord('longitude').points.ndim == 2, + cube.coords('grid_latitude'), + cube.coords('grid_longitude'), + ]) + + # For regular grids, calculate grid cell areas with iris function + if regular_grid: + cube = guess_bounds(cube, ['latitude', 'longitude']) + logger.debug("Calculating grid cell areas for regular grid") + cell_areas = _compute_area_weights(cube) + + # For rotated pole grids, use grid_latitude and grid_longitude to calculate + # grid cell areas + elif rotated_pole_grid: + cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) + cube_tmp = cube.copy() + cube_tmp.remove_coord('latitude') + cube_tmp.coord('grid_latitude').rename('latitude') + cube_tmp.remove_coord('longitude') + cube_tmp.coord('grid_longitude').rename('longitude') + logger.debug("Calculating grid cell areas for rotated pole grid") + cell_areas = _compute_area_weights(cube_tmp) + + # For all other cases, grid cell areas cannot be calculated + else: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular or unstructured grid of cube %s", + cube.summary(shorten=True), + ) + raise CoordinateMultiDimError(cube.coord('latitude')) + + # Add new cell measure + cell_measure = CellMeasure( + cell_areas, standard_name='cell_area', units='m2', measure='area', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + +def _compute_area_weights(cube): + """Compute area weights.""" + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.filterwarnings( + 'always', + message="Using DEFAULT_SPHERICAL_EARTH_RADIUS.", + category=UserWarning, + module='iris.analysis.cartography', + ) + # TODO: replace the following line with + # weights = iris.analysis.cartography.area_weights( + # cube, compute=not cube.has_lazy_data() + # ) + # once https://github.com/SciTools/iris/pull/5658 is available + weights = _get_area_weights(cube) + + for warning in caught_warnings: + logger.debug( + "%s while computing area weights of the following cube:\n%s", + warning.message, cube) + return weights + + +def _get_area_weights(cube: Cube) -> np.ndarray | da.Array: + """Get area weights. + + For non-lazy data, simply use the according iris function. 
+
+
+def _get_area_weights(cube: Cube) -> np.ndarray | da.Array:
+    """Get area weights.
+
+    For non-lazy data, simply use the corresponding iris function. For lazy
+    data, calculate area weights for a single lat-lon slice and broadcast it
+    to the correct shape.
+
+    Note
+    ----
+    This is a temporary workaround to get lazy area weights. Can be removed
+    once https://github.com/SciTools/iris/pull/5658 is available.
+
+    """
+    if not cube.has_lazy_data():
+        return iris.analysis.cartography.area_weights(cube)
+
+    lat_lon_dims = sorted(
+        tuple(set(cube.coord_dims('latitude') + cube.coord_dims('longitude')))
+    )
+    lat_lon_slice = next(cube.slices(['latitude', 'longitude'], ordered=False))
+    weights_2d = iris.analysis.cartography.area_weights(lat_lon_slice)
+    weights = broadcast_to_shape(
+        da.array(weights_2d),
+        cube.shape,
+        lat_lon_dims,
+        chunks=cube.lazy_data().chunks,
+    )
+    return weights
+
+
+def get_all_coords(
+    cube: Cube,
+    coords: Iterable[Coord] | Iterable[str] | None,
+) -> Iterable[Coord] | Iterable[str]:
+    """Get all desired coordinates in a cube."""
+    if coords is None:
+        coords = [c.name() for c in cube.dim_coords]
+        if len(coords) != cube.ndim:
+            raise ValueError(
+                f"If coords=None is specified, the cube "
+                f"{cube.summary(shorten=True)} must not have unnamed "
+                f"dimensions"
+            )
+    return coords
+
+
+def get_all_coord_dims(
+    cube: Cube,
+    coords: Iterable[Coord] | Iterable[str],
+) -> tuple[int, ...]:
+    """Get sorted list of all coordinate dimensions from coordinates."""
+    all_coord_dims = []
+    for coord in coords:
+        all_coord_dims.extend(cube.coord_dims(coord))
+    sorted_all_coord_dims = sorted(list(set(all_coord_dims)))
+    return tuple(sorted_all_coord_dims)
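A short sketch of the two coordinate helpers above (illustration only, not
part of the patch)::

    import numpy as np
    from iris.coords import DimCoord
    from iris.cube import Cube

    from esmvalcore.preprocessor._shared import (
        get_all_coord_dims,
        get_all_coords,
    )

    lat = DimCoord([0.0, 10.0], standard_name='latitude', units='degrees')
    lon = DimCoord(
        [0.0, 10.0, 20.0], standard_name='longitude', units='degrees'
    )
    cube = Cube(np.zeros((2, 3)), dim_coords_and_dims=[(lat, 0), (lon, 1)])

    # coords=None expands to all dimensional coordinates of the cube.
    print(get_all_coords(cube, None))  # ['latitude', 'longitude']

    # Dimensions are de-duplicated and returned sorted.
    print(get_all_coord_dims(cube, ['longitude', 'latitude']))  # (0, 1)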
diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py
index ea3aec8619..2bc8adcbce 100644
--- a/esmvalcore/preprocessor/_time.py
+++ b/esmvalcore/preprocessor/_time.py
@@ -32,6 +32,7 @@
 from esmvalcore.iris_helpers import date2num, rechunk_cube
 from esmvalcore.preprocessor._shared import (
     get_iris_aggregator,
+    get_time_weights,
     preserve_float_dtype,
     update_weights_kwargs,
 )
@@ -383,40 +384,6 @@ def extract_month(cube: Cube, month: int) -> Cube:
     return result
 
 
-def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array:
-    """Compute the weighting of the time axis.
-
-    Parameters
-    ----------
-    cube:
-        Input cube.
-
-    Returns
-    -------
-    np.ndarray or da.Array
-        Array of time weights for averaging. Returns a
-        :class:`dask.array.Array` if the input cube has lazy data; a
-        :class:`numpy.ndarray` otherwise.
-
-    """
-    time = cube.coord('time')
-    coord_dims = cube.coord_dims('time')
-
-    # Multidimensional time coordinates are not supported: In this case,
-    # weights cannot be simply calculated as difference between the bounds
-    if len(coord_dims) > 1:
-        raise ValueError(
-            f"Weighted statistical operations are not supported for "
-            f"{len(coord_dims):d}D time coordinates, expected 0D or 1D"
-        )
-
-    # Extract 1D time weights (= lengths of time intervals)
-    time_weights = time.lazy_bounds()[:, 1] - time.lazy_bounds()[:, 0]
-    if not cube.has_lazy_data():
-        time_weights = time_weights.compute()
-    return time_weights
-
-
 def _aggregate_time_fx(result_cube, source_cube):
     time_dim = set(source_cube.coord_dims(source_cube.coord('time')))
     if source_cube.cell_measures():
diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py
index 239fa99758..3d04f23500 100644
--- a/esmvalcore/preprocessor/_volume.py
+++ b/esmvalcore/preprocessor/_volume.py
@@ -15,11 +15,11 @@
 from iris.coords import AuxCoord, CellMeasure
 from iris.cube import Cube
 
-from ._area import _try_adding_calculated_cell_area
 from ._shared import (
     get_iris_aggregator,
     get_normalized_cube,
     preserve_float_dtype,
+    try_adding_calculated_cell_area,
     update_weights_kwargs,
 )
 from ._supplementary_vars import register_supplementaries
@@ -155,7 +155,7 @@ def calculate_volume(cube: Cube) -> da.core.Array:
 
     # Get or calculate the horizontal areas of the cube
     has_cell_measure = bool(cube.cell_measures('cell_area'))
-    _try_adding_calculated_cell_area(cube)
+    try_adding_calculated_cell_area(cube)
     area = cube.cell_measure('cell_area').copy()
     area_dim = cube.cell_measure_dims(area)
 
diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py
index a7374a5e8b..e7c3f998c5 100644
--- a/tests/unit/preprocessor/_area/test_area.py
+++ b/tests/unit/preprocessor/_area/test_area.py
@@ -2,7 +2,6 @@
 import unittest
 from pathlib import Path
 
-import dask.array as da
 import fiona
 import iris
 import numpy as np
@@ -18,7 +17,6 @@
 import tests
 from esmvalcore.preprocessor._area import (
     _crop_cube,
-    _get_area_weights,
     _get_requested_geometries,
     _update_shapefile_path,
     area_statistics,
@@ -1447,22 +1445,5 @@ def test_time_dependent_volcello():
     assert cube.shape == cube.cell_measure('ocean_volume').shape
 
 
-@pytest.mark.parametrize('lazy', [True, False])
-def test_get_area_weights(lazy):
-    """Test _get_area_weights."""
-    cube = _create_sample_full_cube()
-    if lazy:
-        cube.data = cube.lazy_data()
-    weights = _get_area_weights(cube)
-    if lazy:
-        assert isinstance(weights, da.Array)
-        assert weights.chunks == cube.lazy_data().chunks
-    else:
-        assert isinstance(weights, np.ndarray)
-    np.testing.assert_allclose(
-        weights, iris.analysis.cartography.area_weights(cube)
-    )
-
-
 if __name__ == '__main__':
     unittest.main()
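The switch from the private ``_try_adding_calculated_cell_area`` to the shared
helper can be exercised directly on a regular grid; a hedged sketch
(illustration only, not part of the patch)::

    import numpy as np
    from iris.coords import DimCoord
    from iris.cube import Cube

    from esmvalcore.preprocessor._shared import (
        try_adding_calculated_cell_area,
    )

    lat = DimCoord(
        np.arange(-85.0, 90.0, 10.0),
        standard_name='latitude',
        units='degrees',
    )
    lon = DimCoord(
        np.arange(5.0, 360.0, 10.0),
        standard_name='longitude',
        units='degrees',
    )
    cube = Cube(np.zeros((18, 36)), dim_coords_and_dims=[(lat, 0), (lon, 1)])

    # Regular grid: bounds are guessed and cell areas computed on the fly.
    try_adding_calculated_cell_area(cube)
    print(cube.cell_measure('cell_area').units)  # m2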
diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py
index af56d593e0..e5d6a871e1 100644
--- a/tests/unit/preprocessor/_other/test_other.py
+++ b/tests/unit/preprocessor/_other/test_other.py
@@ -19,13 +19,7 @@
 from iris.cube import Cube
 from numpy.testing import assert_array_equal
 
-from esmvalcore.preprocessor import PreprocessorFile
-from esmvalcore.preprocessor._other import (
-    _group_products,
-    clip,
-    get_array_module,
-    histogram,
-)
+from esmvalcore.preprocessor._other import clip, histogram
 from tests.unit.preprocessor._compare_with_refs.test_compare_with_refs import (
     get_3d_cube,
 )
@@ -63,47 +57,6 @@ def test_clip(self):
         clip(cube, 10, 8)
 
 
-def test_group_products_string_list():
-    products = [
-        PreprocessorFile(
-            filename='A_B.nc',
-            attributes={
-                'project': 'A',
-                'dataset': 'B',
-            },
-        ),
-        PreprocessorFile(
-            filename='A_C.nc',
-            attributes={
-                'project': 'A',
-                'dataset': 'C',
-            }
-        ),
-    ]
-    grouped_by_string = _group_products(products, 'project')
-    grouped_by_list = _group_products(products, ['project'])
-
-    assert grouped_by_list == grouped_by_string
-
-
-def test_get_array_module_da():
-
-    npx = get_array_module(da.array([1, 2]))
-    assert npx is da
-
-
-def test_get_array_module_np():
-
-    npx = get_array_module(np.array([1, 2]))
-    assert npx is np
-
-
-def test_get_array_module_mixed():
-
-    npx = get_array_module(da.array([1]), np.array([1]))
-    assert npx is da
-
-
 @pytest.fixture
 def cube():
     """Regular cube."""
diff --git a/tests/unit/preprocessor/test_shared.py b/tests/unit/preprocessor/test_shared.py
index f931f449a9..79324d5e8a 100644
--- a/tests/unit/preprocessor/test_shared.py
+++ b/tests/unit/preprocessor/test_shared.py
@@ -6,11 +6,16 @@
 import iris.analysis
 import numpy as np
 import pytest
+from cf_units import Unit
 from iris.cube import Cube
 
 from esmvalcore.exceptions import ESMValCoreDeprecationWarning
+from esmvalcore.preprocessor import PreprocessorFile
 from esmvalcore.preprocessor._shared import (
+    _get_area_weights,
+    _group_products,
     aggregator_accept_weights,
+    get_array_module,
     get_iris_aggregator,
     preserve_float_dtype,
 )
@@ -236,3 +241,92 @@ def test_preserve_float_dtype(data, dtype):
     assert _dummy_func.__name__ == '_dummy_func'
     signature = inspect.signature(_dummy_func)
     assert list(signature.parameters) == ['obj', 'arg', 'kwarg']
+
+
+def test_get_array_module_da():
+    npx = get_array_module(da.array([1, 2]))
+    assert npx is da
+
+
+def test_get_array_module_np():
+    npx = get_array_module(np.array([1, 2]))
+    assert npx is np
+
+
+def test_get_array_module_mixed():
+    npx = get_array_module(da.array([1]), np.array([1]))
+    assert npx is da
+
+
+def _create_sample_full_cube():
+    cube = Cube(np.zeros((4, 180, 360)), var_name='co2', units='J')
+    cube.add_dim_coord(
+        iris.coords.DimCoord(
+            np.array([10., 40., 70., 110.]),
+            standard_name='time',
+            units=Unit('days since 1950-01-01 00:00:00', calendar='gregorian'),
+        ),
+        0,
+    )
+    cube.add_dim_coord(
+        iris.coords.DimCoord(
+            np.arange(-90., 90., 1.),
+            standard_name='latitude',
+            units='degrees',
+        ),
+        1,
+    )
+    cube.add_dim_coord(
+        iris.coords.DimCoord(
+            np.arange(0., 360., 1.),
+            standard_name='longitude',
+            units='degrees',
+        ),
+        2,
+    )
+
+    cube.coord("time").guess_bounds()
+    cube.coord("longitude").guess_bounds()
+    cube.coord("latitude").guess_bounds()
+
+    return cube
+
+
+@pytest.mark.parametrize('lazy', [True, False])
+def test_get_area_weights(lazy):
+    """Test _get_area_weights."""
+    cube = _create_sample_full_cube()
+    if lazy:
+        cube.data = cube.lazy_data()
+    weights = _get_area_weights(cube)
+    if lazy:
+        assert isinstance(weights, da.Array)
+        assert weights.chunks == cube.lazy_data().chunks
+    else:
+        assert isinstance(weights, np.ndarray)
+    np.testing.assert_allclose(
+        weights, iris.analysis.cartography.area_weights(cube)
+    )
+
+
+def test_group_products_string_list():
+    products = [
+        PreprocessorFile(
+            filename='A_B.nc',
+            attributes={
+                'project': 'A',
+                'dataset': 'B',
+            },
+        ),
+        PreprocessorFile(
+            filename='A_C.nc',
+            attributes={
+                'project': 'A',
+                'dataset': 'C',
+            }
+        ),
+    ]
+    grouped_by_string = _group_products(products, 'project')
+    grouped_by_list = _group_products(products, ['project'])
+
+    assert grouped_by_list == grouped_by_string

From 14c7fb45a5fdb3de421c10040fe17fbbe4db84c0 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Fri, 26 Apr 2024 17:13:07 +0200
Subject: [PATCH 48/51] 100% coverage

---
 tests/unit/preprocessor/test_shared.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/unit/preprocessor/test_shared.py b/tests/unit/preprocessor/test_shared.py
index 79324d5e8a..4f70b2b419 100644
--- a/tests/unit/preprocessor/test_shared.py
+++ b/tests/unit/preprocessor/test_shared.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 from cf_units import Unit
+from iris.coords import AuxCoord
 from iris.cube import Cube
 
 from esmvalcore.exceptions import ESMValCoreDeprecationWarning
@@ -18,6 +19,7 @@
     get_array_module,
     get_iris_aggregator,
     preserve_float_dtype,
+    try_adding_calculated_cell_area,
 )
 
 
@@ -330,3 +332,18 @@ def test_group_products_string_list():
     grouped_by_list = _group_products(products, ['project'])
 
     assert grouped_by_list == grouped_by_string
+
+
+def test_try_adding_calculated_cell_area():
+    """Test ``try_adding_calculated_cell_area``."""
+    cube = _create_sample_full_cube()
+    cube.coord('latitude').rename('grid_latitude')
+    cube.coord('longitude').rename('grid_longitude')
+    lat = AuxCoord(np.zeros((180, 360)), standard_name='latitude')
+    lon = AuxCoord(np.zeros((180, 360)), standard_name='longitude')
+    cube.add_aux_coord(lat, (1, 2))
+    cube.add_aux_coord(lon, (1, 2))
+
+    try_adding_calculated_cell_area(cube)
+
+    assert cube.cell_measures('cell_area')

From 37f06f88d3a7fa58ecef62b1fd4b694069673377 Mon Sep 17 00:00:00 2001
From: Manuel Schlund
Date: Mon, 29 Apr 2024 17:04:36 +0200
Subject: [PATCH 49/51] Proper dtype handling

---
 esmvalcore/preprocessor/_compare_with_refs.py | 4 +++-
 esmvalcore/preprocessor/_other.py             | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
index df7708306e..1634fc1752 100644
--- a/esmvalcore/preprocessor/_compare_with_refs.py
+++ b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -24,6 +24,7 @@
     get_all_coords,
     get_array_module,
     get_weights,
+    preserve_float_dtype,
 )
 
 if TYPE_CHECKING:
@@ -384,6 +385,7 @@ def distance_metric(
     return output_products
 
 
+@preserve_float_dtype
 def _calculate_metric(
     cube: Cube,
     reference: Cube,
@@ -429,7 +431,7 @@ def _calculate_metric(
     # Get result cube with correct dimensional metadata by using dummy
     # operation (max)
     res_cube = cube.collapsed(coords, iris.analysis.MAX)
-    res_cube.data = res_data.astype(cube.dtype)
+    res_cube.data = res_data
     res_cube.metadata = res_metadata
     res_cube.cell_methods = [*cube.cell_methods, CellMethod(metric, coords)]
 
diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py
index fa5a83033d..3d047a4e24 100644
--- a/esmvalcore/preprocessor/_other.py
+++ b/esmvalcore/preprocessor/_other.py
@@ -19,6 +19,7 @@
     get_all_coords,
     get_array_module,
     get_weights,
+    preserve_float_dtype,
 )
 
 logger = logging.getLogger(__name__)
@@ -54,6 +55,7 @@ def clip(cube, minimum=None, maximum=None):
     return cube
 
 
+@preserve_float_dtype
 def histogram(
     cube: Cube,
     coords: Iterable[Coord] | Iterable[str] | None = None,
@@ -169,7 +171,6 @@ def histogram(
         bin_range=bin_range,
         normalization=normalization,
     )
-    hist_data = hist_data.astype(cube.dtype)
 
     # Get final cube
     hist_cube = _get_histogram_cube(
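The decorator removes the need for the explicit ``astype`` casts deleted
above; a minimal sketch of the assumed behavior (illustration only, not part
of the patch, with a hypothetical ``_double`` helper)::

    import numpy as np
    from iris.cube import Cube

    from esmvalcore.preprocessor._shared import preserve_float_dtype

    @preserve_float_dtype
    def _double(cube: Cube) -> Cube:
        # Hypothetical function whose float64 intermediate result would
        # otherwise leak into the output.
        return cube.copy((cube.core_data() * 2.0).astype(np.float64))

    cube = Cube(np.arange(3, dtype=np.float32))
    assert _double(cube).dtype == np.float32  # input float dtype is kept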

From 3c3dad2b4461f526ef9173c40380da315e177b72 Mon Sep 17 00:00:00 2001
From: Manuel Schlund <32543114+schlunma@users.noreply.github.com>
Date: Fri, 3 May 2024 13:46:59 +0200
Subject: [PATCH 50/51] Update doc/recipe/preprocessor.rst

Co-authored-by: Axel Lauer
---
 doc/recipe/preprocessor.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 9d01faf69d..cb28841442 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2851,7 +2851,6 @@ recipe:
   `bin_range`.
   If `bins` is a sequence, it defines a monotonically increasing array of bin
   edges, including the rightmost edge, allowing for non-uniform bin widths.
-  also calculate the distance of the reference dataset with itself.
 * ``bin_range`` (:obj:`tuple` of :obj:`float` or ``None``, default: ``None``):
   The lower and upper range of the bins.
   If ``None``, `bin_range` is simply (``cube.core_data().min(),

From cbb668045640f833a2c3f158b7d38e827115e82c Mon Sep 17 00:00:00 2001
From: Manuel Schlund <32543114+schlunma@users.noreply.github.com>
Date: Fri, 3 May 2024 13:48:04 +0200
Subject: [PATCH 51/51] Apply suggestions from code review

Co-authored-by: Axel Lauer
---
 doc/recipe/preprocessor.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index cb28841442..74229e08eb 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -2663,7 +2663,7 @@ recipe:
 
 * ``'emd'``: `Unweighted Earth mover's distance`_ (EMD).
   The EMD is also known as first Wasserstein metric `W`\ :sub:`1`, which is a
-  metric that measures distances between two probability distributions.
+  metric that measures distance between two probability distributions.
   For this, discrete probability distributions of the input data are created
   through binning, which are then used as input for the Wasserstein metric.
   The metric is also known as `Earth mover's distance` since, intuitively, it
@@ -2683,7 +2683,7 @@ recipe:
 * ``'weighted_emd'``: `Weighted Earth mover's distance`_.
   Similar to the unweighted EMD (see above), but here weights are considered
   when calculating the probability distributions (i.e., instead of 1, each
-  element contributes a given weight in the bin count; see also `weights`
+  element provides a weight in the bin count; see also `weights`
   argument of :func:`numpy.histogram`).
   Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest
@@ -2871,7 +2871,7 @@ recipe:
   coordinates will trigger weighting: `time` (will use lengths of time
   intervals as weights) and `latitude` (will use cell area weights).
   Time weights are always calculated from the input data.
-  Area weights can be given as supplementary variables to the recipe
+  Area weights can be given as supplementary variables in the recipe
   (`areacella` or `areacello`, see :ref:`supplementary_variables`) or
   calculated from the input data (this only works for regular grids).
   By default, **NO** supplementary variables will be used; they need to be
@@ -2882,7 +2882,7 @@ recipe:
   If ``'integral'``, the result is the value of the probability `density`
   function at the bin, normalized such that the integral over the range is 1.
   If ``'sum'``, the result is the value of the probability `mass` function at
-  the bin, normalized such that the sum over the range is 1.
+  the bin, normalized such that the sum over the whole range is 1.
   Normalization will be applied across `coords`, not the entire cube.
 
   Example: