diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 694c75f57a..74229e08eb 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -27,7 +27,7 @@ roughly following the default order in which preprocessor functions are applied: * :ref:`Detrend` * :ref:`Rolling window statistics` * :ref:`Unit conversion` -* :ref:`Bias` +* :ref:`comparison_with_ref` * :ref:`Other` See :ref:`preprocessor_functions` for implementation details and the exact default order. @@ -91,13 +91,13 @@ supported too if proper keyword arguments are specified: .. _supported_stat_operator: ============================== ================================================= ===================================== -`operator` Corresponding :class:`~iris.analysis.Aggregator` Weighted? [1]_ +`operator` Corresponding :class:`~iris.analysis.Aggregator` Weighted? [#f1]_ ============================== ================================================= ===================================== ``gmean`` :const:`iris.analysis.GMEAN` no ``hmean`` :const:`iris.analysis.HMEAN` no ``max`` :const:`iris.analysis.MAX` no ``mean`` :const:`iris.analysis.MEAN` yes -``median`` :const:`iris.analysis.MEDIAN` [2]_ no +``median`` :const:`iris.analysis.MEDIAN` [#f2]_ no ``min`` :const:`iris.analysis.MIN` no ``peak`` :const:`iris.analysis.PEAK` no ``percentile`` :const:`iris.analysis.PERCENTILE` no @@ -108,7 +108,7 @@ supported too if proper keyword arguments are specified: ``wpercentile`` :const:`iris.analysis.WPERCENTILE` yes ============================== ================================================= ===================================== -.. [1] The following preprocessor support weighted statistics by default: +.. [#f1] The following preprocessor support weighted statistics by default: :func:`~esmvalcore.preprocessor.area_statistics`: weighted by grid cell areas (see also :ref:`preprocessors_using_supplementary_variables`); :func:`~esmvalcore.preprocessor.climate_statistics`: weighted by lengths of @@ -117,7 +117,7 @@ supported too if proper keyword arguments are specified: :ref:`preprocessors_using_supplementary_variables`); :func:`~esmvalcore.preprocessor.axis_statistics`: weighted by corresponding coordinate bounds. -.. [2] :const:`iris.analysis.MEDIAN` is not lazy, but much faster than +.. [#f2] :const:`iris.analysis.MEDIAN` is not lazy, but much faster than :const:`iris.analysis.PERCENTILE`. For a lazy median, use ``percentile`` with the keyword argument ``percent: 50``. @@ -301,23 +301,29 @@ or to perform their computations. In ESMValCore we call both types of variables "supplementary variables". 
-============================================================== ============================== =====================================
-Preprocessor Variable short name Variable standard name
-============================================================== ============================== =====================================
-:ref:`area_statistics` ``areacella``, ``areacello`` cell_area
-:ref:`mask_landsea` ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
-:ref:`mask_landseaice` ``sftgif`` land_ice_area_fraction
-:ref:`volume_statistics` ``volcello``, ``areacello`` ocean_volume, cell_area
-:ref:`weighting_landsea_fraction` ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
-============================================================== ============================== =====================================
+===================================================================== ============================== =====================================
+Preprocessor Variable short name Variable standard name
+===================================================================== ============================== =====================================
+:ref:`area_statistics` [#f4]_ ``areacella``, ``areacello`` cell_area
+:ref:`mask_landsea` [#f4]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
+:ref:`mask_landseaice` [#f3]_ ``sftgif`` land_ice_area_fraction
+:ref:`volume_statistics` [#f4]_ ``volcello``, ``areacello`` ocean_volume, cell_area
+:ref:`weighting_landsea_fraction` [#f3]_ ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
+:ref:`distance_metric` [#f5]_ ``areacella``, ``areacello`` cell_area
+:ref:`histogram` [#f5]_ ``areacella``, ``areacello`` cell_area
+===================================================================== ============================== =====================================
+
+.. [#f3] This preprocessor requires at least one of the mentioned supplementary
+   variables. If none is defined in the recipe, they are automatically looked
+   for. If none is found, an error will be raised.
+.. [#f4] This preprocessor prefers at least one of the mentioned supplementary
+   variables. If none is defined in the recipe, they are automatically looked
+   for. If none is found, a warning will be raised (but no error).
+.. [#f5] This preprocessor optionally takes one of the mentioned supplementary
+   variables. If none is defined in the recipe, none is added.

 Only one of the listed variables is required. Supplementary variables can be defined in the recipe as described in :ref:`supplementary_variables`.
-In some cases, preprocessor functions may work without supplementary variables,
-this is documented case by case in the preprocessor function definition.
-If a preprocessor function requiring supplementary variables is used
-without specifying these in the recipe, these will be automatically
-added.
 If the automatic selection does not give the desired result, specify the supplementary variables in the recipe as described in :ref:`supplementary_variables`.

@@ -2510,15 +2516,17 @@ the time units in the variable.

 See also :func:`esmvalcore.preprocessor.accumulate_coordinate.`

-.. _bias:
+.. _comparison_with_ref:

-Bias
-====
+Comparison with reference dataset
+=================================

-The bias module contains the following preprocessor functions:
+This module contains the following preprocessor functions:

 * ``bias``: Calculate absolute or relative biases with respect to a reference
-  dataset
+  dataset.
+* ``distance_metric``: Calculate distance metrics (e.g., RMSE, Pearson's r,
+  Earth mover's distance) with respect to a reference dataset.

 ``bias``
 --------

@@ -2583,6 +2591,178 @@ Example:

 See also :func:`esmvalcore.preprocessor.bias`.

+.. _distance_metric:
+
+``distance_metric``
+-------------------
+
+This function calculates a distance metric with respect to a given reference
+dataset.
+For this, exactly one input dataset needs to be declared as
+``reference_for_metric: true`` in the recipe, e.g.,
+
+.. code-block:: yaml
+
+  datasets:
+    - {dataset: CanESM5, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+    - {dataset: CESM2, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+    - {dataset: MIROC6, project: CMIP6, ensemble: r1i1p1f1, grid: gn}
+    - {dataset: ERA-Interim, project: OBS6, tier: 3, type: reanaly, version: 1,
+       reference_for_metric: true}
+
+In the example above, ERA-Interim is used as the reference dataset for the
+distance metric calculation.
+All datasets need to have the same shape and coordinates.
+To ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or
+:func:`esmvalcore.preprocessor.regrid_time` might be helpful.
+
+The ``distance_metric`` preprocessor supports the following arguments in the
+recipe:
+
+.. _list_of_distance_metrics:
+
+* ``metric`` (:obj:`str`): Distance metric that is calculated.
+  Must be one of
+
+  * ``'rmse'``: `Unweighted root mean square error`_.
+
+    .. math::
+
+      RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N \left( x_i - r_i \right)^2}
+
+  * ``'weighted_rmse'``: `Weighted root mean square error`_.
+
+    .. math::
+
+      WRMSE = \sqrt{\sum_{i=1}^N w_i \left( x_i - r_i \right)^2}
+
+  * ``'pearsonr'``: `Unweighted Pearson correlation coefficient`_.
+
+    .. math::
+
+      r = \frac{
+        \sum_{i=1}^N
+        \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right)
+      }{
+        \sqrt{\sum_{i=1}^N \left( x_i - \bar{x} \right)^2}
+        \sqrt{\sum_{i=1}^N \left( r_i - \bar{r} \right)^2}
+      }
+
+  * ``'weighted_pearsonr'``: `Weighted Pearson correlation coefficient`_.
+
+    .. math::
+
+      r = \frac{
+        \sum_{i=1}^N
+        w_i \left( x_i - \bar{x} \right) \left( r_i - \bar{r} \right)
+      }{
+        \sqrt{\sum_{i=1}^N w_i \left( x_i - \bar{x} \right)^2}
+        \sqrt{\sum_{i=1}^N w_i \left( r_i - \bar{r} \right)^2}
+      }
+
+
+  * ``'emd'``: `Unweighted Earth mover's distance`_ (EMD).
+    The EMD is also known as the first Wasserstein metric `W`\ :sub:`1`, a
+    metric that measures the distance between two probability distributions.
+    For this, discrete probability distributions of the input data are created
+    through binning, which are then used as input for the Wasserstein metric.
+    The metric is called `Earth mover's distance` since, intuitively, it can be
+    seen as the minimum "cost" of turning one pile of earth into another one
+    (pile of earth = probability distribution).
+    This is also known as the `optimal transport` problem.
+    Formally, this can be described with a joint probability distribution (or
+    `optimal transport matrix`) γ (whose marginals are the input distributions)
+    that minimizes the "transportation cost":
+
+    .. math::

+      W_1 = \min_{\gamma \in \mathbb{R}^{n \times n}_{+}} \sum_{i,j}^{n}
+      \gamma_{ij} \lvert X_i - R_j \rvert \\
+      \textrm{with} ~~ \gamma 1 = p_X(X);~ \gamma^T 1 = p_R(R)
+
+  * ``'weighted_emd'``: `Weighted Earth mover's distance`_.
+    Similar to the unweighted EMD (see above), but here weights are considered
+    when calculating the probability distributions (i.e., instead of 1, each
+    element provides a weight in the bin count; see also the `weights`
+    argument of :func:`numpy.histogram`).
+ + Here, `x`\ :sub:`i` and `r`\ :sub:`i` are samples of a variable of interest + and a corresponding reference, respectively (a bar over a variable denotes + its arithmetic/weighted mean [the latter for weighted metrics]). + Capital letters (`X`\ :sub:`i` and `R`\ :sub:`i`) refer to bin centers of a + discrete probability distribution with values `p`\ :sub:`X`\ (`X`\ :sub:`i`) + or `p`\ :sub:`R`\ (`R`\ :sub:`i`) and a number of bins `n` (see the argument + ``n_bins`` below) that has been derived for the variables `x` and `r` through + binning. + `w`\ :sub:`i` are weights that sum to one (see note below) and `N` is the + total number of samples. + + .. note:: + Metrics starting with `weighted_` will calculate weighted distance metrics + if possible. + Currently, the following `coords` (or any combinations that include them) + will trigger weighting: `time` (will use lengths of time intervals as + weights) and `latitude` (will use cell area weights). + Time weights are always calculated from the input data. + Area weights can be given as supplementary variables to the recipe + (`areacella` or `areacello`, see :ref:`supplementary_variables`) or + calculated from the input data (this only works for regular grids). + By default, **NO** supplementary variables will be used; they need to be + explicitly requested in the recipe. +* ``coords`` (:obj:`list` of :obj:`str`, default: ``None``): Coordinates over + which the distance metric is calculated. + If ``None``, calculate the metric over all coordinates, which results in a + scalar cube. +* ``keep_reference_dataset`` (:obj:`bool`, default: ``True``): If ``True``, + also calculate the distance of the reference dataset with itself. + If ``False``, drop the reference dataset. +* ``exclude`` (:obj:`list` of :obj:`str`): Exclude specific datasets from + this preprocessor. + Note that this option is only available in the recipe, not when using + :func:`esmvalcore.preprocessor.distance_metric` directly (e.g., in another + python script). + If the reference dataset has been excluded, an error is raised. +* Other parameters are directly used for the metric calculation. + The following keyword arguments are supported: + + * `rmse` and `weighted_rmse`: none. + * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all keyword + arguments are passed to :func:`iris.analysis.stats.pearsonr`, see that link + for more details on these arguments). + Note: in contrast to :func:`~iris.analysis.stats.pearsonr`, + ``common_mask=True`` by default. + * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create + discrete probability distribution of data before calculating the EMD + (:obj:`int`, default: 100). + +Example: + +.. code-block:: yaml + + preprocessors: + preproc_pearsonr: + distance_metric: + metric: weighted_pearsonr + coords: [latitude, longitude] + keep_reference_dataset: true + exclude: [CanESM2] + common_mask: true + +See also :func:`esmvalcore.preprocessor.distance_metric`. + +.. _Unweighted root mean square error: https://en.wikipedia.org/wiki/ + Root-mean-square_deviation +.. _Weighted root mean square error: https://en.wikipedia.org/wiki/ + Root-mean-square_deviation +.. _Unweighted Pearson correlation coefficient: https://en.wikipedia.org/ + wiki/Pearson_correlation_coefficient +.. _Weighted Pearson correlation coefficient: https://en.wikipedia.org/ + wiki/Pearson_correlation_coefficient +.. _Unweighted Earth mover's distance: https://pythonot.github.io/ + quickstart.html#computing-wasserstein-distance +.. 
_Weighted Earth mover's distance: https://pythonot.github.io/ + quickstart.html#computing-wasserstein-distance + .. _Memory use: @@ -2649,3 +2829,73 @@ The example below shows how to set all values below zero to zero. clip: minimum: 0 maximum: null + +.. _histogram: + +``histogram`` +------------------- + +This function calculates histograms. + +The ``histogram`` preprocessor supports the following arguments in the +recipe: + +* ``coords`` (:obj:`list` of :obj:`str`, default: ``None``): Coordinates over + which the histogram is calculated. + If ``None``, calculate the histogram over all coordinates. + The shape of the output cube will be `(x1, x2, ..., n_bins)`, where `xi` are + the dimensions of the input cube not appearing in `coords` and `n_bins` is + the number of bins. +* ``bins`` (:obj:`int` or sequence of :obj:`float`, default: 10): If `bins` is + an :obj:`int`, it defines the number of equal-width bins in the given + `bin_range`. + If `bins` is a sequence, it defines a monotonically increasing array of bin + edges, including the rightmost edge, allowing for non-uniform bin widths. +* ``bin_range`` (:obj:`tuple` of :obj:`float` or ``None``, default: ``None``): + The lower and upper range of the bins. + If ``None``, `bin_range` is simply (``cube.core_data().min(), + cube.core_data().max()``). + Values outside the range are ignored. + The first element of the range must be less than or equal to the second. + `bin_range` affects the automatic bin computation as well if `bins` is an + :obj:`int` (see description for `bins` above). +* ``weights`` (array-like, :obj:`bool`, or ``None``, default: ``None``): + Weights for the histogram calculation. + Each value in the input data only contributes its associated weight towards + the bin count (instead of 1). + Weights are normalized before entering the calculation if `normalization` is + ``'integral'`` or ``'sum'``. + Can be an array of the same shape as the input data, ``False`` or ``None`` + (no weighting), or ``True``. + In the latter case, weighting will depend on `coords`, and the following + coordinates will trigger weighting: `time` (will use lengths of time + intervals as weights) and `latitude` (will use cell area weights). + Time weights are always calculated from the input data. + Area weights can be given as supplementary variables in the recipe + (`areacella` or `areacello`, see :ref:`supplementary_variables`) or + calculated from the input data (this only works for regular grids). + By default, **NO** supplementary variables will be used; they need to be + explicitly requested in the recipe. +* ``normalization`` (``None``, ``'sum'``, or ``'integral'``, default: + ``None``): If ``None``, the result will contain the number of samples in each + bin. + If ``'integral'``, the result is the value of the probability `density` + function at the bin, normalized such that the integral over the range is 1. + If ``'sum'``, the result is the value of the probability `mass` function at + the bin, normalized such that the sum over the whole range is 1. + Normalization will be applied across `coords`, not the entire cube. + +Example: + +.. code-block:: yaml + + preprocessors: + preproc_histogram: + histogram: + coords: [latitude, longitude] + bins: 12 + bin_range: [100.0, 150.0] + weights: true + normalization: sum + +See also :func:`esmvalcore.preprocessor.histogram`. 
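+
+To make the relationship between these preprocessors concrete, the weighted
+metrics from :ref:`list_of_distance_metrics` and the binned probability
+distributions used by the EMD (cf. ``histogram`` with ``normalization: sum``)
+can be written out with plain NumPy/SciPy.
+The following sketch is purely illustrative and is **not** the ESMValCore
+implementation; the sample arrays ``x``, ``r``, and ``w`` are made up.
+
+.. code-block:: python
+
+   import numpy as np
+   from scipy.stats import wasserstein_distance
+
+   x = np.array([1.0, 2.0, 4.0, 3.0])  # dataset samples x_i (made up)
+   r = np.array([1.5, 2.5, 3.5, 2.5])  # reference samples r_i (made up)
+   w = np.array([1.0, 1.0, 2.0, 2.0])
+   w = w / w.sum()  # weights w_i must sum to one
+
+   # Weighted RMSE: sqrt(sum_i w_i (x_i - r_i)^2)
+   wrmse = np.sqrt(np.sum(w * (x - r) ** 2))
+
+   # Weighted Pearson correlation coefficient (weighted means and covariances)
+   x_mean = np.sum(w * x)
+   r_mean = np.sum(w * r)
+   wpearsonr = np.sum(w * (x - x_mean) * (r - r_mean)) / (
+       np.sqrt(np.sum(w * (x - x_mean) ** 2))
+       * np.sqrt(np.sum(w * (r - r_mean) ** 2))
+   )
+
+   # EMD: bin both samples into discrete probability distributions that sum
+   # to 1 (this is what the histogram step with normalization 'sum' does),
+   # then compare the two distributions with the first Wasserstein metric.
+   bin_edges = np.linspace(0.0, 5.0, 11)
+   bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
+   p_x, _ = np.histogram(x, bins=bin_edges, weights=w)
+   p_r, _ = np.histogram(r, bins=bin_edges, weights=w)
+   emd = wasserstein_distance(bin_centers, bin_centers, p_x, p_r)
+
+   print(wrmse, wpearsonr, emd)
+
+In a recipe, the corresponding quantities are obtained by requesting the
+``weighted_rmse``, ``weighted_pearsonr``, or ``emd`` metrics of the
+``distance_metric`` preprocessor described above.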
diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py index abbfc7f307..940a578ae2 100644 --- a/esmvalcore/_recipe/check.py +++ b/esmvalcore/_recipe/check.py @@ -5,6 +5,7 @@ import logging import os import subprocess +from functools import partial from pprint import pformat from shutil import which from typing import Any, Iterable @@ -395,47 +396,86 @@ def differing_timeranges(timeranges, required_vars): "Set `timerange` to a common value.") -def bias_type(settings: dict) -> None: - """Check that bias_type for bias preprocessor is valid.""" - if 'bias' not in settings: +def _check_literal( + settings: dict, + *, + step: str, + option: str, + allowed_values: tuple[str], +) -> None: + """Check that an option for a preprocessor has a valid value.""" + if step not in settings: return - valid_options = ('absolute', 'relative') - user_bias_type = settings['bias'].get('bias_type', 'absolute') - if user_bias_type not in valid_options: + user_value = settings[step].get(option, allowed_values[0]) + if user_value not in allowed_values: raise RecipeError( - f"Expected one of {valid_options} for `bias_type`, got " - f"'{user_bias_type}'" + f"Expected one of {allowed_values} for `{option}`, got " + f"'{user_value}'" ) -def reference_for_bias_preproc(products): - """Check that exactly one reference dataset for bias preproc is given.""" - step = 'bias' +bias_type = partial( + _check_literal, + step='bias', + option='bias_type', + allowed_values=('absolute', 'relative'), +) + + +metric_type = partial( + _check_literal, + step='distance_metric', + option='metric', + allowed_values=( + 'rmse', + 'weighted_rmse', + 'pearsonr', + 'weighted_pearsonr', + 'emd', + 'weighted_emd', + ), +) + + +def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None: + """Check that exactly one reference dataset is given.""" products = {p for p in products if step in p.settings} if not products: return - # Check that exactly one dataset contains the facet ``reference_for_bias: - # true`` + # Check that exactly one dataset contains the specified facet reference_products = [] for product in products: - if product.attributes.get('reference_for_bias', False): + if product.attributes.get(attr_name, False): reference_products.append(product) if len(reference_products) != 1: products_str = [p.filename for p in products] if not reference_products: ref_products_str = ". 
" else: - ref_products_str = [p.filename for p in reference_products] - ref_products_str = f":\n{pformat(ref_products_str)}.\n" + ref_products_str = ( + f":\n{pformat([p.filename for p in reference_products])}.\n" + ) raise RecipeError( - f"Expected exactly 1 dataset with 'reference_for_bias: true' in " + f"Expected exactly 1 dataset with '{attr_name}: true' in " f"products\n{pformat(products_str)},\nfound " f"{len(reference_products):d}{ref_products_str}Please also " f"ensure that the reference dataset is not excluded with the " f"'exclude' option") +reference_for_bias_preproc = partial( + _check_ref_attributes, step='bias', attr_name='reference_for_bias' +) + + +reference_for_distance_metric_preproc = partial( + _check_ref_attributes, + step='distance_metric', + attr_name='reference_for_metric', +) + + def statistics_preprocessors(settings: dict) -> None: """Check options of statistics preprocessors.""" mm_stats = ( diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index e0423fd49a..676e56cb52 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -37,13 +37,13 @@ ) from esmvalcore.preprocessor._area import _update_shapefile_path from esmvalcore.preprocessor._multimodel import _get_stat_identifier -from esmvalcore.preprocessor._other import _group_products from esmvalcore.preprocessor._regrid import ( _spec_to_latlonvals, get_cmor_levels, get_reference_levels, parse_cell_spec, ) +from esmvalcore.preprocessor._shared import _group_products from . import check from .from_datasets import datasets_to_recipe @@ -555,6 +555,7 @@ def _get_preprocessor_products( f'{separator.join(sorted(missing_vars))}') check.reference_for_bias_preproc(products) + check.reference_for_distance_metric_preproc(products) _configure_multi_product_preprocessor( products=products, @@ -656,6 +657,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars): check.statistics_preprocessors(settings) check.regridding_schemes(settings) check.bias_type(settings) + check.metric_type(settings) def _get_preprocessor_task(datasets, profiles, task_name): diff --git a/esmvalcore/iris_helpers.py b/esmvalcore/iris_helpers.py index 3e59932697..1191587546 100644 --- a/esmvalcore/iris_helpers.py +++ b/esmvalcore/iris_helpers.py @@ -236,7 +236,7 @@ def rechunk_cube( Input cube. complete_coords: (Names of) coordinates along which the output cubes should not be - chunked. The given coordinates must span exactly 1 dimension. + chunked. remaining_dims: Chunksize of the remaining dimensions. 
@@ -248,17 +248,11 @@ def rechunk_cube( """ cube = cube.copy() # do not modify input cube - # Make sure that complete_coords span exactly 1 dimension complete_dims = [] for coord in complete_coords: coord = cube.coord(coord) - dims = cube.coord_dims(coord) - if len(dims) != 1: - raise CoordinateMultiDimError( - f"Complete coordinates must be 1D coordinates, got " - f"{len(dims):d}D coordinate '{coord.name()}'" - ) - complete_dims.append(dims[0]) + complete_dims.extend(cube.coord_dims(coord)) + complete_dims = list(set(complete_dims)) # Rechunk data if cube.has_lazy_data(): diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index db55ce4f17..3800fb1413 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -22,7 +22,7 @@ meridional_statistics, zonal_statistics, ) -from ._bias import bias +from ._compare_with_refs import bias, distance_metric from ._cycles import amplitude from ._derive import derive from ._detrend import detrend @@ -46,7 +46,7 @@ mask_outside_range, ) from ._multimodel import ensemble_statistics, multi_model_statistics -from ._other import clip +from ._other import clip, histogram from ._regrid import ( extract_coordinate_points, extract_levels, @@ -175,12 +175,15 @@ 'linear_trend_stderr', # Convert units 'convert_units', + # Histograms + 'histogram', # Ensemble statistics 'ensemble_statistics', # Multi model statistics 'multi_model_statistics', - # Bias calculation + # Comparison with reference datasets 'bias', + 'distance_metric', # Remove supplementary variables from cube 'remove_supplementary_variables', # Save to file @@ -215,6 +218,7 @@ MULTI_MODEL_FUNCTIONS = { 'bias', + 'distance_metric', 'ensemble_statistics', 'multi_model_statistics', 'mask_multimodel', diff --git a/esmvalcore/preprocessor/_bias.py b/esmvalcore/preprocessor/_bias.py deleted file mode 100644 index a8e0f4f2c0..0000000000 --- a/esmvalcore/preprocessor/_bias.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Preprocessor functions to calculate biases from data.""" -from __future__ import annotations - -import logging -from collections.abc import Iterable -from typing import TYPE_CHECKING, Literal, Optional - -import dask.array as da -from iris.cube import Cube, CubeList - -from ._io import concatenate - -if TYPE_CHECKING: - from esmvalcore.preprocessor import PreprocessorFile - -logger = logging.getLogger(__name__) - - -BiasType = Literal['absolute', 'relative'] - - -def bias( - products: set[PreprocessorFile] | Iterable[Cube], - ref_cube: Optional[Cube] = None, - bias_type: BiasType = 'absolute', - denominator_mask_threshold: float = 1e-3, - keep_reference_dataset: bool = False, -) -> set[PreprocessorFile] | CubeList: - """Calculate biases relative to a reference dataset. - - The reference dataset needs to be broadcastable to all input `products`. - This supports `iris' rich broadcasting abilities - `__. To ensure this, the preprocessors - :func:`esmvalcore.preprocessor.regrid` and/or - :func:`esmvalcore.preprocessor.regrid_time` might be helpful. - - Notes - ----- - The reference dataset can be specified with the `ref_cube` argument. If - `ref_cube` is ``None``, exactly one input dataset in the `products` set - needs to have the facet ``reference_for_bias: true`` defined in the recipe. - Please do **not** specify the option `ref_cube` when using this - preprocessor function in a recipe. - - Parameters - ---------- - products: - Input datasets/cubes for which the bias is calculated relative to a - reference dataset/cube. 
- ref_cube: - Cube which is used as reference for the bias calculation. If ``None``, - `products` needs to be a :obj:`set` of - `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one - dataset in `products` needs the facet ``reference_for_bias: true``. - bias_type: - Bias type that is calculated. Must be one of ``'absolute'`` (dataset - - ref) or ``'relative'`` ((dataset - ref) / ref). - denominator_mask_threshold: - Threshold to mask values close to zero in the denominator (i.e., the - reference dataset) during the calculation of relative biases. All - values in the reference dataset with absolute value less than the given - threshold are masked out. This setting is ignored when ``bias_type`` is - set to ``'absolute'``. Please note that for some variables with very - small absolute values (e.g., carbon cycle fluxes, which are usually - :math:`< 10^{-6}` kg m :math:`^{-2}` s :math:`^{-1}`) it is absolutely - essential to change the default value in order to get reasonable - results. - keep_reference_dataset: - If ``True``, keep the reference dataset in the output. If ``False``, - drop the reference dataset. Ignored if `ref_cube` is given. - - Returns - ------- - set[PreprocessorFile] | CubeList - Output datasets/cubes. Will be a :obj:`set` of - :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if - `products` is also one, a :class:`~iris.cube.CubeList` otherwise. - - Raises - ------ - ValueError - Not exactly one input datasets contains the facet - ``reference_for_bias: true`` if ``ref_cube=None``; ``ref_cube=None`` - and the input products are given as iterable of - :class:`~iris.cube.Cube` objects; ``bias_type`` is not one of - ``'absolute'`` or ``'relative'``. - - """ - ref_product = None - all_cubes_given = all(isinstance(p, Cube) for p in products) - - # Get reference cube if not explicitly given - if ref_cube is None: - if all_cubes_given: - raise ValueError( - "A list of Cubes is given to this preprocessor; please " - "specify a `ref_cube`" - ) - (ref_cube, ref_product) = _get_ref(products, 'reference_for_bias') - else: - ref_product = None - - # Mask reference cube appropriately for relative biases - if bias_type == 'relative': - ref_cube = ref_cube.copy() - ref_cube.data = da.ma.masked_inside( - ref_cube.core_data(), - -denominator_mask_threshold, - denominator_mask_threshold, - ) - - # If input is an Iterable of Cube objects, calculate bias for each element - if all_cubes_given: - cubes = [_calculate_bias(c, ref_cube, bias_type) for c in products] - return CubeList(cubes) - - # Otherwise, iterate over all input products, calculate bias and adapt - # metadata and provenance information accordingly - output_products = set() - for product in products: - if product == ref_product: - continue - cube = concatenate(product.cubes) - - # Calculate bias - cube = _calculate_bias(cube, ref_cube, bias_type) - - # Adapt metadata and provenance information - product.attributes['units'] = str(cube.units) - if ref_product is not None: - product.wasderivedfrom(ref_product) - - product.cubes = CubeList([cube]) - output_products.add(product) - - # Add reference dataset to output if desired - if keep_reference_dataset and ref_product is not None: - output_products.add(ref_product) - - return output_products - - -def _get_ref(products, ref_tag: str) -> tuple[Cube, PreprocessorFile]: - """Get reference cube and product.""" - ref_products = [] - for product in products: - if product.attributes.get(ref_tag, False): - ref_products.append(product) - if len(ref_products) != 1: - raise 
ValueError( - f"Expected exactly 1 dataset with '{ref_tag}: true', found " - f"{len(ref_products):d}" - ) - ref_product = ref_products[0] - - # Extract reference cube - # Note: For technical reasons, product objects contain the member - # ``cubes``, which is a list of cubes. However, this is expected to be a - # list with exactly one element due to the call of concatenate earlier in - # the preprocessing chain of ESMValTool. To make sure that this - # preprocessor can also be used outside the ESMValTool preprocessing chain, - # an additional concatenate call is added here. - ref_cube = concatenate(ref_product.cubes) - - return (ref_cube, ref_product) - - -def _calculate_bias(cube: Cube, ref_cube: Cube, bias_type: BiasType) -> Cube: - """Calculate bias for a single cube relative to a reference cube.""" - cube_metadata = cube.metadata - - if bias_type == 'absolute': - cube = cube - ref_cube - new_units = cube.units - elif bias_type == 'relative': - cube = (cube - ref_cube) / ref_cube - new_units = '1' - else: - raise ValueError( - f"Expected one of ['absolute', 'relative'] for bias_type, got " - f"'{bias_type}'" - ) - - cube.metadata = cube_metadata - cube.units = new_units - - return cube diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py new file mode 100644 index 0000000000..1634fc1752 --- /dev/null +++ b/esmvalcore/preprocessor/_compare_with_refs.py @@ -0,0 +1,573 @@ +"""Preprocessor functions for comparisons of data with reference datasets.""" +from __future__ import annotations + +import logging +from collections.abc import Iterable +from functools import partial +from typing import TYPE_CHECKING, Literal, Optional + +import dask +import dask.array as da +import iris.analysis +import iris.analysis.stats +import numpy as np +from iris.common.metadata import CubeMetadata +from iris.coords import CellMethod, Coord +from iris.cube import Cube, CubeList +from scipy.stats import wasserstein_distance + +from esmvalcore.iris_helpers import rechunk_cube +from esmvalcore.preprocessor._io import concatenate +from esmvalcore.preprocessor._other import histogram +from esmvalcore.preprocessor._shared import ( + get_all_coord_dims, + get_all_coords, + get_array_module, + get_weights, + preserve_float_dtype, +) + +if TYPE_CHECKING: + from esmvalcore.preprocessor import PreprocessorFile + +logger = logging.getLogger(__name__) + + +BiasType = Literal['absolute', 'relative'] + + +def bias( + products: set[PreprocessorFile] | Iterable[Cube], + reference: Optional[Cube] = None, + bias_type: BiasType = 'absolute', + denominator_mask_threshold: float = 1e-3, + keep_reference_dataset: bool = False, +) -> set[PreprocessorFile] | CubeList: + """Calculate biases relative to a reference dataset. + + The reference dataset needs to be broadcastable to all input `products`. + This supports `iris' rich broadcasting abilities + `__. To ensure this, the preprocessors + :func:`esmvalcore.preprocessor.regrid` and/or + :func:`esmvalcore.preprocessor.regrid_time` might be helpful. + + Notes + ----- + The reference dataset can be specified with the `reference` argument. If + `reference` is ``None``, exactly one input dataset in the `products` set + needs to have the facet ``reference_for_bias: true`` defined in the recipe. + Please do **not** specify the option `reference` when using this + preprocessor function in a recipe. + + Parameters + ---------- + products: + Input datasets/cubes for which the bias is calculated relative to a + reference dataset/cube. 
+ reference: + Cube which is used as reference for the bias calculation. If ``None``, + `products` needs to be a :obj:`set` of + `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one + dataset in `products` needs the facet ``reference_for_bias: true``. Do + not specify this argument in a recipe. + bias_type: + Bias type that is calculated. Must be one of ``'absolute'`` (dataset - + ref) or ``'relative'`` ((dataset - ref) / ref). + denominator_mask_threshold: + Threshold to mask values close to zero in the denominator (i.e., the + reference dataset) during the calculation of relative biases. All + values in the reference dataset with absolute value less than the given + threshold are masked out. This setting is ignored when ``bias_type`` is + set to ``'absolute'``. Please note that for some variables with very + small absolute values (e.g., carbon cycle fluxes, which are usually + :math:`< 10^{-6}` kg m :math:`^{-2}` s :math:`^{-1}`) it is absolutely + essential to change the default value in order to get reasonable + results. + keep_reference_dataset: + If ``True``, keep the reference dataset in the output. If ``False``, + drop the reference dataset. Ignored if `reference` is given. + + Returns + ------- + set[PreprocessorFile] | CubeList + Output datasets/cubes. Will be a :obj:`set` of + :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if + `products` is also one, a :class:`~iris.cube.CubeList` otherwise. + + Raises + ------ + ValueError + Not exactly one input datasets contains the facet ``reference_for_bias: + true`` if ``reference=None``; ``reference=None`` and the input products + are given as iterable of :class:`~iris.cube.Cube` objects; + ``bias_type`` is not one of ``'absolute'`` or ``'relative'``. + + """ + ref_product = None + all_cubes_given = all(isinstance(p, Cube) for p in products) + + # Get reference cube if not explicitly given + if reference is None: + if all_cubes_given: + raise ValueError( + "A list of Cubes is given to this preprocessor; please " + "specify a `reference`" + ) + (reference, ref_product) = _get_ref(products, 'reference_for_bias') + else: + ref_product = None + + # Mask reference cube appropriately for relative biases + if bias_type == 'relative': + reference = reference.copy() + npx = get_array_module(reference.core_data()) + reference.data = npx.ma.masked_inside( + reference.core_data(), + -denominator_mask_threshold, + denominator_mask_threshold, + ) + + # If input is an Iterable of Cube objects, calculate bias for each element + if all_cubes_given: + cubes = [_calculate_bias(c, reference, bias_type) for c in products] + return CubeList(cubes) + + # Otherwise, iterate over all input products, calculate bias and adapt + # metadata and provenance information accordingly + output_products = set() + for product in products: + if product == ref_product: + continue + cube = concatenate(product.cubes) + + # Calculate bias + cube = _calculate_bias(cube, reference, bias_type) + + # Adapt metadata and provenance information + product.attributes['units'] = str(cube.units) + if ref_product is not None: + product.wasderivedfrom(ref_product) + + product.cubes = CubeList([cube]) + output_products.add(product) + + # Add reference dataset to output if desired + if keep_reference_dataset and ref_product is not None: + output_products.add(ref_product) + + return output_products + + +def _get_ref(products, ref_tag: str) -> tuple[Cube, PreprocessorFile]: + """Get reference cube and product.""" + ref_products = [] + for product in products: + if 
product.attributes.get(ref_tag, False): + ref_products.append(product) + if len(ref_products) != 1: + raise ValueError( + f"Expected exactly 1 dataset with '{ref_tag}: true', found " + f"{len(ref_products):d}" + ) + ref_product = ref_products[0] + + # Extract reference cube + # Note: For technical reasons, product objects contain the member + # ``cubes``, which is a list of cubes. However, this is expected to be a + # list with exactly one element due to the call of concatenate earlier in + # the preprocessing chain of ESMValTool. To make sure that this + # preprocessor can also be used outside the ESMValTool preprocessing chain, + # an additional concatenate call is added here. + reference = concatenate(ref_product.cubes) + + return (reference, ref_product) + + +def _calculate_bias(cube: Cube, reference: Cube, bias_type: BiasType) -> Cube: + """Calculate bias for a single cube relative to a reference cube.""" + cube_metadata = cube.metadata + + if bias_type == 'absolute': + cube = cube - reference + new_units = cube.units + elif bias_type == 'relative': + cube = (cube - reference) / reference + new_units = '1' + else: + raise ValueError( + f"Expected one of ['absolute', 'relative'] for bias_type, got " + f"'{bias_type}'" + ) + + cube.metadata = cube_metadata + cube.units = new_units + + return cube + + +MetricType = Literal[ + 'rmse', + 'weighted_rmse', + 'pearsonr', + 'weighted_pearsonr', + 'emd', + 'weighted_emd', +] + + +def distance_metric( + products: set[PreprocessorFile] | Iterable[Cube], + metric: MetricType, + reference: Optional[Cube] = None, + coords: Iterable[Coord] | Iterable[str] | None = None, + keep_reference_dataset: bool = True, + **kwargs, +) -> set[PreprocessorFile] | CubeList: + r"""Calculate distance metrics. + + All input datasets need to have identical dimensional coordinates. This can + for example be ensured with the preprocessors + :func:`esmvalcore.preprocessor.regrid` and/or + :func:`esmvalcore.preprocessor.regrid_time`. + + Notes + ----- + This preprocessor requires a reference dataset, which can be specified with + the `reference` argument. If `reference` is ``None``, exactly one input + dataset in the `products` set needs to have the facet + ``reference_for_metric: true`` defined in the recipe. Please do **not** + specify the option `reference` when using this preprocessor function in a + recipe. + + Parameters + ---------- + products: + Input datasets/cubes for which the distance metric is calculated + relative to a reference dataset/cube. + metric: + Distance metric that is calculated. Must be one of + + * ``'rmse'``: Unweighted root mean square error + * ``'weighted_rmse'``: Weighted root mean square error + * ``'pearsonr'``: Unweighted Pearson correlation coefficient + * ``'weighted_pearsonr'``: Weighted Pearson correlation coefficient + * ``'emd'``: Unweighted Earth mover's distance + * ``'weighted_emd'``: Weighted Earth mover's distance + + The Earth mover's distance is also known as first Wasserstein metric + `W`\ :sub:`1`. + + A detailed description of these metrics can be found :ref:`here + `. + + .. note:: + Metrics starting with `weighted_` will calculate weighted distance + metrics if possible. Currently, the following `coords` (or any + combinations that include them) will trigger weighting: `time` + (will use lengths of time intervals as weights) and `latitude` + (will use cell area weights). Time weights are always calculated + from the input data. 
Area weights can be given as supplementary + variables to the recipe (`areacella` or `areacello`, see + :ref:`supplementary_variables`) or calculated from the input data + (this only works for regular grids). By default, **NO** + supplementary variables will be used; they need to be explicitly + requested in the recipe. + reference: + Cube which is used as reference for the distance metric calculation. If + ``None``, `products` needs to be a :obj:`set` of + :class:`~esmvalcore.preprocessor.PreprocessorFile` objects and exactly + one dataset in `products` needs the facet ``reference_for_metric: + true``. Do not specify this argument in a recipe. + coords: + Coordinates over which the distance metric is calculated. If ``None``, + calculate the metric over all coordinates, which results in a scalar + cube. + keep_reference_dataset: + If ``True``, also calculate the distance of the reference dataset with + itself. If ``False``, drop the reference dataset. + **kwargs: + Additional options for the metric calculation. The following keyword + arguments are supported: + + * `rmse` and `weighted_rmse`: none. + * `pearsonr` and `weighted_pearsonr`: ``mdtol``, ``common_mask`` (all + keyword arguments are passed to :func:`iris.analysis.stats.pearsonr`, + see that link for more details on these arguments). Note: in contrast + to :func:`~iris.analysis.stats.pearsonr`, ``common_mask=True`` by + default. + * `emd` and `weighted_emd`: ``n_bins`` = number of bins used to create + discrete probability distribition of data before calculating the EMD + (:obj:`int`, default: 100). + + Returns + ------- + set of esmvalcore.preprocessor.PreprocessorFile or iris.cube.CubeList + Output datasets/cubes. Will be a :obj:`set` of + :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if + `products` is also one, a :class:`~iris.cube.CubeList` otherwise. + + Raises + ------ + ValueError + Shape and coordinates of products and reference data does not match; + not exactly one input datasets contains the facet + ``reference_for_metric: true`` if ``reference=None`; ``reference=None`` + and the input products are given as iterable of + :class:`~iris.cube.Cube` objects; an invalid ``metric`` has been given. + iris.exceptions.CoordinateNotFoundError + `longitude` is not found in cube if a weighted metric shall be + calculated, `latitude` is in `coords`, and no `cell_area` is given + as :ref:`supplementary_variables`. + + """ + reference_product = None + all_cubes_given = all(isinstance(p, Cube) for p in products) + + # Get reference cube if not explicitly given + if reference is None: + if all_cubes_given: + raise ValueError( + "A list of Cubes is given to this preprocessor; please " + "specify a `reference`" + ) + reference_products = [] + for product in products: + if product.attributes.get('reference_for_metric', False): + reference_products.append(product) + if len(reference_products) != 1: + raise ValueError( + f"Expected exactly 1 dataset with 'reference_for_metric: " + f"true', found {len(reference_products):d}" + ) + reference_product = reference_products[0] + + # Extract reference cube + # Note: For technical reasons, product objects contain the member + # ``cubes``, which is a list of cubes. However, this is expected to be + # a list with exactly one element due to the call of concatenate + # earlier in the preprocessing chain of ESMValTool. To make sure that + # this preprocessor can also be used outside the ESMValTool + # preprocessing chain, an additional concatenate call is added here. 
+ reference = concatenate(reference_product.cubes) + + # If input is an Iterable of Cube objects, calculate distance metric for + # each element + if all_cubes_given: + cubes = [ + _calculate_metric(c, reference, metric, coords, **kwargs) + for c in products + ] + return CubeList(cubes) + + # Otherwise, iterate over all input products, calculate metric and adapt + # metadata and provenance information accordingly + output_products = set() + for product in products: + if not keep_reference_dataset and product == reference_product: + continue + cube = concatenate(product.cubes) + + # Calculate distance metric + cube = _calculate_metric(cube, reference, metric, coords, **kwargs) + + # Adapt metadata and provenance information + product.attributes['standard_name'] = cube.standard_name + product.attributes['long_name'] = cube.long_name + product.attributes['short_name'] = cube.var_name + product.attributes['units'] = str(cube.units) + if product != reference_product: + product.wasderivedfrom(reference_product) + + product.cubes = CubeList([cube]) + output_products.add(product) + + return output_products + + +@preserve_float_dtype +def _calculate_metric( + cube: Cube, + reference: Cube, + metric: MetricType, + coords: Iterable[Coord] | Iterable[str] | None, + **kwargs, +) -> Cube: + """Calculate metric for a single cube relative to a reference cube.""" + # Make sure that dimensional metadata of data and ref data is compatible + if cube.shape != reference.shape: + raise ValueError( + f"Expected identical shapes of cube and reference cube for " + f"distance metric calculation, got {cube.shape} and " + f"{reference.shape}, respectively" + ) + try: + cube + reference # dummy operation to check if cubes are compatible + except Exception as exc: + raise ValueError( + "Cannot calculate distance metric between cube and reference cube " + ) from exc + + # Perform the actual calculation of the distance metric + # Note: we work on arrays here instead of cube to stay as flexible as + # possible since some operations (e.g., sqrt()) are not available for cubes + coords = get_all_coords(cube, coords) + metrics_funcs = { + 'rmse': partial(_calculate_rmse, weighted=False, **kwargs), + 'weighted_rmse': partial(_calculate_rmse, weighted=True, **kwargs), + 'pearsonr': partial(_calculate_pearsonr, weighted=False, **kwargs), + 'weighted_pearsonr': partial( + _calculate_pearsonr, weighted=True, **kwargs + ), + 'emd': partial(_calculate_emd, weighted=False, **kwargs), + 'weighted_emd': partial(_calculate_emd, weighted=True, **kwargs), + } + if metric not in metrics_funcs: + raise ValueError( + f"Expected one of {list(metrics_funcs)} for metric, got '{metric}'" + ) + (res_data, res_metadata) = metrics_funcs[metric](cube, reference, coords) + + # Get result cube with correct dimensional metadata by using dummy + # operation (max) + res_cube = cube.collapsed(coords, iris.analysis.MAX) + res_cube.data = res_data + res_cube.metadata = res_metadata + res_cube.cell_methods = [*cube.cell_methods, CellMethod(metric, coords)] + + return res_cube + + +def _calculate_rmse( + cube: Cube, + reference: Cube, + coords: Iterable[Coord] | Iterable[str], + *, + weighted: bool, +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate root mean square error.""" + # Data + axis = get_all_coord_dims(cube, coords) + weights = get_weights(cube, coords) if weighted else None + squared_error = (cube.core_data() - reference.core_data())**2 + npx = get_array_module(squared_error) + rmse = npx.sqrt(npx.ma.average(squared_error, axis=axis, 
weights=weights)) + + # Metadata + metadata = CubeMetadata( + None, + 'RMSE' if cube.long_name is None else f'RMSE of {cube.long_name}', + 'rmse' if cube.var_name is None else f'rmse_{cube.var_name}', + cube.units, + cube.attributes, + cube.cell_methods, + ) + + return (rmse, metadata) + + +def _calculate_pearsonr( + cube: Cube, + reference: Cube, + coords: Iterable[Coord] | Iterable[str], + *, + weighted: bool, + **kwargs, +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate Pearson correlation coefficient.""" + # Here, we want to use common_mask=True in iris.analysis.stats.pearsonr + # (iris' default is common_mask=False) + kwargs.setdefault('common_mask', True) + + # Data + weights = get_weights(cube, coords) if weighted else None + res_cube = iris.analysis.stats.pearsonr( + cube, reference, corr_coords=coords, weights=weights, **kwargs + ) + + # Metadata + metadata = CubeMetadata( + None, + ( + "Pearson's r" if cube.long_name is None + else f"Pearson's r of {cube.long_name}" + ), + 'pearsonr' if cube.var_name is None else f'pearsonr_{cube.var_name}', + '1', + cube.attributes, + cube.cell_methods, + ) + + return (res_cube.core_data(), metadata) + + +def _calculate_emd( + cube: Cube, + reference: Cube, + coords: Iterable[Coord] | Iterable[str], + *, + n_bins: int = 100, + weighted: bool, +) -> tuple[np.ndarray | da.Array, CubeMetadata]: + """Calculate Earth mover's distance.""" + weights = get_weights(cube, coords) if weighted else None + + # Get probability mass functions (using histogram preprocessor) + all_data = da.stack([cube.core_data(), reference.core_data()]) + bin_range = dask.compute(all_data.min(), all_data.max()) + pmf = histogram( + cube, + coords=coords, + bins=n_bins, + bin_range=bin_range, + weights=weights, + normalization='sum', + ) + pmf_ref = histogram( + reference, + coords=coords, + bins=n_bins, + bin_range=bin_range, + weights=weights, + normalization='sum', + ) + bin_centers = pmf.coord(cube.name()).points + + # Make sure that data is not chunked along `coords` + pmf = rechunk_cube(pmf, [cube.name()]) + pmf_ref = rechunk_cube(pmf_ref, [reference.name()]) + + # Data + if cube.has_lazy_data() and reference.has_lazy_data(): + emd = da.apply_gufunc( + _get_emd, + '(i),(i),(i)->()', + pmf.lazy_data(), + pmf_ref.lazy_data(), + bin_centers, + axes=[(-1,), (-1,), (-1,), ()], + output_dtypes=pmf.dtype, + vectorize=True, + ) + else: + v_get_emd = np.vectorize(_get_emd, signature='(n),(n),(n)->()') + emd = v_get_emd(pmf.data, pmf_ref.data, bin_centers) + + # Metadata + metadata = CubeMetadata( + None, + 'EMD' if cube.long_name is None else f'EMD of {cube.long_name}', + 'emd' if cube.var_name is None else f'emd_{cube.var_name}', + cube.units, + cube.attributes, + cube.cell_methods, + ) + + return (emd, metadata) + + +def _get_emd(arr, ref_arr, bin_centers): + """Calculate Earth mover's distance (non-lazy).""" + if np.ma.is_masked(arr) or np.ma.is_masked(ref_arr): + return np.ma.masked # this is safe because PMFs will be masked arrays + return wasserstein_distance(bin_centers, bin_centers, arr, ref_arr) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 5390517a26..d1e0d90e74 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -26,13 +26,14 @@ from iris.util import equalise_attributes, new_axis from esmvalcore.iris_helpers import date2num +from esmvalcore.preprocessor._shared import ( + _group_products, + get_iris_aggregator, +) from 
esmvalcore.preprocessor._supplementary_vars import ( remove_supplementary_variables, ) -from ._other import _group_products -from ._shared import get_iris_aggregator - if TYPE_CHECKING: from esmvalcore.preprocessor import PreprocessorFile diff --git a/esmvalcore/preprocessor/_other.py b/esmvalcore/preprocessor/_other.py index 14594ba7cf..3d047a4e24 100644 --- a/esmvalcore/preprocessor/_other.py +++ b/esmvalcore/preprocessor/_other.py @@ -1,10 +1,26 @@ """Preprocessor functions that do not fit into any of the categories.""" +from __future__ import annotations import logging -from collections import defaultdict +import string +from collections.abc import Iterable, Sequence +from typing import Literal +import dask import dask.array as da +import iris.analysis import numpy as np +from iris.coords import Coord, DimCoord +from iris.cube import Cube + +from esmvalcore.iris_helpers import rechunk_cube +from esmvalcore.preprocessor._shared import ( + get_all_coord_dims, + get_all_coords, + get_array_module, + get_weights, + preserve_float_dtype, +) logger = logging.getLogger(__name__) @@ -39,49 +55,377 @@ def clip(cube, minimum=None, maximum=None): return cube -def _groupby(iterable, keyfunc): - """Group iterable by key function. +@preserve_float_dtype +def histogram( + cube: Cube, + coords: Iterable[Coord] | Iterable[str] | None = None, + bins: int | Sequence[float] = 10, + bin_range: tuple[float, float] | None = None, + weights: np.ndarray | da.Array | bool | None = None, + normalization: Literal['sum', 'integral'] | None = None, +) -> Cube: + """Calculate histogram. + + Very similar to :func:`numpy.histogram`, but calculates histogram only over + the given coordinates. - The items are grouped by the value that is returned by the `keyfunc` + Handles lazy data and masked data. Parameters ---------- - iterable : list, tuple or iterable - List of items to group - keyfunc : callable - Used to determine the group of each item. These become the keys - of the returned dictionary + cube: + Input cube. + coords: + Coordinates over which the histogram is calculated. If ``None``, + calculate the histogram over all coordinates, which results in a scalar + cube. + bins: + If `bins` is an :obj:`int`, it defines the number of equal-width bins + in the given `bin_range`. If `bins` is a sequence, it defines a + monotonically increasing array of bin edges, including the rightmost + edge, allowing for non-uniform bin widths. + bin_range: + The lower and upper range of the bins. If ``None``, `bin_range` is + simply (``cube.core_data().min(), cube.core_data().max()``). Values + outside the range are ignored. The first element of the range must be + less than or equal to the second. `bin_range` affects the automatic bin + computation as well if `bins` is an :obj:`int` (see description for + `bins` above). + weights: + Weights for the histogram calculation. Each value in the input data + only contributes its associated weight towards the bin count (instead + of 1). Weights are normalized before entering the calculation if + `normalization` is ``'integral'`` or ``'sum'``. Can be an array of the + same shape as the input data, ``False`` or ``None`` (no weighting), or + ``True``. In the latter case, weighting will depend on `coords`, and + the following coordinates will trigger weighting: `time` (will use + lengths of time intervals as weights) and `latitude` (will use cell + area weights). Time weights are always calculated from the input data. 
+ Area weights can be given as supplementary variables to the recipe + (`areacella` or `areacello`, see :ref:`supplementary_variables`) or + calculated from the input data (this only works for regular grids). By + default, **NO** supplementary variables will be used; they need to be + explicitly requested in the recipe. + normalization: + If ``None``, the result will contain the number of samples in each bin. + If ``'integral'``, the result is the value of the probability `density` + function at the bin, normalized such that the integral over the range + is 1. If ``'sum'``, the result is the value of the probability + `mass` function at the bin, normalized such that the sum over + the range is 1. Normalization will be applied across `coords`, not the + entire cube. Returns ------- - dict - Returns a dictionary with the grouped values. + Cube + Histogram cube. The shape of this cube will be `(x1, x2, ..., n_bins)`, + where `xi` are the dimensions of the input cube not appearing in + `coords` and `n_bins` is the number of bins. + + Raises + ------ + TypeError + Invalid `bin` type given + ValueError + Invalid `normalization` or `bin_range` given or `bin_range` is ``None`` + and data is fully masked. + iris.exceptions.CoordinateNotFoundError + `longitude` is not found in cube if `weights=True`, `latitude` is in + `coords`, and no `cell_area` is given as + :ref:`supplementary_variables`. + + """ + # Check arguments + if isinstance(bins, str): + raise TypeError( + f"bins cannot be a str (got '{bins}'), must be int or Sequence of " + f"int" + ) + allowed_norms = (None, 'sum', 'integral') + if normalization is not None and normalization not in allowed_norms: + raise ValueError( + f"Expected one of {allowed_norms} for normalization, got " + f"'{normalization}'" + ) + + # If histogram is calculated over all coordinates, we can use + # dask.array.histogram and do not need to worry about chunks; otherwise, + # make sure that the cube is not chunked along the given coordinates + coords = get_all_coords(cube, coords) + axes = get_all_coord_dims(cube, coords) + if cube.has_lazy_data() and len(axes) != cube.ndim: + cube = rechunk_cube(cube, coords) + + # Calculate histogram + weights = _get_histogram_weights(cube, coords, weights, normalization) + (bin_range, bin_edges) = _get_bins(cube, bins, bin_range) + if cube.has_lazy_data(): + func = _calculate_histogram_lazy # type: ignore + else: + func = _calculate_histogram_eager # type: ignore + hist_data = func( + cube.core_data(), + weights, # type: ignore + along_axes=axes, + bin_edges=bin_edges, + bin_range=bin_range, + normalization=normalization, + ) + + # Get final cube + hist_cube = _get_histogram_cube( + cube, hist_data, coords, bin_edges, normalization + ) + + return hist_cube + + +def _get_bins( + cube: Cube, + bins: int | Sequence[float], + bin_range: tuple[float, float] | None, +) -> tuple[tuple[float, float], np.ndarray]: + """Calculate bin range and edges.""" + if bin_range is None: + bin_range = dask.compute( + cube.core_data().min(), cube.core_data().max() + ) + if isinstance(bins, int): + bin_edges = np.linspace( + bin_range[0], bin_range[1], bins + 1, dtype=np.float64 + ) + else: + bin_edges = np.array(bins, dtype=np.float64) + + finite_bin_range = [bool(np.isfinite(r)) for r in bin_range] + if not all(finite_bin_range): + raise ValueError( + f"Cannot calculate histogram for bin_range={bin_range} (or for " + f"fully masked data when `bin_range` is not given)" + ) + + return (bin_range, bin_edges) + + +def _get_histogram_weights( + cube: Cube, 
+ coords: Iterable[Coord] | Iterable[str], + weights: np.ndarray | da.Array | bool | None, + normalization: Literal['sum', 'integral'] | None, +) -> np.ndarray | da.Array: + """Get histogram weights.""" + axes = get_all_coord_dims(cube, coords) + npx = get_array_module(cube.core_data()) + + weights_array: np.ndarray | da.Array + if weights is None or weights is False: + weights_array = npx.ones_like(cube.core_data()) + elif weights is True: + weights_array = get_weights(cube, coords) + else: + weights_array = weights + + if normalization is not None: + norm = npx.sum(weights_array, axis=axes, keepdims=True) + weights_array = weights_array / norm + + # For lazy arrays, make sure that the chunks of the cube data and weights + # match + if isinstance(weights_array, da.Array): + weights_array = weights_array.rechunk(cube.lazy_data().chunks) + + return weights_array + + +def _calculate_histogram_lazy( + data: da.Array, + weights: da.Array, + *, + along_axes: tuple[int, ...], + bin_edges: np.ndarray, + bin_range: tuple[float, float], + normalization: Literal['sum', 'integral'] | None = None, +) -> da.Array: + """Calculate histogram over data along axes (lazy version). + + This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are + the dimensions of `data` not appearing in `axes` and `n_bins` is the number + of bins. + """ - grouped = defaultdict(set) - for item in iterable: - key = keyfunc(item) - grouped[key].add(item) + n_axes = len(along_axes) + + # (1) If histogram is calculated over all axes, use the efficient + # da.histogram function + if n_axes == data.ndim: + data = data.ravel() + weights = weights.ravel() + mask = da.ma.getmaskarray(data) + data = data[~mask] + weights = weights[~mask] + hist = da.histogram( + data, bins=bin_edges, range=bin_range, weights=weights + )[0] + hist_sum = hist.sum() + hist = da.ma.masked_array(hist, mask=da.allclose(hist_sum, 0.0)) + if normalization == 'sum': + hist = hist / hist_sum + elif normalization == 'integral': + diffs = np.array(np.diff(bin_edges), dtype=data.dtype) + hist = hist / hist_sum / diffs + hist = da.ma.masked_invalid(hist) - return grouped + # (2) Otherwise, use da.apply_gufunc with the eager version + # _calculate_histogram_eager + else: + # da.apply_gufunc transposes the input array so that the axes given by + # the `axes` argument to da.apply_gufunc are the rightmost dimensions. + # Thus, we need to use `along_axes=(ndim-n_axes, ..., ndim-2, ndim-1)` + # for _calculate_histogram_eager here. + axes_in_chunk = tuple(range(data.ndim - n_axes, data.ndim)) + # The call signature depends also on the number of axes in `axes`, and + # will be (a,b,...)->(nbins) where a,b,... 
are the data dimensions that + # are collapsed, and nbins the number of bin centers + in_signature = f"({','.join(list(string.ascii_lowercase)[:n_axes])})" + hist = da.apply_gufunc( + _calculate_histogram_eager, + f"{in_signature},{in_signature}->(nbins)", + data, + weights, + axes=[along_axes, along_axes, (-1,)], + output_sizes={'nbins': len(bin_edges) - 1}, + along_axes=axes_in_chunk, + bin_edges=bin_edges, + bin_range=bin_range, + normalization=normalization, + ) -def _group_products(products, by_key): - """Group products by the given list of attributes.""" - def grouper(product): - return product.group(by_key) + return hist - grouped = _groupby(products, keyfunc=grouper) - return grouped.items() +def _calculate_histogram_eager( + data: np.ndarray, + weights: np.ndarray, + *, + along_axes: tuple[int, ...], + bin_edges: np.ndarray, + bin_range: tuple[float, float], + normalization: Literal['sum', 'integral'] | None = None, +) -> np.ndarray: + """Calculate histogram over data along axes (eager version). -def get_array_module(*args): - """Return the best matching array module. + This will return an array of shape `(x1, x2, ..., n_bins)` where `xi` are + the dimensions of `data` not appearing in `axes` and `n_bins` is the number + of bins. - If at least one of the arguments is a :class:`dask.array.Array` object, - the :mod:`dask.array` module is returned. In all other cases the - :mod:`numpy` module is returned. """ - for arg in args: - if isinstance(arg, da.Array): - return da - return np + # Create array with shape (x1, x2, ..., y) where `y` is the product of all + # dimensions in `axes` and the `xi` are the remaining dimensions + remaining_dims = tuple(a for a in range(data.ndim) if a not in along_axes) + reshaped_data = np.transpose(data, axes=(*remaining_dims, *along_axes)) + reshaped_weights = np.transpose( + weights, axes=(*remaining_dims, *along_axes) + ) + shape_rem_dims = tuple(data.shape[a] for a in remaining_dims) + reshaped_data = reshaped_data.reshape(*shape_rem_dims, -1) + reshaped_weights = reshaped_weights.reshape(*shape_rem_dims, -1) + + # Apply vectorized version of np.histogram + def _get_hist_values(arr, wgts): + mask = np.ma.getmaskarray(arr) + arr = arr[~mask] + wgts = wgts[~mask] + return np.histogram( + arr, bins=bin_edges, range=bin_range, weights=wgts + )[0] + + v_histogram = np.vectorize(_get_hist_values, signature='(n),(n)->(m)') + hist = v_histogram(reshaped_data, reshaped_weights) + + # Mask points where all input data were masked (these are the ones where + # the histograms sums to 0) + hist_sum = hist.sum(axis=-1, keepdims=True) + mask = np.isclose(hist_sum, 0.0) + hist = np.ma.array(hist, mask=np.broadcast_to(mask, hist.shape)) + + # Apply normalization + if normalization == 'sum': + hist = hist / np.ma.array(hist_sum, mask=mask) + elif normalization == 'integral': + hist = ( + hist / + np.ma.array(hist_sum, mask=mask) / + np.ma.array(np.diff(bin_edges), dtype=data.dtype) + ) + + return hist + + +def _get_histogram_cube( + cube: Cube, + data: np.ndarray | da.Array, + coords: Iterable[Coord] | Iterable[str], + bin_edges: np.ndarray, + normalization: Literal['sum', 'integral'] | None, +): + """Get cube with correct metadata for histogram.""" + # Calculate bin centers using 2-window running mean and get corresponding + # coordinate + bin_centers = np.convolve(bin_edges, np.ones(2), 'valid') / 2.0 + bin_coord = DimCoord( + bin_centers, + bounds=np.stack((bin_edges[:-1], bin_edges[1:]), axis=-1), + standard_name=cube.standard_name, + long_name=cube.long_name, + 
var_name=cube.var_name, + units=cube.units, + ) + + # Get result cube with correct dimensional metadata by using dummy + # operation (max) + cell_methods = cube.cell_methods + cube = cube.collapsed(coords, iris.analysis.MAX) + + # Get histogram cube + long_name_suffix = ( + '' if cube.long_name is None else f' of {cube.long_name}' + ) + var_name_suffix = '' if cube.var_name is None else f'_{cube.var_name}' + dim_spec = ( + [(d, cube.coord_dims(d)) for d in cube.dim_coords] + + [(bin_coord, cube.ndim)] + ) + if normalization == 'sum': + long_name = f"Relative Frequency{long_name_suffix}" + var_name = f"relative_frequency{var_name_suffix}" + units = '1' + elif normalization == 'integral': + long_name = f"Density{long_name_suffix}" + var_name = f"density{var_name_suffix}" + units = cube.units**-1 + else: + long_name = f"Frequency{long_name_suffix}" + var_name = f"frequency{var_name_suffix}" + units = '1' + hist_cube = Cube( + data, + standard_name=None, + long_name=long_name, + var_name=var_name, + units=units, + attributes=cube.attributes, + cell_methods=cell_methods, + dim_coords_and_dims=dim_spec, + aux_coords_and_dims=[(a, cube.coord_dims(a)) for a in cube.aux_coords], + aux_factories=cube.aux_factories, + ancillary_variables_and_dims=[ + (a, cube.ancillary_variable_dims(a)) for a in + cube.ancillary_variables() + ], + cell_measures_and_dims=[ + (c, cube.cell_measure_dims(c)) for c in cube.cell_measures() + ], + ) + + return hist_cube diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index a3fbc7032e..c7913df0d7 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -11,7 +11,6 @@ import warnings from copy import deepcopy from decimal import Decimal -from functools import partial from pathlib import Path from typing import TYPE_CHECKING, Any, Optional @@ -30,8 +29,11 @@ from esmvalcore.cmor.table import CMOR_TABLES from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.iris_helpers import has_irregular_grid, has_unstructured_grid -from esmvalcore.preprocessor._other import get_array_module -from esmvalcore.preprocessor._shared import preserve_float_dtype +from esmvalcore.preprocessor._shared import ( + broadcast_to_shape, + get_array_module, + preserve_float_dtype, +) from esmvalcore.preprocessor._supplementary_vars import ( add_ancillary_variable, add_cell_measure, @@ -1033,52 +1035,6 @@ def _create_cube(src_cube, data, src_levels, levels): return result -def is_lazy_masked_data(array): - """Similar to `iris._lazy_data.is_lazy_masked_data`.""" - return isinstance(array, da.Array) and isinstance( - da.utils.meta_from_array(array), np.ma.MaskedArray) - - -def broadcast_to_shape(array, shape, dim_map, chunks=None): - """Copy of `iris.util.broadcast_to_shape` that allows specifying chunks.""" - if isinstance(array, da.Array): - if chunks is not None: - chunks = list(chunks) - for src_idx, tgt_idx in enumerate(dim_map): - # Only use the specified chunks along new dimensions or on - # dimensions that have size 1 in the source array. - if array.shape[src_idx] != 1: - chunks[tgt_idx] = array.chunks[src_idx] - broadcast = partial(da.broadcast_to, shape=shape, chunks=chunks) - else: - broadcast = partial(np.broadcast_to, shape=shape) - - n_orig_dims = len(array.shape) - n_new_dims = len(shape) - n_orig_dims - array = array.reshape(array.shape + (1,) * n_new_dims) - - # Get dims in required order. 
- array = np.moveaxis(array, range(n_orig_dims), dim_map) - new_array = broadcast(array) - - if np.ma.isMA(array): - # broadcast_to strips masks so we need to handle them explicitly. - mask = np.ma.getmask(array) - if mask is np.ma.nomask: - new_mask = np.ma.nomask - else: - new_mask = broadcast(mask) - new_array = np.ma.array(new_array, mask=new_mask) - - elif is_lazy_masked_data(array): - # broadcast_to strips masks so we need to handle them explicitly. - mask = da.ma.getmaskarray(array) - new_mask = broadcast(mask) - new_array = da.ma.masked_array(new_array, new_mask) - - return new_array - - def _vertical_interpolate(cube, src_levels, levels, interpolation, extrapolation): """Perform vertical interpolation.""" diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 0a5f5627dd..addd3617ac 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -8,23 +8,25 @@ import logging import re import warnings -from collections.abc import Callable -from functools import wraps +from collections import defaultdict +from collections.abc import Callable, Iterable +from functools import partial, wraps from typing import Any, Literal, Optional import dask.array as da import iris.analysis import numpy as np -from iris.coords import DimCoord +from iris.coords import CellMeasure, Coord, DimCoord from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError from esmvalcore.exceptions import ESMValCoreDeprecationWarning +from esmvalcore.iris_helpers import has_regular_grid from esmvalcore.typing import DataType logger = logging.getLogger(__name__) -# guess bounds tool def guess_bounds(cube, coords): """Guess bounds of a cube, or not.""" # check for bounds just in case @@ -266,3 +268,310 @@ def wrapper(data: DataType, *args: Any, **kwargs: Any) -> DataType: return result return wrapper + + +def _groupby(iterable, keyfunc): + """Group iterable by key function. + + The items are grouped by the value that is returned by the `keyfunc` + + Parameters + ---------- + iterable : list, tuple or iterable + List of items to group + keyfunc : callable + Used to determine the group of each item. These become the keys + of the returned dictionary + + Returns + ------- + dict + Returns a dictionary with the grouped values. + """ + grouped = defaultdict(set) + for item in iterable: + key = keyfunc(item) + grouped[key].add(item) + + return grouped + + +def _group_products(products, by_key): + """Group products by the given list of attributes.""" + def grouper(product): + return product.group(by_key) + + grouped = _groupby(products, keyfunc=grouper) + return grouped.items() + + +def get_array_module(*args): + """Return the best matching array module. + + If at least one of the arguments is a :class:`dask.array.Array` object, + the :mod:`dask.array` module is returned. In all other cases the + :mod:`numpy` module is returned. + """ + for arg in args: + if isinstance(arg, da.Array): + return da + return np + + +def broadcast_to_shape(array, shape, dim_map, chunks=None): + """Copy of `iris.util.broadcast_to_shape` that allows specifying chunks.""" + if isinstance(array, da.Array): + if chunks is not None: + chunks = list(chunks) + for src_idx, tgt_idx in enumerate(dim_map): + # Only use the specified chunks along new dimensions or on + # dimensions that have size 1 in the source array. 
+ if array.shape[src_idx] != 1: + chunks[tgt_idx] = array.chunks[src_idx] + broadcast = partial(da.broadcast_to, shape=shape, chunks=chunks) + else: + broadcast = partial(np.broadcast_to, shape=shape) + + n_orig_dims = len(array.shape) + n_new_dims = len(shape) - n_orig_dims + array = array.reshape(array.shape + (1,) * n_new_dims) + + # Get dims in required order. + array = np.moveaxis(array, range(n_orig_dims), dim_map) + new_array = broadcast(array) + + if np.ma.isMA(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = np.ma.getmask(array) + if mask is np.ma.nomask: + new_mask = np.ma.nomask + else: + new_mask = broadcast(mask) + new_array = np.ma.array(new_array, mask=new_mask) + + elif _is_lazy_masked_data(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = da.ma.getmaskarray(array) + new_mask = broadcast(mask) + new_array = da.ma.masked_array(new_array, new_mask) + + return new_array + + +def _is_lazy_masked_data(array): + """Similar to `iris._lazy_data.is_lazy_masked_data`.""" + return isinstance(array, da.Array) and isinstance( + da.utils.meta_from_array(array), np.ma.MaskedArray) + + +def get_weights( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> np.ndarray | da.Array: + """Calculate suitable weights for given coordinates.""" + npx = get_array_module(cube.core_data()) + weights = npx.ones_like(cube.core_data()) + + # Time weights: lengths of time interval + if 'time' in coords: + weights *= broadcast_to_shape( + npx.array(get_time_weights(cube)), + cube.shape, + cube.coord_dims('time'), + ) + + # Latitude weights: cell areas + if 'latitude' in coords: + cube = cube.copy() # avoid overwriting input cube + if ( + not cube.cell_measures('cell_area') and + not cube.coords('longitude') + ): + raise CoordinateNotFoundError( + f"Cube {cube.summary(shorten=True)} needs a `longitude` " + f"coordinate to calculate cell area weights for weighted " + f"distance metric over coordinates {coords} (alternatively, " + f"a `cell_area` can be given to the cube as supplementary " + f"variable)" + ) + try_adding_calculated_cell_area(cube) + weights *= broadcast_to_shape( + cube.cell_measure('cell_area').core_data(), + cube.shape, + cube.cell_measure_dims('cell_area'), + ) + + return weights + + +def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: + """Compute the weighting of the time axis. + + Parameters + ---------- + cube: + Input cube. + + Returns + ------- + np.ndarray or da.Array + Array of time weights for averaging. Returns a + :class:`dask.array.Array` if the input cube has lazy data; a + :class:`numpy.ndarray` otherwise. + + """ + time = cube.coord('time') + coord_dims = cube.coord_dims('time') + + # Multidimensional time coordinates are not supported: In this case, + # weights cannot be simply calculated as difference between the bounds + if len(coord_dims) > 1: + raise ValueError( + f"Weighted statistical operations are not supported for " + f"{len(coord_dims):d}D time coordinates, expected 0D or 1D" + ) + + # Extract 1D time weights (= lengths of time intervals) + time_weights = time.lazy_bounds()[:, 1] - time.lazy_bounds()[:, 0] + if cube.has_lazy_data(): + # Align the weight chunks with the data chunks to avoid excessively + # large chunks as a result of broadcasting. 
+ time_chunks = cube.lazy_data().chunks[coord_dims[0]] + time_weights = time_weights.rechunk(time_chunks) + else: + time_weights = time_weights.compute() + return time_weights + + +def try_adding_calculated_cell_area(cube: Cube) -> None: + """Try to add calculated cell measure 'cell_area' to cube (in-place).""" + if cube.cell_measures('cell_area'): + return + + logger.debug( + "Found no cell measure 'cell_area' in cube %s. Check availability of " + "supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell area") + + rotated_pole_grid = all([ + cube.coord('latitude').core_points().ndim == 2, + cube.coord('longitude').core_points().ndim == 2, + cube.coords('grid_latitude'), + cube.coords('grid_longitude'), + ]) + + # For regular grids, calculate grid cell areas with iris function + if has_regular_grid(cube): + cube = guess_bounds(cube, ['latitude', 'longitude']) + logger.debug("Calculating grid cell areas for regular grid") + cell_areas = _compute_area_weights(cube) + + # For rotated pole grids, use grid_latitude and grid_longitude to calculate + # grid cell areas + elif rotated_pole_grid: + cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) + cube_tmp = cube.copy() + cube_tmp.remove_coord('latitude') + cube_tmp.coord('grid_latitude').rename('latitude') + cube_tmp.remove_coord('longitude') + cube_tmp.coord('grid_longitude').rename('longitude') + logger.debug("Calculating grid cell areas for rotated pole grid") + cell_areas = _compute_area_weights(cube_tmp) + + # For all other cases, grid cell areas cannot be calculated + else: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular or unstructured grid of cube %s", + cube.summary(shorten=True), + ) + raise CoordinateMultiDimError(cube.coord('latitude')) + + # Add new cell measure + cell_measure = CellMeasure( + cell_areas, standard_name='cell_area', units='m2', measure='area', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + +def _compute_area_weights(cube): + """Compute area weights.""" + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.filterwarnings( + 'always', + message="Using DEFAULT_SPHERICAL_EARTH_RADIUS.", + category=UserWarning, + module='iris.analysis.cartography', + ) + # TODO: replace the following line with + # weights = iris.analysis.cartography.area_weights( + # cube, compute=not cube.has_lazy_data() + # ) + # once https://github.com/SciTools/iris/pull/5658 is available + weights = _get_area_weights(cube) + + for warning in caught_warnings: + logger.debug( + "%s while computing area weights of the following cube:\n%s", + warning.message, cube) + return weights + + +def _get_area_weights(cube: Cube) -> np.ndarray | da.Array: + """Get area weights. + + For non-lazy data, simply use the according iris function. For lazy data, + calculate area weights for a single lat-lon slice and broadcast it to the + correct shape. + + Note + ---- + This is a temporary workaround to get lazy area weights. Can be removed + once https://github.com/SciTools/iris/pull/5658 is available. 
+ + """ + if not cube.has_lazy_data(): + return iris.analysis.cartography.area_weights(cube) + + lat_lon_dims = sorted( + tuple(set(cube.coord_dims('latitude') + cube.coord_dims('longitude'))) + ) + lat_lon_slice = next(cube.slices(['latitude', 'longitude'], ordered=False)) + weights_2d = iris.analysis.cartography.area_weights(lat_lon_slice) + weights = broadcast_to_shape( + da.array(weights_2d), + cube.shape, + lat_lon_dims, + chunks=cube.lazy_data().chunks, + ) + return weights + + +def get_all_coords( + cube: Cube, + coords: Iterable[Coord] | Iterable[str] | None, +) -> Iterable[Coord] | Iterable[str]: + """Get all desired coordinates in a cube.""" + if coords is None: + coords = [c.name() for c in cube.dim_coords] + if len(coords) != cube.ndim: + raise ValueError( + f"If coords=None is specified, the cube " + f"{cube.summary(shorten=True)} must not have unnamed " + f"dimensions" + ) + return coords + + +def get_all_coord_dims( + cube: Cube, + coords: Iterable[Coord] | Iterable[str], +) -> tuple[int, ...]: + """Get sorted list of all coordinate dimensions from coordinates.""" + all_coord_dims = [] + for coord in coords: + all_coord_dims.extend(cube.coord_dims(coord)) + sorted_all_coord_dims = sorted(list(set(all_coord_dims))) + return tuple(sorted_all_coord_dims) diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 635861b86c..b89cc92b0a 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -32,6 +32,7 @@ from esmvalcore.iris_helpers import date2num, rechunk_cube from esmvalcore.preprocessor._shared import ( get_iris_aggregator, + get_time_weights, preserve_float_dtype, update_weights_kwargs, ) @@ -383,45 +384,6 @@ def extract_month(cube: Cube, month: int) -> Cube: return result -def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: - """Compute the weighting of the time axis. - - Parameters - ---------- - cube: - Input cube. - - Returns - ------- - np.ndarray or da.Array - Array of time weights for averaging. Returns a - :class:`dask.array.Array` if the input cube has lazy data; a - :class:`numpy.ndarray` otherwise. - - """ - time = cube.coord('time') - coord_dims = cube.coord_dims('time') - - # Multidimensional time coordinates are not supported: In this case, - # weights cannot be simply calculated as difference between the bounds - if len(coord_dims) > 1: - raise ValueError( - f"Weighted statistical operations are not supported for " - f"{len(coord_dims):d}D time coordinates, expected 0D or 1D" - ) - - # Extract 1D time weights (= lengths of time intervals) - time_weights = time.lazy_bounds()[:, 1] - time.lazy_bounds()[:, 0] - if cube.has_lazy_data(): - # Align the weight chunks with the data chunks to avoid excessively - # large chunks as a result of broadcasting. 
- time_chunks = cube.lazy_data().chunks[coord_dims[0]] - time_weights = time_weights.rechunk(time_chunks) - else: - time_weights = time_weights.compute() - return time_weights - - def _aggregate_time_fx(result_cube, source_cube): time_dim = set(source_cube.coord_dims(source_cube.coord('time'))) if source_cube.cell_measures(): diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 239fa99758..3d04f23500 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -15,11 +15,11 @@ from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube -from ._area import _try_adding_calculated_cell_area from ._shared import ( get_iris_aggregator, get_normalized_cube, preserve_float_dtype, + try_adding_calculated_cell_area, update_weights_kwargs, ) from ._supplementary_vars import register_supplementaries @@ -155,7 +155,7 @@ def calculate_volume(cube: Cube) -> da.core.Array: # Get or calculate the horizontal areas of the cube has_cell_measure = bool(cube.cell_measures('cell_area')) - _try_adding_calculated_cell_area(cube) + try_adding_calculated_cell_area(cube) area = cube.cell_measure('cell_area').copy() area_dim = cube.cell_measure_dims(area) diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index bcee6eac5b..c50d837b68 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -2983,7 +2983,7 @@ def test_bias_two_refs(tmp_path, patched_datafinder, session): assert "found 2" in exc.value.failed_tasks[0].message -def test_inlvaid_bias_type(tmp_path, patched_datafinder, session): +def test_invalid_bias_type(tmp_path, patched_datafinder, session): content = dedent(""" preprocessors: test_bias: @@ -3185,3 +3185,108 @@ def test_wildcard_derived_var( assert dataset.facets['dataset'] == 'BBB' assert dataset.facets['institute'] == 'B' assert dataset.facets['short_name'] == 'swcre' + + +def test_distance_metric_no_ref(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: emd + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + - {dataset: CESM2} + + scripts: null + """) + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true' in " + "products" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert msg in exc.value.failed_tasks[0].message + assert "found 0" in exc.value.failed_tasks[0].message + + +def test_distance_metric_two_refs(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: emd + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5, reference_for_metric: true} + - {dataset: CESM2, reference_for_metric: true} + + scripts: null + """) + msg = ( + "Expected exactly 1 dataset with 'reference_for_metric: true' in " + "products" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert msg in 
exc.value.failed_tasks[0].message + assert "found 2" in exc.value.failed_tasks[0].message + + +def test_invalid_metric(tmp_path, patched_datafinder, session): + content = dedent(""" + preprocessors: + test_distance_metric: + distance_metric: + metric: INVALID + + diagnostics: + diagnostic_name: + variables: + ta: + preprocessor: test_distance_metric + project: CMIP6 + mip: Amon + exp: historical + timerange: '20000101/20001231' + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + - {dataset: CESM2, reference_for_metric: true} + + scripts: null + """) + msg = ( + "Expected one of ('rmse', 'weighted_rmse', 'pearsonr', " + "'weighted_pearsonr', 'emd', 'weighted_emd') for `metric`, got " + "'INVALID'" + ) + with pytest.raises(RecipeError) as exc: + get_recipe(tmp_path, content, session) + assert str(exc.value) == INITIALIZATION_ERROR_MSG + assert exc.value.failed_tasks[0].message == msg diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index a7374a5e8b..e7c3f998c5 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -2,7 +2,6 @@ import unittest from pathlib import Path -import dask.array as da import fiona import iris import numpy as np @@ -18,7 +17,6 @@ import tests from esmvalcore.preprocessor._area import ( _crop_cube, - _get_area_weights, _get_requested_geometries, _update_shapefile_path, area_statistics, @@ -1447,22 +1445,5 @@ def test_time_dependent_volcello(): assert cube.shape == cube.cell_measure('ocean_volume').shape -@pytest.mark.parametrize('lazy', [True, False]) -def test_get_area_weights(lazy): - """Test _get_area_weights.""" - cube = _create_sample_full_cube() - if lazy: - cube.data = cube.lazy_data() - weights = _get_area_weights(cube) - if lazy: - assert isinstance(weights, da.Array) - assert weights.chunks == cube.lazy_data().chunks - else: - assert isinstance(weights, np.ndarray) - np.testing.assert_allclose( - weights, iris.analysis.cartography.area_weights(cube) - ) - - if __name__ == '__main__': unittest.main() diff --git a/tests/unit/preprocessor/_bias/test_bias.py b/tests/unit/preprocessor/_bias/test_bias.py deleted file mode 100644 index 9adab3872e..0000000000 --- a/tests/unit/preprocessor/_bias/test_bias.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Unit tests for :mod:`esmvalcore.preprocessor._bias`.""" - -import iris -import numpy as np -import pytest -from cf_units import Unit -from iris.cube import Cube, CubeList - -from esmvalcore.preprocessor._bias import bias -from tests import PreprocessorFile - - -def assert_array_equal(array_1, array_2): - """Assert that (masked) array 1 equals (masked) array 2.""" - if np.ma.is_masked(array_1) or np.ma.is_masked(array_2): - np.testing.assert_array_equal(np.ma.getmaskarray(array_1), - np.ma.getmaskarray(array_2)) - mask = np.ma.getmaskarray(array_1) - np.testing.assert_array_equal(array_1[~mask], array_2[~mask]) - else: - np.testing.assert_array_equal(array_1, array_2) - - -def products_set_to_dict(products): - """Convert :obj:`set` of products to :obj:`dict`.""" - new_dict = {} - for product in products: - new_dict[product.filename] = product - return new_dict - - -def get_3d_cube(data, **cube_kwargs): - """Create 3D cube.""" - time_units = Unit('days since 1850-01-01 00:00:00') - times = iris.coords.DimCoord([0.0, 1.0], standard_name='time', - var_name='time', long_name='time', - units=time_units) - lats = iris.coords.DimCoord([0.0, 10.0], standard_name='latitude', - var_name='lat', 
long_name='latitude', - units='degrees_north') - lons = iris.coords.DimCoord([20.0, 30.0], standard_name='longitude', - var_name='lon', long_name='longitude', - units='degrees_east') - coord_specs = [(times, 0), (lats, 1), (lons, 2)] - cube = Cube(data.astype('float32'), - dim_coords_and_dims=coord_specs, **cube_kwargs) - return cube - - -@pytest.fixture -def regular_cubes(): - """Regular cube.""" - cube_data = np.arange(8.0).reshape(2, 2, 2) - cube = get_3d_cube(cube_data, standard_name='air_temperature', - var_name='tas', units='K') - return CubeList([cube]) - - -@pytest.fixture -def ref_cubes(): - """Reference cube.""" - cube_data = np.full((2, 2, 2), 2.0) - cube_data[1, 1, 1] = 4.0 - cube = get_3d_cube(cube_data, standard_name='air_temperature', - var_name='tas', units='K') - return CubeList([cube]) - - -TEST_BIAS = [ - ('absolute', [[[-2.0, -1.0], [0.0, 1.0]], [[2.0, 3.0], [4.0, 3.0]]], 'K'), - ('relative', [[[-1.0, -0.5], [0.0, 0.5]], [[1.0, 1.5], [2.0, 0.75]]], '1'), -] - - -@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS) -def test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): - """Test calculation of bias with products.""" - ref_product = PreprocessorFile(ref_cubes, 'REF', - {'reference_for_bias': True}) - products = { - PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), - PreprocessorFile(regular_cubes, 'B', {'dataset': 'b'}), - ref_product, - } - out_products = bias(products, bias_type=bias_type) - - assert isinstance(out_products, set) - out_dict = products_set_to_dict(out_products) - assert len(out_dict) == 2 - - product_a = out_dict['A'] - assert product_a.filename == 'A' - assert product_a.attributes == {'units': units, 'dataset': 'a'} - assert len(product_a.cubes) == 1 - out_cube = product_a.cubes[0] - assert_array_equal(out_cube.data, data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == units - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - product_a.wasderivedfrom.assert_called_once() - assert product_a.mock_ancestors == {ref_product} - - product_b = out_dict['B'] - assert product_b.filename == 'B' - assert product_b.attributes == {'units': units, 'dataset': 'b'} - assert len(product_b.cubes) == 1 - out_cube = product_b.cubes[0] - assert_array_equal(out_cube.data, data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == units - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - product_b.wasderivedfrom.assert_called_once() - assert product_b.mock_ancestors == {ref_product} - - -@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS) -def test_bias_cubes(regular_cubes, ref_cubes, bias_type, data, units): - """Test calculation of bias with cubes.""" - ref_cube = ref_cubes[0] - out_cubes = bias(regular_cubes, ref_cube, bias_type=bias_type) - - assert isinstance(out_cubes, CubeList) - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - assert_array_equal(out_cube.data, data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == units - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - - -TEST_BIAS_BROADCASTABLE = [ - ('absolute', [[[-2.0, -1.0], [0.0, 1.0]], [[2.0, 3.0], [4.0, 5.0]]], 'K'), - ('relative', [[[-1.0, -0.5], [0.0, 
0.5]], [[1.0, 1.5], [2.0, 2.5]]], '1'), -] - - -@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS_BROADCASTABLE) -def test_bias_cubes_broadcastable( - regular_cubes, ref_cubes, bias_type, data, units -): - """Test calculation of bias with cubes.""" - ref_cube = ref_cubes[0][0] # only select one time step - out_cubes = bias(regular_cubes, ref_cube, bias_type=bias_type) - - assert isinstance(out_cubes, CubeList) - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - assert_array_equal(out_cube.data, data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == units - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - - -def test_denominator_mask_threshold_products(regular_cubes, ref_cubes): - """Test denominator_mask_threshold argument with products.""" - ref_product = PreprocessorFile(ref_cubes, 'REF', - {'reference_for_bias': True}) - products = { - PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), - ref_product, - } - out_products = bias( - products, bias_type='relative', denominator_mask_threshold=3.0 - ) - - assert isinstance(out_products, set) - out_dict = products_set_to_dict(out_products) - assert len(out_dict) == 1 - - product_a = out_dict['A'] - assert product_a.filename == 'A' - assert product_a.attributes == {'units': '1', 'dataset': 'a'} - assert len(product_a.cubes) == 1 - out_cube = product_a.cubes[0] - expected_data = np.ma.masked_equal([[[42.0, 42.0], - [42.0, 42.0]], - [[42.0, 42.0], - [42.0, 0.75]]], 42.0) - assert_array_equal(out_cube.data, expected_data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == '1' - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - product_a.wasderivedfrom.assert_called_once() - assert product_a.mock_ancestors == {ref_product} - - -def test_denominator_mask_threshold_cubes(regular_cubes, ref_cubes): - """Test denominator_mask_threshold argument with cubes.""" - ref_cube = ref_cubes[0] - out_cubes = bias( - regular_cubes, - ref_cube, - bias_type='relative', - denominator_mask_threshold=3.0, - ) - - assert isinstance(out_cubes, CubeList) - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - expected_data = np.ma.masked_equal([[[42.0, 42.0], - [42.0, 42.0]], - [[42.0, 42.0], - [42.0, 0.75]]], 42.0) - assert_array_equal(out_cube.data, expected_data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == '1' - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - - -@pytest.mark.parametrize('bias_type', ['absolute', 'relative']) -def test_keep_reference_dataset(regular_cubes, ref_cubes, bias_type): - """Test denominator_mask_threshold argument.""" - products = { - PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), - PreprocessorFile(ref_cubes, 'REF', {'reference_for_bias': True}) - } - out_products = bias( - products, bias_type=bias_type, keep_reference_dataset=True - ) - - assert isinstance(out_products, set) - out_dict = products_set_to_dict(out_products) - assert len(out_dict) == 2 - - product_ref = out_dict['REF'] - assert product_ref.filename == 'REF' - assert product_ref.attributes == {'reference_for_bias': True} - assert len(product_ref.cubes) == 1 - out_cube = product_ref.cubes[0] - expected_data = [[[2.0, 2.0], [2.0, 2.0]], 
[[2.0, 2.0], [2.0, 4.0]]] - assert_array_equal(out_cube.data, expected_data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == 'K' - assert out_cube.dim_coords == ref_cubes[0].dim_coords - assert out_cube.aux_coords == ref_cubes[0].aux_coords - - -@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS) -@pytest.mark.parametrize('keep_ref', [True, False]) -def test_bias_products_and_ref_cube( - regular_cubes, ref_cubes, keep_ref, bias_type, data, units -): - """Test calculation of bias with products and ref_cube given.""" - ref_cube = ref_cubes[0] - products = set([PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'})]) - - out_products = bias( - products, - ref_cube=ref_cube, - bias_type=bias_type, - keep_reference_dataset=keep_ref, - ) - - assert isinstance(out_products, set) - out_dict = products_set_to_dict(out_products) - assert len(out_dict) == 1 - - product_a = out_dict['A'] - assert product_a.filename == 'A' - assert product_a.attributes == {'units': units, 'dataset': 'a'} - assert len(product_a.cubes) == 1 - out_cube = product_a.cubes[0] - assert_array_equal(out_cube.data, data) - assert out_cube.var_name == 'tas' - assert out_cube.standard_name == 'air_temperature' - assert out_cube.units == units - assert out_cube.dim_coords == regular_cubes[0].dim_coords - assert out_cube.aux_coords == regular_cubes[0].aux_coords - product_a.wasderivedfrom.assert_not_called() - assert product_a.mock_ancestors == set() - - -def test_no_reference_for_bias(regular_cubes, ref_cubes): - """Test fail when no reference_for_bias is given.""" - products = { - PreprocessorFile(regular_cubes, 'A', {}), - PreprocessorFile(regular_cubes, 'B', {}), - PreprocessorFile(ref_cubes, 'REF', {}), - } - msg = "Expected exactly 1 dataset with 'reference_for_bias: true', found 0" - with pytest.raises(ValueError, match=msg): - bias(products) - - -def test_two_references_for_bias(regular_cubes, ref_cubes): - """Test fail when two reference_for_bias products is given.""" - products = { - PreprocessorFile(regular_cubes, 'A', {'reference_for_bias': False}), - PreprocessorFile(ref_cubes, 'REF1', {'reference_for_bias': True}), - PreprocessorFile(ref_cubes, 'REF2', {'reference_for_bias': True}), - } - msg = "Expected exactly 1 dataset with 'reference_for_bias: true', found 2" - with pytest.raises(ValueError, match=msg): - bias(products) - - -def test_invalid_bias_type(regular_cubes, ref_cubes): - """Test fail when invalid bias_type is given.""" - products = { - PreprocessorFile(regular_cubes, 'A', {}), - PreprocessorFile(regular_cubes, 'B', {}), - PreprocessorFile(ref_cubes, 'REF', {'reference_for_bias': True}), - } - msg = (r"Expected one of \['absolute', 'relative'\] for bias_type, got " - r"'invalid_bias_type'") - with pytest.raises(ValueError, match=msg): - bias(products, bias_type='invalid_bias_type') - - -def test_ref_cube_non_cubes(regular_cubes): - """Test ref_cube=None with with cubes.""" - msg = ( - "A list of Cubes is given to this preprocessor; please specify a " - "`ref_cube`" - ) - with pytest.raises(ValueError, match=msg): - bias(regular_cubes) diff --git a/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py new file mode 100644 index 0000000000..7def9afe1b --- /dev/null +++ b/tests/unit/preprocessor/_compare_with_refs/test_compare_with_refs.py @@ -0,0 +1,809 @@ +"""Unit tests for :mod:`esmvalcore.preprocessor._compare_with_refs`.""" + +import contextlib + 
+import dask.array as da +import iris +import numpy as np +import pytest +from cf_units import Unit +from iris.coords import CellMeasure, CellMethod +from iris.cube import Cube, CubeList +from iris.exceptions import CoordinateNotFoundError + +from esmvalcore.preprocessor._compare_with_refs import bias, distance_metric +from tests import PreprocessorFile + + +def assert_allclose(array_1, array_2): + """Assert that (masked) array 1 is close to (masked) array 2.""" + if np.ma.is_masked(array_1) or np.ma.is_masked(array_2): + mask_1 = np.ma.getmaskarray(array_1) + mask_2 = np.ma.getmaskarray(array_2) + np.testing.assert_equal(mask_1, mask_2) + np.testing.assert_allclose(array_1[~mask_1], array_2[~mask_2]) + else: + np.testing.assert_allclose(array_1, array_2) + + +def products_set_to_dict(products): + """Convert :obj:`set` of products to :obj:`dict`.""" + new_dict = {} + for product in products: + new_dict[product.filename] = product + return new_dict + + +def get_3d_cube(data, **cube_kwargs): + """Create 3D cube.""" + time_units = Unit('days since 1850-01-01 00:00:00') + times = iris.coords.DimCoord([3.0, 7.0], + bounds=[[0.0, 6.0], [6.0, 8.0]], + standard_name='time', + var_name='time', long_name='time', + units=time_units) + lats = iris.coords.DimCoord([0.0, 10.0], standard_name='latitude', + var_name='lat', long_name='latitude', + units='degrees_north') + lons = iris.coords.DimCoord([20.0, 30.0], standard_name='longitude', + var_name='lon', long_name='longitude', + units='degrees_east') + coord_specs = [(times, 0), (lats, 1), (lons, 2)] + cube = Cube(data.astype('float32'), + dim_coords_and_dims=coord_specs, **cube_kwargs) + return cube + + +@pytest.fixture +def regular_cubes(): + """Regular cubes.""" + cube_data = np.arange(8.0).reshape(2, 2, 2) + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) + return CubeList([cube]) + + +@pytest.fixture +def ref_cubes(): + """Reference cubes.""" + cube_data = np.full((2, 2, 2), 2.0) + cube_data[1, 1, 1] = 4.0 + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) + return CubeList([cube]) + + +TEST_BIAS = [ + ('absolute', [[[-2.0, -1.0], [0.0, 1.0]], [[2.0, 3.0], [4.0, 3.0]]], 'K'), + ('relative', [[[-1.0, -0.5], [0.0, 0.5]], [[1.0, 1.5], [2.0, 0.75]]], '1'), +] + + +@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS) +def test_bias_products(regular_cubes, ref_cubes, bias_type, data, units): + """Test calculation of bias with products.""" + ref_product = PreprocessorFile(ref_cubes, 'REF', + {'reference_for_bias': True}) + products = { + PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), + PreprocessorFile(regular_cubes, 'B', {'dataset': 'b'}), + ref_product, + } + out_products = bias(products, bias_type=bias_type) + + assert isinstance(out_products, set) + out_dict = products_set_to_dict(out_products) + assert len(out_dict) == 2 + + product_a = out_dict['A'] + assert product_a.filename == 'A' + assert product_a.attributes == {'units': units, 'dataset': 'a'} + assert len(product_a.cubes) == 1 + out_cube = product_a.cubes[0] + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, data) + assert out_cube.var_name == 'tas' + assert out_cube.standard_name == 'air_temperature' + assert out_cube.units == units + assert out_cube.dim_coords == regular_cubes[0].dim_coords + assert out_cube.aux_coords == regular_cubes[0].aux_coords + product_a.wasderivedfrom.assert_called_once() + assert product_a.mock_ancestors == {ref_product} + + product_b = 
out_dict['B'] + assert product_b.filename == 'B' + assert product_b.attributes == {'units': units, 'dataset': 'b'} + assert len(product_b.cubes) == 1 + out_cube = product_b.cubes[0] + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, data) + assert out_cube.var_name == 'tas' + assert out_cube.standard_name == 'air_temperature' + assert out_cube.units == units + assert out_cube.dim_coords == regular_cubes[0].dim_coords + assert out_cube.aux_coords == regular_cubes[0].aux_coords + product_b.wasderivedfrom.assert_called_once() + assert product_b.mock_ancestors == {ref_product} + + +@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS) +def test_bias_cubes(regular_cubes, ref_cubes, bias_type, data, units): + """Test calculation of bias with cubes.""" + ref_cube = ref_cubes[0] + out_cubes = bias(regular_cubes, ref_cube, bias_type=bias_type) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, data) + assert out_cube.var_name == 'tas' + assert out_cube.standard_name == 'air_temperature' + assert out_cube.units == units + assert out_cube.dim_coords == regular_cubes[0].dim_coords + assert out_cube.aux_coords == regular_cubes[0].aux_coords + + +TEST_BIAS_BROADCASTABLE = [ + ('absolute', [[[-2.0, -1.0], [0.0, 1.0]], [[2.0, 3.0], [4.0, 5.0]]], 'K'), + ('relative', [[[-1.0, -0.5], [0.0, 0.5]], [[1.0, 1.5], [2.0, 2.5]]], '1'), +] + + +@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS_BROADCASTABLE) +def test_bias_cubes_broadcastable( + regular_cubes, ref_cubes, bias_type, data, units +): + """Test calculation of bias with cubes.""" + ref_cube = ref_cubes[0][0] # only select one time step + out_cubes = bias(regular_cubes, ref_cube, bias_type=bias_type) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, data) + assert out_cube.var_name == 'tas' + assert out_cube.standard_name == 'air_temperature' + assert out_cube.units == units + assert out_cube.dim_coords == regular_cubes[0].dim_coords + assert out_cube.aux_coords == regular_cubes[0].aux_coords + + +def test_denominator_mask_threshold_products(regular_cubes, ref_cubes): + """Test denominator_mask_threshold argument with products.""" + ref_product = PreprocessorFile(ref_cubes, 'REF', + {'reference_for_bias': True}) + products = { + PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), + ref_product, + } + out_products = bias( + products, bias_type='relative', denominator_mask_threshold=3.0 + ) + + assert isinstance(out_products, set) + out_dict = products_set_to_dict(out_products) + assert len(out_dict) == 1 + + product_a = out_dict['A'] + assert product_a.filename == 'A' + assert product_a.attributes == {'units': '1', 'dataset': 'a'} + assert len(product_a.cubes) == 1 + out_cube = product_a.cubes[0] + assert out_cube.dtype == np.float32 + expected_data = np.ma.masked_equal([[[42.0, 42.0], + [42.0, 42.0]], + [[42.0, 42.0], + [42.0, 0.75]]], 42.0) + assert_allclose(out_cube.data, expected_data) + assert out_cube.var_name == 'tas' + assert out_cube.standard_name == 'air_temperature' + assert out_cube.units == '1' + assert out_cube.dim_coords == regular_cubes[0].dim_coords + assert out_cube.aux_coords == regular_cubes[0].aux_coords + product_a.wasderivedfrom.assert_called_once() + assert product_a.mock_ancestors == {ref_product} + + +def 
test_denominator_mask_threshold_cubes(regular_cubes, ref_cubes):
+    """Test denominator_mask_threshold argument with cubes."""
+    ref_cube = ref_cubes[0]
+    out_cubes = bias(
+        regular_cubes,
+        ref_cube,
+        bias_type='relative',
+        denominator_mask_threshold=3.0,
+    )
+
+    assert isinstance(out_cubes, CubeList)
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+    assert out_cube.dtype == np.float32
+    expected_data = np.ma.masked_equal([[[42.0, 42.0],
+                                         [42.0, 42.0]],
+                                        [[42.0, 42.0],
+                                         [42.0, 0.75]]], 42.0)
+    assert_allclose(out_cube.data, expected_data)
+    assert out_cube.var_name == 'tas'
+    assert out_cube.standard_name == 'air_temperature'
+    assert out_cube.units == '1'
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+
+
+@pytest.mark.parametrize('bias_type', ['absolute', 'relative'])
+def test_keep_reference_dataset(regular_cubes, ref_cubes, bias_type):
+    """Test keep_reference_dataset argument."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}),
+        PreprocessorFile(ref_cubes, 'REF', {'reference_for_bias': True})
+    }
+    out_products = bias(
+        products, bias_type=bias_type, keep_reference_dataset=True
+    )
+
+    assert isinstance(out_products, set)
+    out_dict = products_set_to_dict(out_products)
+    assert len(out_dict) == 2
+
+    product_ref = out_dict['REF']
+    assert product_ref.filename == 'REF'
+    assert product_ref.attributes == {'reference_for_bias': True}
+    assert len(product_ref.cubes) == 1
+    out_cube = product_ref.cubes[0]
+    assert out_cube.dtype == np.float32
+    expected_data = [[[2.0, 2.0], [2.0, 2.0]], [[2.0, 2.0], [2.0, 4.0]]]
+    assert_allclose(out_cube.data, expected_data)
+    assert out_cube.var_name == 'tas'
+    assert out_cube.standard_name == 'air_temperature'
+    assert out_cube.units == 'K'
+    assert out_cube.dim_coords == ref_cubes[0].dim_coords
+    assert out_cube.aux_coords == ref_cubes[0].aux_coords
+
+
+@pytest.mark.parametrize('bias_type,data,units', TEST_BIAS)
+@pytest.mark.parametrize('keep_ref', [True, False])
+def test_bias_products_and_ref_cube(
+    regular_cubes, ref_cubes, keep_ref, bias_type, data, units
+):
+    """Test calculation of bias with products and ref_cube given."""
+    ref_cube = ref_cubes[0]
+    products = set([PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'})])
+
+    out_products = bias(
+        products,
+        reference=ref_cube,
+        bias_type=bias_type,
+        keep_reference_dataset=keep_ref,
+    )
+
+    assert isinstance(out_products, set)
+    out_dict = products_set_to_dict(out_products)
+    assert len(out_dict) == 1
+
+    product_a = out_dict['A']
+    assert product_a.filename == 'A'
+    assert product_a.attributes == {'units': units, 'dataset': 'a'}
+    assert len(product_a.cubes) == 1
+    out_cube = product_a.cubes[0]
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, data)
+    assert out_cube.var_name == 'tas'
+    assert out_cube.standard_name == 'air_temperature'
+    assert out_cube.units == units
+    assert out_cube.dim_coords == regular_cubes[0].dim_coords
+    assert out_cube.aux_coords == regular_cubes[0].aux_coords
+    product_a.wasderivedfrom.assert_not_called()
+    assert product_a.mock_ancestors == set()
+
+
+def test_no_reference_for_bias(regular_cubes, ref_cubes):
+    """Test fail when no reference_for_bias is given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {}),
+        PreprocessorFile(regular_cubes, 'B', {}),
+        PreprocessorFile(ref_cubes, 'REF', {}),
+    }
+    msg = "Expected exactly 1 dataset with 'reference_for_bias: true', found 0"
+    with pytest.raises(ValueError, match=msg):
+        bias(products)
+
+
+def test_two_references_for_bias(regular_cubes, ref_cubes):
+    """Test fail when two reference_for_bias products are given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {'reference_for_bias': False}),
+        PreprocessorFile(ref_cubes, 'REF1', {'reference_for_bias': True}),
+        PreprocessorFile(ref_cubes, 'REF2', {'reference_for_bias': True}),
+    }
+    msg = "Expected exactly 1 dataset with 'reference_for_bias: true', found 2"
+    with pytest.raises(ValueError, match=msg):
+        bias(products)
+
+
+def test_invalid_bias_type(regular_cubes, ref_cubes):
+    """Test fail when invalid bias_type is given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {}),
+        PreprocessorFile(regular_cubes, 'B', {}),
+        PreprocessorFile(ref_cubes, 'REF', {'reference_for_bias': True}),
+    }
+    msg = (r"Expected one of \['absolute', 'relative'\] for bias_type, got "
+           r"'invalid_bias_type'")
+    with pytest.raises(ValueError, match=msg):
+        bias(products, bias_type='invalid_bias_type')
+
+
+def test_reference_none_cubes(regular_cubes):
+    """Test reference=None with cubes."""
+    msg = (
+        "A list of Cubes is given to this preprocessor; please specify a "
+        "`reference`"
+    )
+    with pytest.raises(ValueError, match=msg):
+        bias(regular_cubes)
+
+
+TEST_DISTANCE_METRICS = [
+    ('rmse', 2.34520788, 0.0, 'RMSE', 'rmse_tas', 'K'),
+    ('weighted_rmse', 2.0, 0.0, 'RMSE', 'rmse_tas', 'K'),
+    ('pearsonr', 0.57735026, 1.0, "Pearson's r", 'pearsonr_tas', '1'),
+    ('weighted_pearsonr', np.nan, 1.0, "Pearson's r", 'pearsonr_tas', '1'),
+    ('emd', 1.98625, 0.0, 'EMD', 'emd_tas', 'K'),
+    ('weighted_emd', 0.9975, 0.0, 'EMD', 'emd_tas', 'K'),
+]
+AREA_WEIGHTS = CellMeasure(
+    np.array([0.0, 0.0, 2.0, 0.0]).reshape(2, 2),
+    standard_name='cell_area',
+    units='m2',
+)
+
+
+@pytest.mark.parametrize(
+    'metric,data,ref_data,long_name,var_name,units', TEST_DISTANCE_METRICS
+)
+def test_distance_metric(
+    regular_cubes,
+    ref_cubes,
+    metric,
+    data,
+    ref_data,
+    long_name,
+    var_name,
+    units,
+):
+    """Test `distance_metric`."""
+    regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2))
+    ref_product = PreprocessorFile(
+        ref_cubes, 'REF', {'reference_for_metric': True}
+    )
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}),
+        PreprocessorFile(regular_cubes, 'B', {'dataset': 'b'}),
+        ref_product,
+    }
+
+    out_products = distance_metric(products, metric)
+
+    assert isinstance(out_products, set)
+    out_dict = products_set_to_dict(out_products)
+    assert len(out_dict) == 3
+    expected_attrs = {
+        'standard_name': None,
+        'long_name': long_name,
+        'short_name': var_name,
+        'units': units,
+    }
+
+    product_a = out_dict['A']
+    assert product_a.filename == 'A'
+    assert product_a.attributes == {'dataset': 'a', **expected_attrs}
+    assert len(product_a.cubes) == 1
+    out_cube = product_a.cubes[0]
+    assert out_cube.shape == ()
+    assert out_cube.dtype == np.float32
+    assert_allclose(out_cube.data, np.array(data, dtype=np.float32))
+    assert out_cube.var_name == var_name
+    assert out_cube.long_name == long_name
+    assert out_cube.standard_name is None
+    assert out_cube.units == units
+    assert out_cube.cell_methods == (
+        CellMethod(metric, ['time', 'latitude', 'longitude']),
+    )
+    product_a.wasderivedfrom.assert_called_once()
+    assert product_a.mock_ancestors == {ref_product}
+
+    product_b = out_dict['B']
+    assert product_b.filename == 'B'
+    assert product_b.attributes == {'dataset': 'b', **expected_attrs}
+    assert len(product_b.cubes) == 1
+    out_cube = product_b.cubes[0]
+    assert out_cube.shape == ()
+
assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) + product_b.wasderivedfrom.assert_called_once() + assert product_b.mock_ancestors == {ref_product} + + product_ref = out_dict['REF'] + assert product_ref.filename == 'REF' + assert product_ref.attributes == { + 'reference_for_metric': True, **expected_attrs + } + assert len(product_ref.cubes) == 1 + out_cube = product_ref.cubes[0] + assert out_cube.shape == () + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, ref_data) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) + product_ref.wasderivedfrom.assert_not_called() + assert product_ref.mock_ancestors == set() + + +TEST_DISTANCE_METRICS_LAZY = [ + ('rmse', [1.224744871, 3.082207001], 'RMSE', 'rmse_tas', 'K'), + ('weighted_rmse', [1.2278657, 3.0784798], 'RMSE', 'rmse_tas', 'K'), + ('pearsonr', [np.nan, 0.77459663], "Pearson's r", 'pearsonr_tas', '1'), + ( + 'weighted_pearsonr', + [np.nan, 0.7745946], + "Pearson's r", + 'pearsonr_tas', + '1', + ), + ('emd', [0.98, 2.9925], 'EMD', 'emd_tas', 'K'), + ('weighted_emd', [0.9837506, 2.9888833], 'EMD', 'emd_tas', 'K'), +] + + +@pytest.mark.parametrize( + 'metric,data,long_name,var_name,units', TEST_DISTANCE_METRICS_LAZY +) +def test_distance_metric_lazy( + regular_cubes, ref_cubes, metric, data, long_name, var_name, units +): + """Test `distance_metric` with lazy data.""" + regular_cubes[0].data = da.array(regular_cubes[0].data) + ref_cubes[0].data = da.array(ref_cubes[0].data) + ref_product = PreprocessorFile( + ref_cubes, 'REF', {'reference_for_metric': True} + ) + products = { + PreprocessorFile(regular_cubes, 'A', {'dataset': 'a'}), + ref_product, + } + + out_products = distance_metric( + products, + metric, + coords=['latitude', 'longitude'], + keep_reference_dataset=False, + ) + + assert isinstance(out_products, set) + out_dict = products_set_to_dict(out_products) + assert len(out_dict) == 1 + + product_a = out_dict['A'] + assert product_a.filename == 'A' + assert product_a.attributes == { + 'dataset': 'a', + 'standard_name': None, + 'long_name': long_name, + 'short_name': var_name, + 'units': units, + } + assert len(product_a.cubes) == 1 + out_cube = product_a.cubes[0] + assert out_cube.shape == (2,) + assert out_cube.dtype == np.float32 + assert out_cube.has_lazy_data() + assert_allclose( + out_cube.data, + np.ma.masked_invalid(np.array(data, dtype=np.float32)), + ) + assert out_cube.coord('time') == regular_cubes[0].coord('time') + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['latitude', 'longitude']), + ) + product_a.wasderivedfrom.assert_called_once() + assert product_a.mock_ancestors == {ref_product} + + +@pytest.mark.parametrize( + 'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric_cubes( + regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units +): + """Test `distance_metric` with cubes.""" + 
regular_cubes[0].add_cell_measure(AREA_WEIGHTS, (1, 2)) + out_cubes = distance_metric(regular_cubes, metric, reference=ref_cubes[0]) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.shape == () + assert out_cube.dtype == np.float32 + assert_allclose(out_cube.data, np.array(data, dtype=np.float32)) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) + + +@pytest.mark.parametrize('lazy', [True, False]) +@pytest.mark.parametrize( + 'metric,data,_,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric_masked_data( + regular_cubes, ref_cubes, metric, data, _, long_name, var_name, units, lazy +): + """Test `distance_metric` with masked data.""" + # Test cube + time_units = Unit('days since 1850-01-01 00:00:00') + times = iris.coords.DimCoord([3.0, 7.0, 9.0], + bounds=[[0.0, 6.0], [6.0, 8.0], [8.0, 10.0]], + standard_name='time', + var_name='time', long_name='time', + units=time_units) + lats = regular_cubes[0].coord('latitude') + lons = regular_cubes[0].coord('longitude') + coord_specs = [(times, 0), (lats, 1), (lons, 2)] + cube_data = np.pad( + regular_cubes[0].data, + ((0, 1), (0, 0), (0, 0)), + 'constant', + constant_values=np.nan, + ) + cube = Cube( + np.ma.masked_invalid(cube_data), dim_coords_and_dims=coord_specs + ) + cube.metadata = regular_cubes[0].metadata + cube.add_cell_measure(AREA_WEIGHTS, (1, 2)) + + # Ref cube + ref_cube = cube.copy() + ref_data = np.pad( + ref_cubes[0].data, + ((0, 1), (0, 0), (0, 0)), + 'constant', + constant_values=np.nan, + ) + ref_cube.data = np.ma.masked_invalid(ref_data) + ref_cube.metadata = ref_cubes[0].metadata + + if lazy: + cube.data = da.array(cube.data) + ref_cube.data = da.array(ref_cube.data) + + out_cubes = distance_metric([cube], metric, reference=ref_cube) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.shape == () + if lazy: + assert out_cube.has_lazy_data() + else: + assert not out_cube.has_lazy_data() + assert out_cube.dtype == np.float32 + + # Mask handling differs for dask and numpy + if np.isnan(data) and not lazy: + expected_data = np.ma.masked_invalid(data) + else: + expected_data = np.array(data, dtype=np.float32) + assert_allclose(out_cube.data, expected_data) + assert out_cube.var_name == var_name + assert out_cube.long_name == long_name + assert out_cube.standard_name is None + assert out_cube.units == units + assert out_cube.cell_methods == ( + CellMethod(metric, ['time', 'latitude', 'longitude']), + ) + + +@pytest.mark.parametrize('lazy', [True, False]) +@pytest.mark.parametrize( + 'metric,_,__,long_name,var_name,units', TEST_DISTANCE_METRICS +) +def test_distance_metric_fully_masked_data( + regular_cubes, ref_cubes, metric, _, __, long_name, var_name, units, lazy +): + """Test `distance_metric` with fully_masked data.""" + cube = regular_cubes[0] + cube.data = np.ma.masked_invalid(np.full(cube.shape, np.nan)) + cube.add_cell_measure(AREA_WEIGHTS, (1, 2)) + ref_cube = ref_cubes[0] + + if lazy: + cube.data = da.array(cube.data) + ref_cube.data = da.array(ref_cube.data) + + out_cubes = distance_metric([cube], metric, reference=ref_cube) + + assert isinstance(out_cubes, CubeList) + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + assert out_cube.shape == () + if lazy: + 
+        assert out_cube.has_lazy_data()
+    else:
+        assert not out_cube.has_lazy_data()
+    assert out_cube.dtype == np.float64
+
+    expected_data = np.ma.masked_all(())
+    assert_allclose(out_cube.data, expected_data)
+    assert out_cube.var_name == var_name
+    assert out_cube.long_name == long_name
+    assert out_cube.standard_name is None
+    assert out_cube.units == units
+    assert out_cube.cell_methods == (
+        CellMethod(metric, ['time', 'latitude', 'longitude']),
+    )
+
+
+TEST_METRICS = [
+    'rmse',
+    'weighted_rmse',
+    'pearsonr',
+    'weighted_pearsonr',
+    'emd',
+    'weighted_emd',
+]
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def test_no_reference_for_metric(regular_cubes, ref_cubes, metric):
+    """Test fail when no reference_for_metric is given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {}),
+        PreprocessorFile(regular_cubes, 'B', {}),
+        PreprocessorFile(ref_cubes, 'REF', {}),
+    }
+    msg = (
+        "Expected exactly 1 dataset with 'reference_for_metric: true', found 0"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(products, metric)
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def test_two_references_for_metric(regular_cubes, ref_cubes, metric):
+    """Test fail when two reference_for_metric products are given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {'reference_for_metric': False}),
+        PreprocessorFile(ref_cubes, 'REF1', {'reference_for_metric': True}),
+        PreprocessorFile(ref_cubes, 'REF2', {'reference_for_metric': True}),
+    }
+    msg = (
+        "Expected exactly 1 dataset with 'reference_for_metric: true', found 2"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(products, 'rmse')
+
+
+def test_invalid_metric(regular_cubes, ref_cubes):
+    """Test fail when invalid metric is given."""
+    products = {
+        PreprocessorFile(regular_cubes, 'A', {}),
+        PreprocessorFile(regular_cubes, 'B', {}),
+        PreprocessorFile(ref_cubes, 'REF', {'reference_for_metric': True}),
+    }
+    msg = (
+        r"Expected one of \['rmse', 'weighted_rmse', 'pearsonr', "
+        r"'weighted_pearsonr', 'emd', 'weighted_emd'\] for metric, got "
+        r"'invalid'"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(products, 'invalid')
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def test_distance_metric_reference_none_cubes(regular_cubes, metric):
+    """Test distance metric with reference=None and cubes."""
+    msg = (
+        "A list of Cubes is given to this preprocessor; please specify a "
+        "`reference`"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(regular_cubes, metric)
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def test_distance_metric_no_named_dimensions(metric):
+    """Test distance metric with a cube that has unnamed dimensions."""
+    ref_cube = Cube([0, 1])
+    cubes = CubeList([ref_cube])
+    msg = (
+        "If coords=None is specified, the cube .* must not have unnamed "
+        "dimensions"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(cubes, metric, reference=ref_cube)
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def test_distance_metric_non_matching_shapes(regular_cubes, metric):
+    """Test distance metric with cubes of different shapes."""
+    ref_cube = Cube(0)
+    msg = (
+        r"Expected identical shapes of cube and reference cube for distance "
+        r"metric calculation, got \(2, 2, 2\) and \(\), respectively"
+    )
+    with pytest.raises(ValueError, match=msg):
+        distance_metric(regular_cubes, metric, reference=ref_cube)
+
+
+@pytest.mark.parametrize('metric', TEST_METRICS)
+def
test_distance_metric_non_matching_dims(regular_cubes, metric): + """Test distance metric with cubes with difference dimensions.""" + ref_cube = regular_cubes[0].copy() + ref_cube.remove_coord('time') + new_coord = iris.coords.DimCoord([0.0, 1.0], var_name='not_time') + ref_cube.add_dim_coord(new_coord, 0) + msg = "Cannot calculate distance metric between cube and reference cube" + with pytest.raises(ValueError, match=msg): + distance_metric(regular_cubes, metric, reference=ref_cube) + + +@pytest.mark.parametrize( + 'metric,error', + [ + ('rmse', False), + ('weighted_rmse', True), + ('pearsonr', False), + ('weighted_pearsonr', True), + ('emd', False), + ('weighted_emd', True), + ] +) +def test_distance_metric_no_lon_for_area_weights(regular_cubes, metric, error): + """Test distance metric with cubes that have no longitude.""" + regular_cubes[0].remove_coord('longitude') + ref_cube = regular_cubes[0].copy() + msg = ( + r"Cube .* needs a `longitude` coordinate to calculate cell area " + r"weights for weighted distance metric over coordinates \['time', " + r"'latitude'\] \(alternatively, a `cell_area` can be given to the " + r"cube as supplementary variable\)" + ) + if error: + context = pytest.raises(CoordinateNotFoundError, match=msg) + else: + context = contextlib.nullcontext() + with context: + distance_metric( + regular_cubes, + metric, + reference=ref_cube, + coords=['time', 'latitude'] + ) diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 803b66d997..e5d6a871e1 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -6,15 +6,22 @@ import iris.coord_categorisation import iris.coords import numpy as np +import pytest from cf_units import Unit +from iris.aux_factory import AtmosphereSigmaFactory +from iris.coords import ( + AncillaryVariable, + AuxCoord, + CellMeasure, + CellMethod, + DimCoord, +) from iris.cube import Cube from numpy.testing import assert_array_equal -from esmvalcore.preprocessor import PreprocessorFile -from esmvalcore.preprocessor._other import ( - _group_products, - clip, - get_array_module, +from esmvalcore.preprocessor._other import clip, histogram +from tests.unit.preprocessor._compare_with_refs.test_compare_with_refs import ( + get_3d_cube, ) @@ -50,45 +57,376 @@ def test_clip(self): clip(cube, 10, 8) -def test_group_products_string_list(): - products = [ - PreprocessorFile( - filename='A_B.nc', - attributes={ - 'project': 'A', - 'dataset': 'B', - }, - ), - PreprocessorFile( - filename='A_C.nc', - attributes={ - 'project': 'A', - 'dataset': 'C', - } - ), +@pytest.fixture +def cube(): + """Regular cube.""" + cube_data = np.ma.masked_inside( + np.arange(8.0, dtype=np.float32).reshape(2, 2, 2), 1.5, 3.5 + ) + cube_data = np.swapaxes(cube_data, 0, -1) + cube = get_3d_cube( + cube_data, standard_name='air_temperature', var_name='tas', units='K' + ) + return cube + + +def assert_metadata(cube, normalization=None): + """Assert correct metadata.""" + assert cube.standard_name is None + if normalization == 'sum': + assert cube.long_name == 'Relative Frequency' + assert cube.var_name == 'relative_frequency_tas' + assert cube.units == '1' + elif normalization == 'integral': + assert cube.long_name == 'Density' + assert cube.var_name == 'density_tas' + assert cube.units == 'K-1' + else: + assert cube.long_name == 'Frequency' + assert cube.var_name == 'frequency_tas' + assert cube.units == '1' + assert cube.attributes == {} + assert cube.cell_methods == () + 
+    assert cube.coords('air_temperature')
+    bin_coord = cube.coord('air_temperature')
+    assert bin_coord.standard_name == 'air_temperature'
+    assert bin_coord.var_name == 'tas'
+    assert bin_coord.long_name is None
+    assert bin_coord.units == 'K'
+    assert bin_coord.attributes == {}
+
+
+@pytest.mark.parametrize('lazy', [False, True])
+def test_histogram_defaults(cube, lazy):
+    """Test `histogram` with default arguments."""
+    if lazy:
+        cube.data = cube.lazy_data()
+    input_cube = cube.copy()
+
+    result = histogram(input_cube)
+
+    assert input_cube == cube
+    assert_metadata(result)
+    assert result.shape == (10,)
+    if lazy:
+        assert result.has_lazy_data()
+    else:
+        assert not result.has_lazy_data()
+    assert result.dtype == np.float32
+    np.testing.assert_allclose(
+        result.data, [1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0]
+    )
+    np.testing.assert_allclose(result.data.mask, [False] * 10)
+    bin_coord = result.coord('air_temperature')
+    assert bin_coord.shape == (10,)
+    assert bin_coord.dtype == np.float64
+    assert bin_coord.bounds_dtype == np.float64
+    np.testing.assert_allclose(
+        bin_coord.points,
+        [0.35, 1.05, 1.75, 2.45, 3.15, 3.85, 4.55, 5.25, 5.95, 6.65],
+    )
+    np.testing.assert_allclose(
+        bin_coord.bounds,
+        [
+            [0.0, 0.7],
+            [0.7, 1.4],
+            [1.4, 2.1],
+            [2.1, 2.8],
+            [2.8, 3.5],
+            [3.5, 4.2],
+            [4.2, 4.9],
+            [4.9, 5.6],
+            [5.6, 6.3],
+            [6.3, 7.0],
+        ],
+    )
+
+
+@pytest.mark.parametrize('normalization', [None, 'sum', 'integral'])
+@pytest.mark.parametrize('weights', [False, None])
+@pytest.mark.parametrize('lazy', [False, True])
+def test_histogram_over_time(cube, lazy, weights, normalization):
+    """Test `histogram` over the time dimension."""
+    if lazy:
+        cube.data = cube.lazy_data()
+    input_cube = cube.copy()
+
+    result = histogram(
+        input_cube,
+        coords=['time'],
+        bins=[4.5, 6.5, 8.5, 10.5],
+        bin_range=(4.5, 10.5),
+        weights=weights,
+        normalization=normalization,
+    )
+
+    assert input_cube == cube
+    assert_metadata(result, normalization=normalization)
+    assert result.coord('latitude') == input_cube.coord('latitude')
+    assert result.coord('longitude') == input_cube.coord('longitude')
+    assert result.shape == (2, 2, 3)
+    if lazy:
+        assert result.has_lazy_data()
+    else:
+        assert not result.has_lazy_data()
+    assert result.dtype == np.float32
+    if normalization == 'integral':
+        expected_data = np.ma.masked_invalid([
+            [[np.nan, np.nan, np.nan], [0.5, 0.0, 0.0]],
+            [[np.nan, np.nan, np.nan], [0.25, 0.25, 0.0]],
+        ])
+    elif normalization == 'sum':
+        expected_data = np.ma.masked_invalid([
+            [[np.nan, np.nan, np.nan], [1.0, 0.0, 0.0]],
+            [[np.nan, np.nan, np.nan], [0.5, 0.5, 0.0]],
+        ])
+    else:
+        expected_data = np.ma.masked_invalid([
+            [[np.nan, np.nan, np.nan], [1.0, 0.0, 0.0]],
+            [[np.nan, np.nan, np.nan], [1.0, 1.0, 0.0]],
+        ])
+    np.testing.assert_allclose(result.data, expected_data)
+    np.testing.assert_allclose(result.data.mask, expected_data.mask)
+    bin_coord = result.coord('air_temperature')
+    assert bin_coord.shape == (3,)
+    assert bin_coord.dtype == np.float64
+    assert bin_coord.bounds_dtype == np.float64
+    np.testing.assert_allclose(bin_coord.points, [5.5, 7.5, 9.5])
+    np.testing.assert_allclose(
+        bin_coord.bounds, [[4.5, 6.5], [6.5, 8.5], [8.5, 10.5]],
+    )
+
+
+@pytest.mark.parametrize('normalization', [None, 'sum', 'integral'])
+@pytest.mark.parametrize('lazy', [False, True])
+def test_histogram_fully_masked(cube, lazy, normalization):
+    """Test `histogram` with fully masked data."""
+    cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32)
+    if lazy:
+        cube.data = cube.lazy_data()
+
+    result = histogram(cube, bin_range=(0, 10), normalization=normalization)
+
+    assert_metadata(result, normalization=normalization)
+    assert result.shape == (10,)
+    if lazy:
+        assert result.has_lazy_data()
+    else:
+        assert not result.has_lazy_data()
+    assert result.dtype == np.float32
+    np.testing.assert_allclose(result.data, np.ma.masked_all(10,))
+    np.testing.assert_equal(result.data.mask, [True] * 10)
+    bin_coord = result.coord('air_temperature')
+    assert bin_coord.shape == (10,)
+    assert bin_coord.dtype == np.float64
+    assert bin_coord.bounds_dtype == np.float64
+    np.testing.assert_allclose(
+        bin_coord.points,
+        [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5],
+    )
+    np.testing.assert_allclose(
+        bin_coord.bounds,
+        [
+            [0.0, 1.0],
+            [1.0, 2.0],
+            [2.0, 3.0],
+            [3.0, 4.0],
+            [4.0, 5.0],
+            [5.0, 6.0],
+            [6.0, 7.0],
+            [7.0, 8.0],
+            [8.0, 9.0],
+            [9.0, 10.0],
+        ],
+    )
+
+
+@pytest.mark.parametrize('normalization', [None, 'sum', 'integral'])
+@pytest.mark.parametrize(
+    'weights',
+    [
+        True,
+        np.array([[[6, 6], [6, 6]], [[2, 2], [2, 2]]]),
+        da.array([[[6, 6], [6, 6]], [[2, 2], [2, 2]]]),
     ]
-    grouped_by_string = _group_products(products, 'project')
-    grouped_by_list = _group_products(products, ['project'])
+)
+@pytest.mark.parametrize('lazy', [False, True])
+def test_histogram_weights(cube, lazy, weights, normalization):
+    """Test `histogram` with weights."""
+    if lazy:
+        cube.data = cube.lazy_data()
+    input_cube = cube.copy()
+
+    result = histogram(
+        input_cube,
+        coords=['time', 'longitude'],
+        bins=[0.0, 2.0, 4.0, 8.0],
+        weights=weights,
+        normalization=normalization,
+    )
+
+    assert input_cube == cube
+    assert_metadata(result, normalization=normalization)
+    assert result.coord('latitude') == input_cube.coord('latitude')
+    assert result.shape == (2, 3)
+    if lazy:
+        assert result.has_lazy_data()
+    else:
+        assert not result.has_lazy_data()
+    assert result.dtype == np.float32
+    if normalization == 'integral':
+        expected_data = np.ma.masked_invalid(
+            [[0.25, 0.0, 0.125], [0.0, 0.0, 0.25]]
+        )
+    elif normalization == 'sum':
+        expected_data = np.ma.masked_invalid(
+            [[0.5, 0.0, 0.5], [0.0, 0.0, 1.0]]
+        )
+    else:
+        expected_data = np.ma.masked_invalid(
+            [[8.0, 0.0, 8.0], [0.0, 0.0, 8.0]]
+        )
+    np.testing.assert_allclose(result.data, expected_data)
+    np.testing.assert_allclose(result.data.mask, expected_data.mask)
+    bin_coord = result.coord('air_temperature')
+    assert bin_coord.shape == (3,)
+    assert bin_coord.dtype == np.float64
+    assert bin_coord.bounds_dtype == np.float64
+    np.testing.assert_allclose(bin_coord.points, [1.0, 3.0, 6.0])
+    np.testing.assert_allclose(
+        bin_coord.bounds, [[0.0, 2.0], [2.0, 4.0], [4.0, 8.0]],
+    )
+
 
-    assert grouped_by_list == grouped_by_string
+@pytest.fixture
+def cube_with_rich_metadata():
+    """Cube with rich metadata."""
+    time = DimCoord([0], bounds=[[-1, 1]], var_name='time', units='s')
+    sigma = DimCoord([0], var_name='sigma', units='1')
+    lat = DimCoord([0], var_name='lat', units='degrees')
+    lon = DimCoord([0], var_name='lon', units='degrees')
+    ptop = AuxCoord(0, var_name='ptop', units='Pa')
+    psur = AuxCoord([[0]], var_name='ps', units='Pa')
+    sigma_factory = AtmosphereSigmaFactory(ptop, sigma, psur)
+    cell_area = CellMeasure([[1]], var_name='area', units='m2', measure='area')
+    anc = AncillaryVariable([0], var_name='anc')
+    cube = Cube(
+        np.ones((1, 1, 1, 1), dtype=np.float32),
+        standard_name=None,
+        long_name='Air Temperature',
+        var_name=None,
+        units='K',
+        attributes={'test': '1'},
+        cell_methods=(CellMethod('point', 'sigma'),),
+        dim_coords_and_dims=[(time, 0), (sigma, 1), (lat, 2), (lon, 3)],
+        aux_coords_and_dims=[(ptop, ()), (psur, (2, 3))],
aux_factories=[sigma_factory], + ancillary_variables_and_dims=[(anc, 1)], + cell_measures_and_dims=[(cell_area, (2, 3))], + ) + return cube -def test_get_array_module_da(): +@pytest.mark.parametrize('normalization', [None, 'sum', 'integral']) +@pytest.mark.parametrize('weights', [True, False, None]) +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_metadata( + cube_with_rich_metadata, lazy, weights, normalization +): + """Test `histogram`.""" + if lazy: + cube_with_rich_metadata.data = cube_with_rich_metadata.lazy_data() + input_cube = cube_with_rich_metadata.copy() + + result = histogram( + input_cube, + coords=['time'], + bins=[0.0, 1.0, 2.0], + bin_range=(0.0, 2.0), + weights=weights, + normalization=normalization, + ) + + assert input_cube == cube_with_rich_metadata + assert result.shape == (1, 1, 1, 2) + + assert result.standard_name is None + if normalization == 'sum': + assert result.long_name == 'Relative Frequency of Air Temperature' + assert result.var_name == 'relative_frequency' + assert result.units == '1' + elif normalization == 'integral': + assert result.long_name == 'Density of Air Temperature' + assert result.var_name == 'density' + assert result.units == 'K-1' + else: + assert result.long_name == 'Frequency of Air Temperature' + assert result.var_name == 'frequency' + assert result.units == '1' + assert result.attributes == {'test': '1'} + assert result.cell_methods == (CellMethod('point', 'sigma'),) + + assert not result.coords('time', dim_coords=True) + for dim_coord in ('sigma', 'lat', 'lon'): + assert ( + result.coord(dim_coord, dim_coords=True) == + input_cube.coord(dim_coord, dim_coords=True) + ) + assert ( + result.coord_dims(dim_coord) == + (input_cube.coord_dims(dim_coord)[0] - 1,) + ) + assert result.coords('Air Temperature', dim_coords=True) + bin_coord = result.coord('Air Temperature') + assert result.coord_dims(bin_coord) == (3,) + assert bin_coord.standard_name is None + assert bin_coord.long_name == 'Air Temperature' + assert bin_coord.var_name is None + assert bin_coord.units == 'K' + assert bin_coord.attributes == {} + + assert result.coords('time', dim_coords=False) + assert result.coord_dims('time') == () + assert result.coord('ptop') == input_cube.coord('ptop') + assert result.coord('ps') == input_cube.coord('ps') + assert len(result.aux_factories) == 1 + assert isinstance(result.aux_factories[0], AtmosphereSigmaFactory) + assert result.ancillary_variables() == input_cube.ancillary_variables() + assert result.cell_measures() == input_cube.cell_measures() - npx = get_array_module(da.array([1, 2])) - assert npx is da +@pytest.mark.parametrize('lazy', [False, True]) +def test_histogram_fully_masked_no_bin_range(cube, lazy): + """Test `histogram`.""" + cube.data = np.ma.masked_all((2, 2, 2), dtype=np.float32) + if lazy: + cube.data = cube.lazy_data() -def test_get_array_module_np(): + msg = ( + r"Cannot calculate histogram for bin_range=\(masked, masked\) \(or " + r"for fully masked data when `bin_range` is not given\)" + ) + with pytest.raises(ValueError, match=msg): + histogram(cube) - npx = get_array_module(np.array([1, 2])) - assert npx is np +def test_histogram_invalid_bins(cube): + """Test `histogram`.""" + msg = ( + r"bins cannot be a str \(got 'auto'\), must be int or Sequence of int" + ) + with pytest.raises(TypeError, match=msg): + histogram(cube, bins='auto') -def test_get_array_module_mixed(): - npx = get_array_module(da.array([1]), np.array([1])) - assert npx is da +def test_histogram_invalid_normalization(cube): + """Test 
`histogram`.""" + msg = ( + r"Expected one of \(None, 'sum', 'integral'\) for normalization, got " + r"'invalid'" + ) + with pytest.raises(ValueError, match=msg): + histogram(cube, normalization='invalid') if __name__ == '__main__': diff --git a/tests/unit/preprocessor/test_shared.py b/tests/unit/preprocessor/test_shared.py index f931f449a9..4f70b2b419 100644 --- a/tests/unit/preprocessor/test_shared.py +++ b/tests/unit/preprocessor/test_shared.py @@ -6,13 +6,20 @@ import iris.analysis import numpy as np import pytest +from cf_units import Unit +from iris.coords import AuxCoord from iris.cube import Cube from esmvalcore.exceptions import ESMValCoreDeprecationWarning +from esmvalcore.preprocessor import PreprocessorFile from esmvalcore.preprocessor._shared import ( + _get_area_weights, + _group_products, aggregator_accept_weights, + get_array_module, get_iris_aggregator, preserve_float_dtype, + try_adding_calculated_cell_area, ) @@ -236,3 +243,107 @@ def test_preserve_float_dtype(data, dtype): assert _dummy_func.__name__ == '_dummy_func' signature = inspect.signature(_dummy_func) assert list(signature.parameters) == ['obj', 'arg', 'kwarg'] + + +def test_get_array_module_da(): + npx = get_array_module(da.array([1, 2])) + assert npx is da + + +def test_get_array_module_np(): + npx = get_array_module(np.array([1, 2])) + assert npx is np + + +def test_get_array_module_mixed(): + npx = get_array_module(da.array([1]), np.array([1])) + assert npx is da + + +def _create_sample_full_cube(): + cube = Cube(np.zeros((4, 180, 360)), var_name='co2', units='J') + cube.add_dim_coord( + iris.coords.DimCoord( + np.array([10., 40., 70., 110.]), + standard_name='time', + units=Unit('days since 1950-01-01 00:00:00', calendar='gregorian'), + ), + 0, + ) + cube.add_dim_coord( + iris.coords.DimCoord( + np.arange(-90., 90., 1.), + standard_name='latitude', + units='degrees', + ), + 1, + ) + cube.add_dim_coord( + iris.coords.DimCoord( + np.arange(0., 360., 1.), + standard_name='longitude', + units='degrees', + ), + 2, + ) + + cube.coord("time").guess_bounds() + cube.coord("longitude").guess_bounds() + cube.coord("latitude").guess_bounds() + + return cube + + +@pytest.mark.parametrize('lazy', [True, False]) +def test_get_area_weights(lazy): + """Test _get_area_weights.""" + cube = _create_sample_full_cube() + if lazy: + cube.data = cube.lazy_data() + weights = _get_area_weights(cube) + if lazy: + assert isinstance(weights, da.Array) + assert weights.chunks == cube.lazy_data().chunks + else: + assert isinstance(weights, np.ndarray) + np.testing.assert_allclose( + weights, iris.analysis.cartography.area_weights(cube) + ) + + +def test_group_products_string_list(): + products = [ + PreprocessorFile( + filename='A_B.nc', + attributes={ + 'project': 'A', + 'dataset': 'B', + }, + ), + PreprocessorFile( + filename='A_C.nc', + attributes={ + 'project': 'A', + 'dataset': 'C', + } + ), + ] + grouped_by_string = _group_products(products, 'project') + grouped_by_list = _group_products(products, ['project']) + + assert grouped_by_list == grouped_by_string + + +def test_try_adding_calculated_cell_area(): + """Test ``try_adding_calculated_cell_area``.""" + cube = _create_sample_full_cube() + cube.coord('latitude').rename('grid_latitude') + cube.coord('longitude').rename('grid_longitude') + lat = AuxCoord(np.zeros((180, 360)), standard_name='latitude') + lon = AuxCoord(np.zeros((180, 360)), standard_name='longitude') + cube.add_aux_coord(lat, (1, 2)) + cube.add_aux_coord(lon, (1, 2)) + + try_adding_calculated_cell_area(cube) + + 
assert cube.cell_measures('cell_area') diff --git a/tests/unit/test_iris_helpers.py b/tests/unit/test_iris_helpers.py index a9abafc15b..e7b18e4c67 100644 --- a/tests/unit/test_iris_helpers.py +++ b/tests/unit/test_iris_helpers.py @@ -303,7 +303,8 @@ def test_rechunk_cube_fully_lazy(cube_3d): assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) -def test_rechunk_cube_partly_lazy(cube_3d): +@pytest.mark.parametrize('complete_dims', [['x', 'y'], ['xy']]) +def test_rechunk_cube_partly_lazy(cube_3d, complete_dims): """Test ``rechunk_cube``.""" input_cube = cube_3d.copy() @@ -313,7 +314,7 @@ def test_rechunk_cube_partly_lazy(cube_3d): input_cube.coord('xyz').bounds input_cube.cell_measure('cell_measure').data - result = rechunk_cube(input_cube, ['x', 'y'], remaining_dims=2) + result = rechunk_cube(input_cube, complete_dims, remaining_dims=2) assert input_cube == cube_3d assert result == cube_3d @@ -334,15 +335,6 @@ def test_rechunk_cube_partly_lazy(cube_3d): assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) -def test_rechunk_cube_invalid_coord_fail(cube_3d): - """Test ``rechunk_cube``.""" - msg = ( - "Complete coordinates must be 1D coordinates, got 2D coordinate 'xy'" - ) - with pytest.raises(CoordinateMultiDimError, match=msg): - rechunk_cube(cube_3d, ['xy']) - - @pytest.fixture def lat_coord_1d(): """1D latitude coordinate."""