
add fairness metrics #434

Merged: 34 commits, Oct 27, 2023

Changes from 14 commits

Commits
b98b2ac
rename `metrics.R` -> `aaa-metrics.R`
simonpcouch May 15, 2023
92c232d
implement metric constructor
simonpcouch May 15, 2023
44ef50b
implement 3 canonical fairness metrics
simonpcouch May 15, 2023
c260656
validate canonical metrics against fairlearn
simonpcouch May 15, 2023
31e4a5f
add NEWS entry
simonpcouch May 15, 2023
5922ae2
add pkgdown entry
simonpcouch May 15, 2023
7236a14
skip error context snap pre-4.0 (currently oldrel-4)
simonpcouch May 15, 2023
60973ba
remove unused test object
simonpcouch May 15, 2023
6998108
remove `rlang::` and `vctrs::`
simonpcouch May 16, 2023
a9fc2e0
optimize `diff_range()` for speed
simonpcouch May 16, 2023
27f53e9
transition from `summarize()`
simonpcouch May 16, 2023
7fe7fb8
remove incomplete phrase
simonpcouch May 16, 2023
bdb7c5a
highlight function factory as output
simonpcouch May 16, 2023
ca6d7b8
restore passing ellipses to `.post()`
simonpcouch May 16, 2023
f1d663f
error informatively with redundant grouping
simonpcouch May 18, 2023
45f57ff
don't pass ellipses to `.post`
simonpcouch May 18, 2023
43037d9
run test on all systems
simonpcouch May 18, 2023
ef7f5a1
namespace `group_by` in test
simonpcouch May 18, 2023
556f2c5
update snaps
simonpcouch May 18, 2023
6fad6a4
defer to inputted `.fn` for metric class
simonpcouch May 18, 2023
a9231a9
check `direction` argument
simonpcouch May 18, 2023
82717fa
move `max_positive_rate_diff()` to where it's used
simonpcouch Jun 21, 2023
a2dfe36
document implementation in `@description`
simonpcouch Jun 21, 2023
375269b
abort with condition parent to improve error context
simonpcouch Jun 21, 2023
76f2d4f
special-case metric factories in `metric_set()` checks
simonpcouch Jun 22, 2023
587ff32
rename `fairness_metric()` -> `new_groupwise_metric()`
simonpcouch Jun 22, 2023
05086ea
contrast "group-wise" and usual grouped behavior of yardstick metrics
simonpcouch Jun 22, 2023
25a893f
rename `new_groupwise_metric()` arguments
simonpcouch Jun 26, 2023
4436785
clarify documentation on `aggregate` arg
simonpcouch Jun 26, 2023
12ff56a
use devl probably
EmilHvitfeldt Jun 27, 2023
251f45b
Merge branch 'fairness' of github.com:tidymodels/yardstick into fairness
EmilHvitfeldt Jun 27, 2023
6c0b76f
`aggregrate` -> `aggregate`
simonpcouch Jun 27, 2023
219562c
remove `Remotes`---probably package is now on CRAN
simonpcouch Oct 23, 2023
690e738
correct duplicated description
simonpcouch Oct 27, 2023
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -130,12 +130,16 @@ export(concordance_survival_vec)
export(conf_mat)
export(curve_metric_summarizer)
export(curve_survival_metric_summarizer)
export(demographic_parity)
export(detection_prevalence)
export(detection_prevalence_vec)
export(dots_to_estimate)
export(dynamic_survival_metric_summarizer)
export(equal_opportunity)
export(equalized_odds)
export(f_meas)
export(f_meas_vec)
export(fairness_metric)
export(finalize_estimator)
export(finalize_estimator_internal)
export(gain_capture)
2 changes: 2 additions & 0 deletions NEWS.md
@@ -14,6 +14,8 @@ calculated with `roc_auc_survival()`.

* `metric_set()` can now be used with a combination of dynamic and static survival metrics.

* `demographic_parity()`, `equalized_odds()`, and `equal_opportunity()` are new metrics for measuring model fairness. Each is implemented with the `fairness_metric()` constructor, a general interface for group-aware metrics that makes it quick and flexible to define fairness metrics with the problem context in mind.

# yardstick 1.2.0

## New Metrics
File renamed without changes.
1 change: 1 addition & 0 deletions R/aaa.R
@@ -6,6 +6,7 @@ utils::globalVariables(
c(
# for class prob metrics
"estimate",
".estimator",
"threshold",
"specificity",
".level",
181 changes: 181 additions & 0 deletions R/fair-aaa.R
@@ -0,0 +1,181 @@
#' Create fairness metrics
#'
#' Fairness metrics quantify the disparity in value of a metric across a number
#' of groups. Fairness metrics with a value of zero indicate that the
#' underlying metric has parity across groups. yardstick defines
#' several common fairness metrics using this function, such as
#' [demographic_parity()], [equal_opportunity()], and [equalized_odds()].
#'
#' @param .fn A yardstick metric function or metric set.
#' @param .name The name of the metric to place in the `.metric` column
#' of the output.
#' @param .post A function to post-process the generated metric set results `x`.
#' In many cases, this will be something like `~diff(range(x$.estimate))` or
#' `~{r <- range(x$.estimate); r[1] / r[2]}`.
#'
#' @section Relevant Group Level:
#' Additional arguments can be passed to the metric function that the
#' output of this function generates. That is:
#'
#' ```
#' res_fairness <- fairness_metric(...)
#' res_by <- res_fairness(by)
#' res_by(..., additional_arguments_to_.post = TRUE)
#' ```
#'
#' For finer control of how groups in `by` are treated, use the
#' `.post` argument.
#'
#' @return
#' This function is a
#' [function factory](https://adv-r.hadley.nz/function-factories.html); its
#' output is itself a function. Further, the functions that this function
#' outputs are also function factories. More explicitly, this looks like:
#'
#' ```
#' # a function with similar implementation to `demographic_parity()`:
#' diff_range <- function(x, ...) {diff(range(x$.estimate))}
#'
#' dem_parity <-
#' fairness_metric(
#' .fn = detection_prevalence,
#' .name = "dem_parity",
#' .post = diff_range
#' )
#' ```
#'
#' The outputted `dem_parity` is a function that takes one argument, `by`,
#' indicating the data-masked variable giving the sensitive feature.
#'
#' When called with a `by` argument, `dem_parity` will return a yardstick
#' metric function like any other:
#'
#' ```
#' dem_parity_by_gender <- dem_parity(gender)
#' ```
#'
#' Note that `dem_parity` doesn't take any arguments other than `by`, and thus
#' knows nothing about the data it will be applied to other than that it ought
#' to have a column with name `"gender"` in it.
#'
#' The output `dem_parity_by_gender` is a metric function that takes the
#' same arguments as the function supplied as `.fn`, in this case
#' `detection_prevalence`. It will thus interface like any other yardstick
#' function except that it will look for a `"gender"` column in
#' the data it's supplied.
#'
#' In addition to the examples below, see the documentation on the
#' return value of fairness metrics like [demographic_parity()],
#' [equal_opportunity()], or [equalized_odds()] to learn more about how the
#' output of this function can be used.
#'
#' @examples
#' data(hpc_cv)
Contributor Author:
If we decide to move forward with a fairness-oriented name, it'd be great if we could use some example data here that has a plausibly "sensitive" attribute. yardstick doesn't list modeldata, which has some options, in Suggests at the moment. infer::gss would also work well here.

#'
#' # `demographic_parity`, among other fairness metrics,
#' # is generated with `fairness_metric()`:
#' diff_range <- function(x, ...) {diff(range(x$.estimate))}
#' demographic_parity_ <-
#' fairness_metric(
#' .fn = detection_prevalence,
#' .name = "demographic_parity",
#' .post = diff_range
#' )
#'
#' m_set <- metric_set(demographic_parity_(Resample))
#'
#' m_set(hpc_cv, truth = obs, estimate = pred)
#'
#' # the `.post` argument can be used to accommodate a wide
#' # variety of parameterizations. To encode demographic
#' # parity as a ratio rather than a difference, for example:
#' ratio_range <- function(x, ...) {
#' range <- range(x$.estimate)
#' range[1] / range[2]
#' }
#'
#' demographic_parity_ratio <-
#' fairness_metric(
#' .fn = detection_prevalence,
#' .name = "demographic_parity_ratio",
#' .post = ratio_range
#' )
#'
#' @export
fairness_metric <- function(.fn, .name, .post) {
Contributor Author:
The second and third issues linked in the PR description may be related here: aside from naming choices, there's nothing fairness-specific about this function for now. This might be a solution for grouped metrics generally.

Member:
Yes, I would lean toward changing the naming here (and the direction = "minimize" default?) so that it is more clearly groupwise_metric() or similar. I'd change the section in the pkgdown site to something like "Fairness and Group Metrics". I think this is the right way to go both because folks have non-fairness group metric needs, and because then the name helps users understand how fairness metrics work. I think it's better for learning/using, not worse.

Member:
I totally agree, switching this file over to talk about them as "group-wise" metrics is the right move.

Contributor Author:
I'm game! Thank you.

A difficult bit here is that all yardstick metrics know about groups, so I want to make sure we don't imply that non-fairness metrics aren't group-aware; it's just that no intermediate grouped operation happens under the hood for them. I do think that groupwise_metric() could be a good way to phrase that (accompanied by strong docs), but I'm also very much open to other ideas, especially if there's some dplyr-ish concept that already speaks to this.
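To make the distinction in this thread concrete, here is a small base-R illustration (no yardstick, and the data are made up): a *grouped* metric returns one estimate per group, while a *group-wise* metric groups internally and then collapses those per-group estimates into a single disparity value.

```r
# Toy predictions; `estimate` = 1 means a positive prediction.
preds <- data.frame(
  group    = c("a", "a", "b", "b", "b"),
  estimate = c(1, 0, 1, 1, 0)
)

# Grouped behavior: one positive rate per group.
rates <- tapply(preds$estimate, preds$group, mean)
rates  # a = 0.5, b ~ 0.667

# Group-wise behavior: collapse the per-group rates into one disparity
# value (the range width), loosely analogous to how demographic_parity()
# summarizes detection_prevalence() across groups.
disparity <- max(rates) - min(rates)
disparity  # ~ 0.167
```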

Contributor Author:
Notes from the group meeting:

  • Max mentioned that it might be nice to prefix whatever this function name is with create_ or some other eliciting verb to indicate that this is a function factory, and others agreed
  • I suggested disparity_metric as a descriptor for this type of metric that doesn't have as strong of a social connotation—seems like "disparity" could describe differences across groups regardless of whether that group is regarded as a sensitive feature

Contributor Author:
I propose moving forward with create_disparity_metric(). Any thoughts?

Member:
I like the idea of using create_* here, but it is a departure compared to the other function factories in yardstick (of which there are a lot, like metric_tweak(), metric_set(), and so forth). Do you think it's better to stay more similar to the naming conventions of yardstick, or to use something like create_*?

I have a mild preference for something like create_groupwise_metric() because I think there is more ML community vocabulary around what "groupwise" means. The word "disparity" makes me think about the specific metric disparate impact. That being said, my opinion is not super strong here.

Contributor Author:
Good points--if we want to look to other function factories in the package, maybe the parallel we might want to draw is with new_metric()? Something like new_groupwise_metric()?

Member:
I like that a lot, new_groupwise_metric() 👍

if (is_missing(.fn) || !inherits_any(.fn, c("metric", "metric_set"))) {
abort("`.fn` must be a metric function or metric set.")
}
if (is_missing(.name) || !is_string(.name)) {
abort("`.name` must be a string.")
}
if (is_missing(.post) || !is_function(.post)) {
abort("`.post` must be a function.")
}

function(by) {
by_str <- as_string(enexpr(by))
res <-
function(data, ...) {
gp_vars <- dplyr::group_vars(data)

res <- .fn(dplyr::group_by(data, {{by}}, .add = TRUE), ...)

# restore to the grouping structure in the supplied data
if (length(gp_vars) > 0) {
res <- dplyr::group_by(res, !!!dplyr::groups(data), .add = FALSE)
}

group_rows <- dplyr::group_rows(res)
group_keys <- dplyr::group_keys(res)
res <- dplyr::ungroup(res)
groups <- vec_chop(res, indices = group_rows)
out <- vector("list", length = length(groups))

for (i in seq_along(groups)) {
group <- groups[[i]]

.estimate <- .post(group, ...)

if (!is_bare_numeric(.estimate)) {
abort(
"`.post` must return a single numeric value.",
call = call2("fairness_metric")
)
}

elt_out <- list(
.metric = .name,
.by = by_str,
.estimator = group$.estimator[1],
.estimate = .estimate
)

out[[i]] <- tibble::new_tibble(elt_out)
}

group_keys <- vctrs::vec_rep_each(group_keys, times = list_sizes(out))
out <- vec_rbind(!!!out)
out <- vec_cbind(group_keys, out)

out
}
res <- new_class_metric(res, direction = "minimize")
attr(res, "by") <- by_str
res
}
}

diff_range <- function(x, ...) {
estimates <- x$.estimate

max(estimates) - min(estimates)
Contributor Author:
Add na.rm to both of these.

Contributor Author:
Possibly? Should test interactions with na_rm here.

}
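Following up on the na.rm thread above, a minimal sketch of what an na_rm-aware variant of this helper might look like. This is hypothetical: the `na_rm` argument, its default, and its interaction with yardstick's own `na_rm` machinery are assumptions that would still need the testing the thread calls for.

```r
# Hypothetical na_rm-aware variant of diff_range(); not part of the PR.
diff_range_na <- function(x, na_rm = TRUE, ...) {
  estimates <- x$.estimate
  if (na_rm) {
    # drop missing per-group estimates before taking the range width
    estimates <- estimates[!is.na(estimates)]
  }
  max(estimates) - min(estimates)
}

diff_range_na(data.frame(.estimate = c(0.2, NA, 0.5)))  # 0.3
```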

max_positive_rate_diff <- function(x, ...) {
metric_values <- vec_split(x, x$.metric)

positive_rate_diff <- vapply(metric_values$val, diff_range, numeric(1), ...)

max(positive_rate_diff)
}
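A base-R walkthrough of the logic in `max_positive_rate_diff()` above, using toy metric-set-style results (the estimate values are made up for illustration): split by `.metric`, take each metric's range width, then return the maximum.

```r
# Toy per-group results for two metrics, as a metric set might produce.
toy <- data.frame(
  .metric   = c("sens", "sens", "spec", "spec"),
  .estimate = c(0.9, 0.6, 0.8, 0.7)
)

# Mirrors vec_split() + diff_range() + max() in the function above.
widths <- tapply(toy$.estimate, toy$.metric, function(e) max(e) - min(e))
widths      # sens = 0.3, spec = 0.1
max(widths) # 0.3
```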
47 changes: 47 additions & 0 deletions R/fair-demographic_parity.R
@@ -0,0 +1,47 @@
#' Demographic parity
#'
#' @description
#' Demographic parity is satisfied when a model's predictions have the
#' same predicted positive rate across groups. A value of 0 indicates parity
#' across groups. Note that this definition does not depend on the true
#' outcome; the `truth` argument is included in outputted metrics
#' for consistency.
#'
#' Demographic parity is sometimes referred to as group fairness,
#' disparate impact, or statistical parity.
#'
#' See the "Measuring Disparity" section for details on implementation.
#'
#' @param by The column identifier for the sensitive feature. This should be an
#' unquoted column name referring to a column in the un-preprocessed data.
#'
#' @templateVar fn demographic_parity
#' @templateVar internal_.fn detection_prevalence
#' @templateVar internal_fn [detection_prevalence()]
#' @template return-fair
#' @template event-fair
#' @template examples-fair
#'
#' @family fairness metrics
#'
#' @references
#'
#' Agarwal, A., Beygelzimer, A., Dudik, M., Langford, J., & Wallach, H. (2018).
#' "A Reductions Approach to Fair Classification." Proceedings of the 35th
#' International Conference on Machine Learning, in Proceedings of Machine
#' Learning Research. 80:60-69.
#'
#' Verma, S., & Rubin, J. (2018). "Fairness definitions explained". In
#' Proceedings of the international workshop on software fairness (pp. 1-7).
#'
#' Bird, S., Dudík, M., Edgar, R., Horn, B., Lutz, R., Milan, V., ... & Walker,
#' K. (2020). "Fairlearn: A toolkit for assessing and improving fairness in AI".
#' Microsoft, Tech. Rep. MSR-TR-2020-32.
#'
#' @export
demographic_parity <-
fairness_metric(
.fn = detection_prevalence,
.name = "demographic_parity",
.post = diff_range
)
43 changes: 43 additions & 0 deletions R/fair-equal_opportunity.R
@@ -0,0 +1,43 @@
#' Equal opportunity
#'
#' @description
#'
#' Equal opportunity is satisfied when a model's predictions have the same
#' true positive and false negative rates across protected groups. A value of
#' 0 indicates parity across groups.
#'
#' Equal opportunity is sometimes referred to as conditional procedure accuracy
#' equality or disparate mistreatment.
#'
#' See the "Measuring Disparity" section for details on implementation.
#'
#' @inheritParams demographic_parity
#'
#' @templateVar fn equal_opportunity
#' @templateVar internal_.fn sens
#' @templateVar internal_fn [sens()]
#' @template return-fair
#' @template event-fair
#' @template examples-fair
#'
#' @family fairness metrics
#'
#' @references
#'
#' Hardt, M., Price, E., & Srebro, N. (2016). "Equality of opportunity in
#' supervised learning". Advances in neural information processing systems, 29.
#'
#' Verma, S., & Rubin, J. (2018). "Fairness definitions explained". In
#' Proceedings of the international workshop on software fairness (pp. 1-7).
#'
#' Bird, S., Dudík, M., Edgar, R., Horn, B., Lutz, R., Milan, V., ... & Walker,
#' K. (2020). "Fairlearn: A toolkit for assessing and improving fairness in AI".
#' Microsoft, Tech. Rep. MSR-TR-2020-32.
#'
#' @export
equal_opportunity <-
fairness_metric(
.fn = sens,
.name = "equal_opportunity",
.post = diff_range
)
70 changes: 70 additions & 0 deletions R/fair-equalized_odds.R
@@ -0,0 +1,70 @@
#' Equalized odds
#'
#' @description
#'
#' Equalized odds is satisfied when a model's predictions have the same false
#' positive, true positive, false negative, and true negative rates across
#' protected groups. A value of 0 indicates parity across groups.
#'
#' Equalized odds is sometimes referred to as conditional procedure accuracy
#' equality or disparate mistreatment.
#'
#' See the "Measuring disparity" section for details on implementation.
#'
#' @inheritParams demographic_parity
#'
#' @templateVar fn equalized_odds
#' @templateVar internal_fn [sens()] and [spec()]
#' @template return-fair
#' @template examples-fair
#'
#' @section Measuring Disparity:
#' By default, this function takes the larger of the ranges of [sens()]
#' and [spec()] `.estimate`s across groups. That is, the maximum pair-wise
#' disparity in [sens()] or [spec()] between groups is the return value of
#' `equalized_odds()`'s `.estimate`.
#'
#' For finer control of group treatment, construct a context-aware fairness
#' metric with the [fairness_metric()] function by passing a custom `.post`
#' function:
#'
#' ```
#' # see yardstick:::max_positive_rate_diff for the actual `.post()`
#' diff_range <- function(x, ...) {diff(range(x$.estimate))}
#'
#' equalized_odds_2 <-
#' fairness_metric(
#' .fn = metric_set(sens, spec),
#' .name = "equalized_odds_2",
#' .post = diff_range
#' )
#' ```
#'
#' In `.post()`, `x` is the [metric_set()] output with [sens()] and [spec()]
#' values for each group, and `...` gives additional arguments (such as a grouping
#' level to refer to as the "baseline") to pass to the function outputted
#' by `equalized_odds_2()` for context.
#'
#' @family fairness metrics
#'
#' @references
#'
#' Agarwal, A., Beygelzimer, A., Dudik, M., Langford, J., & Wallach, H. (2018).
#' "A Reductions Approach to Fair Classification." Proceedings of the 35th
#' International Conference on Machine Learning, in Proceedings of Machine
#' Learning Research. 80:60-69.
#'
#' Verma, S., & Rubin, J. (2018). "Fairness definitions explained". In
#' Proceedings of the international workshop on software fairness (pp. 1-7).
#'
#' Bird, S., Dudík, M., Edgar, R., Horn, B., Lutz, R., Milan, V., ... & Walker,
#' K. (2020). "Fairlearn: A toolkit for assessing and improving fairness in AI".
#' Microsoft, Tech. Rep. MSR-TR-2020-32.
#'
#' @export
equalized_odds <-
fairness_metric(
.fn = metric_set(sens, spec),
.name = "equalized_odds",
.post = max_positive_rate_diff
)
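As the `.post()` documentation above notes, the `...` hook leaves room for finer-grained disparity definitions, such as measuring each group against a named baseline rather than taking an overall range. The sketch below is hypothetical: the `baseline` argument and the assumption that the sensitive feature appears in `x` under a column literally named `group` are illustrative choices, not guarantees of the constructor.

```r
# Hypothetical .post: largest absolute deviation of any group's estimate
# from a user-chosen baseline group's estimate. The `baseline` argument
# and the `group` column name are assumptions for illustration.
max_diff_from_baseline <- function(x, baseline, ...) {
  base_est <- x$.estimate[x$group == baseline][1]
  max(abs(x$.estimate - base_est), na.rm = TRUE)
}

toy <- data.frame(
  group     = c("a", "b", "c"),
  .estimate = c(0.50, 0.65, 0.40)
)
max_diff_from_baseline(toy, baseline = "a")  # 0.15
```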
7 changes: 7 additions & 0 deletions _pkgdown.yml
@@ -68,6 +68,13 @@ reference:
- iic
- poisson_log_loss

- title: Fairness Metrics
contents:
- fairness_metric
- demographic_parity
- equalized_odds
- equal_opportunity

- title: Curve Functions
contents:
- roc_curve