diff --git a/DESCRIPTION b/DESCRIPTION index 753e7dd49a..2ca50dc4cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -247,6 +247,7 @@ Collate: 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' + 'stat-connect.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' diff --git a/NAMESPACE b/NAMESPACE index 6dd4dd92bd..ed35dffe93 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -257,6 +257,7 @@ export(StatBin2d) export(StatBindot) export(StatBinhex) export(StatBoxplot) +export(StatConnect) export(StatContour) export(StatContourFilled) export(StatCount) @@ -684,6 +685,7 @@ export(stat_bin_2d) export(stat_bin_hex) export(stat_binhex) export(stat_boxplot) +export(stat_connect) export(stat_contour) export(stat_contour_filled) export(stat_count) diff --git a/NEWS.md b/NEWS.md index 2f490a4d33..194f6c7d3f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # ggplot2 (development version) +* New `stat_connect()` to connect points via steps or other shapes + (@teunbrand, #6228) * Fixed regression with incorrectly drawn gridlines when using `coord_flip()` (@teunbrand, #6293). * Deprecated functions and arguments prior to ggplot2 3.0.0 throw errors instead diff --git a/R/stat-connect.R b/R/stat-connect.R new file mode 100644 index 0000000000..48a193fdf1 --- /dev/null +++ b/R/stat-connect.R @@ -0,0 +1,162 @@ +#' Connect observations +#' +#' Connect successive points with lines of different shapes. +#' +#' @inheritParams layer +#' @inheritParams geom_point +#' @param connection A specification of how two points are connected. Can be one +#' of the folloing: +#' * A string giving a named connection. These options are: +#' * `"hv"` to first jump horizontally, then vertically. +#' * `"vh"` to first jump vertically, then horizontally. +#' * `"mid"` to step half-way between adjacent x-values. +#' * `"linear"` to use a straight segment. +#' * A numeric matrix with two columns giving x and y coordinates respectively. +#' The coordinates should describe points on a path that connect point A +#' at location (0, 0) and point B at location (1, 1). At least one of these +#' two points is expected to be included in the coordinates. +#' +#' @eval rd_aesthetics("stat", "connect") +#' @export +#' +#' @examples +#' ggplot(head(economics, 20), aes(date, unemploy)) + +#' stat_connect(connection = "hv") +#' +#' # Setup custom connections +#' x <- seq(0, 1, length.out = 20)[-1] +#' smooth <- cbind(x, scales::rescale(1 / (1 + exp(-(x * 10 - 5))))) +#' zigzag <- cbind(c(0.4, 0.6, 1), c(0.75, 0.25, 1)) +#' +#' ggplot(head(economics, 10), aes(date, unemploy)) + +#' geom_point() + +#' stat_connect(aes(colour = "zigzag"), connection = zigzag) + +#' stat_connect(aes(colour = "smooth"), connection = smooth) +stat_connect <- function( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + connection = "hv", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE) { + layer( + data = data, + mapping = mapping, + stat = StatConnect, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + connection = connection, + ... + ) + ) +} + +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export +StatConnect <- ggproto( + "StatConnect", Stat, + + required_aes = c("x|xmin|xmax", "y|ymin|ymax"), + + setup_params = function(data, params) { + params$flipped_aes <- has_flipped_aes( + data, params, + range_is_orthogonal = TRUE, ambiguous = TRUE + ) + + connection <- params$connection %||% "hv" + + if (is.character(connection)) { + check_string(connection) + connection <- switch( + arg_match0(connection, c("hv", "vh", "mid", "linear")), + hv = matrix(c(1, 1, 0, 1), 2, 2), + vh = matrix(c(0, 0, 0, 1), 2, 2), + mid = matrix(c(0.5, 0.5, 0, 1), 2, 2), + linear = matrix(c(0, 1, 0, 1), 2, 2) + ) + } + + if (!is.matrix(connection) || + !typeof(connection) %in% c("integer", "double") || + !identical(dim(connection)[2], 2L)) { + extra <- "" + if (!is.null(dim(connection)[2])) { + extra <- paste0(" with ", dim(connection)[2], " column(s)") + } + cli::cli_abort( + "{.arg connection} must be a numeric {.cls matrix} with 2 columns, \\ + not {.obj_type_friendly {connection}}{extra}." + ) + } + + if (any(!is.finite(connection))) { + cli::cli_abort( + "{.arg connection} cannot contain missing or other non-finite values." + ) + } + + if (nrow(connection) < 1) { + connection <- NULL + } + + params$connection <- connection + params + }, + + compute_group = function(data, scales, connection = "hv", flipped_aes = FALSE) { + + data <- flip_data(data, flipped_aes) + + n <- nrow(data) + if (n <= 1) { + return(vec_slice(data, 0)) + } + + if (!is.matrix(connection)) { + return(data) + } + m <- nrow(connection) + + before <- rep(seq_len(n - 1), each = m) + after <- rep(seq_len(n)[-1], each = m) + + data <- vec_slice(data, order(data$x %||% data$xmin)) + + # Interpolate x + # Note that `length(x) != length(xjust)`, but these are kept in sync due to + # the matrix recycling rules (effectively `rep(xjust, ncol(x))`) + x <- as.matrix(data[intersect(names(data), ggplot_global$x_aes)]) + xjust <- rep(connection[, 1], n - 1L) + x <- vec_slice(x, before) * (1 - xjust) + vec_slice(x, after) * xjust + + # Interpolate y + y <- as.matrix(data[intersect(names(data), ggplot_global$y_aes)]) + yjust <- rep(connection[, 2], n - 1L) + y <- vec_slice(y, before) * (1 - yjust) + vec_slice(y, after) * yjust + + # Reconstitute data + new_data <- vec_slice(data, before) + new_data[colnames(x)] <- split_matrix(x) + new_data[colnames(y)] <- split_matrix(y) + + # Esnure data starts and ends are intact + if (!all(connection[1, ] == c(0, 0))) { + new_data <- vec_c(vec_slice(data, 1), new_data) + } + if (!all(connection[m, ] == c(1, 1))) { + new_data <- vec_c(new_data, vec_slice(data, n)) + } + flip_data(new_data, flipped_aes) + } + +) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0259312234..5b0505afd8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,6 +69,7 @@ reference: - stat_unique - stat_sf_coordinates - stat_manual + - stat_connect - after_stat - subtitle: Position adjustment diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index 6658fdafb9..d4dfa8ab1f 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -21,11 +21,11 @@ % R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, % R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, % R/stat-bin.R, R/stat-summary-2d.R, R/stat-bin2d.R, R/stat-bindot.R, -% R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, -% R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, -% R/stat-function.R, R/stat-identity.R, R/stat-manual.R, R/stat-qq-line.R, -% R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-binhex.R, R/stat-boxplot.R, R/stat-connect.R, R/stat-contour.R, +% R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, +% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-manual.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, +% R/stat-sum.R, R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, % R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} @@ -131,6 +131,7 @@ \alias{StatBindot} \alias{StatBinhex} \alias{StatBoxplot} +\alias{StatConnect} \alias{StatContour} \alias{StatContourFilled} \alias{StatCount} diff --git a/man/stat_connect.Rd b/man/stat_connect.Rd new file mode 100644 index 0000000000..d62ba610a5 --- /dev/null +++ b/man/stat_connect.Rd @@ -0,0 +1,153 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-connect.R +\name{stat_connect} +\alias{stat_connect} +\title{Connect observations} +\usage{ +stat_connect( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + connection = "hv", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{connection}{A specification of how two points are connected. Can be one +of the folloing: +\itemize{ +\item A string giving a named connection. These options are: +\itemize{ +\item \code{"hv"} to first jump horizontally, then vertically. +\item \code{"vh"} to first jump vertically, then horizontally. +\item \code{"mid"} to step half-way between adjacent x-values. +\item \code{"linear"} to use a straight segment. +} +\item A numeric matrix with two columns giving x and y coordinates respectively. +The coordinates should describe points on a path that connect point A +at location (0, 0) and point B at location (1, 1). At least one of these +two points is expected to be included in the coordinates. +}} + +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with +a warning. If \code{TRUE}, missing values are silently removed.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +Connect successive points with lines of different shapes. +} +\section{Aesthetics}{ + +\code{stat_connect()} understands the following aesthetics (required aesthetics are in bold): +\itemize{ +\item \strong{\code{\link[=aes_position]{x}} \emph{or} \code{\link[=aes_position]{xmin}} \emph{or} \code{\link[=aes_position]{xmax}}} +\item \strong{\code{\link[=aes_position]{y}} \emph{or} \code{\link[=aes_position]{ymin}} \emph{or} \code{\link[=aes_position]{ymax}}} +\item \code{\link[=aes_group_order]{group}} +} +Learn more about setting these aesthetics in \code{vignette("ggplot2-specs")}. +} + +\examples{ +ggplot(head(economics, 20), aes(date, unemploy)) + + stat_connect(connection = "hv") + +# Setup custom connections +x <- seq(0, 1, length.out = 20)[-1] +smooth <- cbind(x, scales::rescale(1 / (1 + exp(-(x * 10 - 5))))) +zigzag <- cbind(c(0.4, 0.6, 1), c(0.75, 0.25, 1)) + +ggplot(head(economics, 10), aes(date, unemploy)) + + geom_point() + + stat_connect(aes(colour = "zigzag"), connection = zigzag) + + stat_connect(aes(colour = "smooth"), connection = smooth) +} diff --git a/tests/testthat/_snaps/stat-connect.md b/tests/testthat/_snaps/stat-connect.md new file mode 100644 index 0000000000..98c4fd7df5 --- /dev/null +++ b/tests/testthat/_snaps/stat-connect.md @@ -0,0 +1,24 @@ +# stat_connect rejects invalid connections + + Code + test_setup(connection = "foobar") + Condition + Error in `setup_params()`: + ! `connection` must be one of "hv", "vh", "mid", or "linear", not "foobar". + +--- + + Code + test_setup(connection = matrix(1:3, ncol = 1)) + Condition + Error in `setup_params()`: + ! `connection` must be a numeric with 2 columns, not an integer matrix with 1 column(s). + +--- + + Code + test_setup(connection = matrix(c(1:3, NA), ncol = 2)) + Condition + Error in `setup_params()`: + ! `connection` cannot contain missing or other non-finite values. + diff --git a/tests/testthat/test-stat-connect.R b/tests/testthat/test-stat-connect.R new file mode 100644 index 0000000000..16c3ed44fd --- /dev/null +++ b/tests/testthat/test-stat-connect.R @@ -0,0 +1,85 @@ +test_that("stat_connect closes off ends", { + + data <- data.frame(x = 1:3, y = c(1, 2, 0)) + + ld <- get_layer_data( + ggplot(data, aes(x, y)) + + stat_connect(connection = "mid") + ) + + i <- c(1L, nrow(ld)) + j <- c(1L, nrow(data)) + expect_equal(ld$x[i], data$x[j]) + expect_equal(ld$y[i], data$y[j]) + +}) + +test_that("stat_connect works with 1-row connections", { + data <- data.frame(x = 1:3, y = c(1, 2, 0)) + + ld <- get_layer_data( + ggplot(data, aes(x, y)) + + stat_connect(connection = cbind(0.5, 0.5)) + ) + + expect_equal(ld$x, c(1, 1.5, 2.5, 3)) + expect_equal(ld$y, c(1, 1.5, 1.0, 0)) +}) + +test_that("stat_connect works with ribbons in both orientations", { + + data <- data.frame(x = 1:4, ymin = c(1, 2, 0, 1), ymax = c(3, 4, 3, 4)) + expected <- data.frame( + x = c(1, 2, 2, 3, 3, 4, 4), + ymin = c(1, 1, 2, 2, 0, 0, 1), + ymax = c(3, 3, 4, 4, 3, 3, 4) + ) + + ld <- layer_data( + ggplot(data, aes(x, ymin = ymin, ymax = ymax)) + + geom_ribbon(stat = "connect", connection = "hv") + ) + + expect_equal(ld[c("x", "ymin", "ymax")], expected) + + ld <- layer_data( + ggplot(data, aes(y = x, xmin = ymin, xmax = ymax)) + + geom_ribbon(stat = "connect", connection = "hv", orientation = "y") + ) + + expect_equal(ld[c("y", "xmin", "xmax")], flip_data(expected, TRUE)) +}) + +test_that("stat_connect rejects invalid connections", { + + test_setup <- function(...) { + StatConnect$setup_params(NULL, list(...)) + } + + # Accept keyword parameter + p <- test_setup(connection = "linear") + expect_vector(p$connection, size = 2L, ptype = matrix(NA_real_, 0, 2)) + + # Accept xy coord matrix + p <- test_setup(connection = cbind(c(0, 1), c(0, 1))) + expect_vector(p$connection, size = 2L, ptype = matrix(NA_real_, 0, 2)) + + + p <- test_setup(connection = matrix(NA_real_, 0, 2)) + expect_null(p$connection) + + expect_snapshot( + test_setup(connection = "foobar"), + error = TRUE + ) + + expect_snapshot( + test_setup(connection = matrix(1:3, ncol = 1)), + error = TRUE + ) + + expect_snapshot( + test_setup(connection = matrix(c(1:3, NA), ncol = 2)), + error = TRUE + ) +})