Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update functions to use linelist style tagging #68

Merged
merged 21 commits into from
Feb 5, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@ S3method("$<-",safeframe)
S3method("[",safeframe)
S3method("[<-",safeframe)
S3method("[[<-",safeframe)
S3method("names<-",safeframe)
S3method(print,safeframe)
export(get_lost_tags_action)
export(has_tag)
2 changes: 1 addition & 1 deletion R/drop_safeframe.R
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ drop_safeframe <- function(x, remove_tags = TRUE) {
if (remove_tags) {
# Set the label attribute to NULL for all variables in x
for (var in names(x)) {
attr(x[[var]], "label") <- NULL
x <- remove_tag(x, var)
}
}
x
14 changes: 5 additions & 9 deletions R/has_tag.R
Original file line number Diff line number Diff line change
@@ -5,30 +5,26 @@
#' @returns A numeric vector containing the position of the columns with the
#' requested tags
#'
#' @note Using this in a pipeline results in a 'safeframe' object, but does not
#' maintain the variable tags at this time. It is primarily useful to make
#' your pipelines human readable.
#'
#' @export
#'
#' @examples
#' ## create safeframe
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' head(x)
#'
#' if (require(dplyr) && require(magrittr)) {
#' x %>%
#' select(has_tag(c("Miles per hour", "Distance in miles"))) %>%
#' select(has_tag(c("mph", "distance"))) %>%
#' head()
#' }
has_tag <- function(tags) {
dat <- tidyselect::peek_data(fn = "has_tag")
dat_tags <- tags(dat)

cols_to_extract <- dat_tags[dat_tags %in% tags]
cols_to_extract <- dat_tags[names(dat_tags) %in% tags]

which(colnames(dat) %in% names(cols_to_extract))
which(colnames(dat) %in% cols_to_extract)
}
14 changes: 6 additions & 8 deletions R/make_safeframe.R
Original file line number Diff line number Diff line change
@@ -7,10 +7,8 @@
#'
#' @param x a `data.frame` or a `tibble`
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A named list with variable
#' names in `x` as list names and the tags as list values. Values set to
#' `NULL` remove the tag When specifying tags, please also see
#' `default_values`.
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A series of tags provided as
#' `tag_name = "column_name"`
#'
#' @seealso
#'
@@ -26,8 +24,8 @@
#' @examples
#'
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#'
#' ## print result - just first few entries
@@ -38,8 +36,8 @@
#'
#' ## tags can also be passed as a list with the splice operator (!!!)
#' my_tags <- list(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' new_x <- make_safeframe(cars, !!!my_tags)
#'
66 changes: 0 additions & 66 deletions R/names.R

This file was deleted.

8 changes: 4 additions & 4 deletions R/print.safeframe.R
Original file line number Diff line number Diff line change
@@ -13,8 +13,8 @@
#' @examples
#' ## create safeframe
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#'
#' ## print object - using only the first few entries
@@ -25,8 +25,8 @@
#' cars %>%
#' tibble() %>%
#' make_safeframe(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' }
print.safeframe <- function(x, ...) {
27 changes: 14 additions & 13 deletions R/restore_tags.R
Original file line number Diff line number Diff line change
@@ -7,36 +7,35 @@
#'
#' @param x a `data.frame`
#'
#' @param tags a list of tags as returned by [tags()]; if default values
#' are missing, they will be added to the new list of tags. Matches column
#' names with `x` to restore tags. Throws an error if no matches are found.
#' @param tags a list of tags as returned by [tags()].
#'
#' @param lost_action a `character` indicating the behaviour to adopt when
#' tagged variables have been lost: "error" (default) will issue an error;
#' "warning" will issue a warning; "none" will do nothing
#'
#' @noRd
#'
#' @return The function returns a `safeframe` object with updated tags.
#' @return The function returns a `safeframe` object with restored tags.
#'

restore_tags <- function(x, newTags,
restore_tags <- function(x, tags,
lost_action = c("error", "warning", "none")) {
# assertions
checkmate::assertClass(x, "data.frame")
checkmate::assertClass(newTags, "list")
checkmate::assertClass(tags, "list")
lost_action <- match.arg(lost_action)

# Match the remaining variables to the provided tags
common_vars <- intersect(names(x), names(newTags))
common_vars <- intersect(names(x), tags)
if (length(common_vars) == 0 && length(names(x)) > 0) {
stop("No matching tags provided.")
}

lost_vars <- setdiff(names(newTags), names(x))

# We do not use setdiff because R has become inconsistent for our purposes
# Since https://github.com/wch/r-source/commit/6dedb304cfd66f0e5775cdd5c0bae6340ac48e84 # nolint: line_length_linter.
lost_vars <- tags[!unlist(tags) %in% names(x)]
if (lost_action != "none" && length(lost_vars) > 0) {
lost_tags <- lapply(lost_vars, function(tag) newTags[[tag]])
lost_tags <- names(lost_vars)

lost_msg <- vars_tags(lost_vars, lost_tags)
msg <- paste(
@@ -52,10 +51,12 @@ restore_tags <- function(x, newTags,
stop(errorCondition(msg, class = "safeframe_error"))
}
}
# Get names that are in tags but not in lost_vars
remaining_names <- setdiff(names(tags), names(lost_vars))
# Subset tags with remaining names
tags <- tags[remaining_names]

for (name in common_vars) {
attr(x[[name]], "label") <- newTags[[name]]
}
x <- tag_variables(x, tags)

# Ensure class consistency
if (!inherits(x, "safeframe")) {
21 changes: 11 additions & 10 deletions R/safeframe-package.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Base Tools for tagging and Validating Data
#' Base Tools for Tagging and Validating Data
#'
#' The \pkg{safeframe} package provides tools to help tag and validate data.
#' The 'safeframe' class adds column level attributes to a 'data.frame'.
@@ -33,7 +33,7 @@
#' pipelines).
#'
#' * `names() <-` (and related functions, such as [dplyr::rename()]) will
#' rename tags as needed
#' rename variables and carry forward the existing tags
#'
#' * `x[...] <-` and `x[[...]] <-` (see [sub_safeframe]): will adopt the
#' desired behaviour when tagged variables are lost
@@ -43,14 +43,15 @@
#'
#' @note The package does not aim to have complete integration with \pkg{dplyr}
#' functions. For example, [dplyr::mutate()] and [dplyr::bind_rows()] will
#' not preserve tags. We only provide compatibility for [dplyr::rename()].
#' not preserve tags in all cases. We only provide compatibility for
#' [dplyr::rename()].
#'
#' @examples
#'
#' # using base R style
#' x <- make_safeframe(cars[1:50, ],
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' x
#'
@@ -70,7 +71,7 @@
#'
#' ## to trigger errors when tags are dropped
#' # lost_tags_action("error")
#' # x[, 2:5]
#' # x[, 1]
#'
#' ## reset default behaviour
#' lost_tags_action()
@@ -84,17 +85,17 @@
#' x <- cars %>%
#' tibble() %>%
#' make_safeframe(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' ) %>%
#' mutate(result = if_else(speed > 50, "fast", "slow")) %>%
#' set_tags(result = "Ticket yes/no")
#' set_tags(ticket = "result")
#'
#' head(x)
#'
#' ## extract tagged variables
#' x %>%
#' select(has_tag(c("Ticket yes/no")))
#' select(has_tag(c("ticket")))
#'
#' ## Retrieve all tags
#' x %>%
17 changes: 10 additions & 7 deletions R/set_tags.R
Original file line number Diff line number Diff line change
@@ -14,16 +14,16 @@
#' @examples
#'
#' ## create a safeframe
#' x <- make_safeframe(cars, speed = "Miles per hour")
#' x <- make_safeframe(cars, mph = "speed")
#' tags(x)
#'
#' ## add new tags and fix an existing one
#' x <- set_tags(x, dist = "Distance")
#' x <- set_tags(x, distance = "dist")
#' tags(x)
#'
#' ## remove tags by setting them to NULL
#' old_tags <- tags(x)
#' x <- set_tags(x, speed = NULL, dist = NULL)
#' x <- set_tags(x, mph = NULL, distance = NULL)
#' tags(x)
#'
#' ## setting tags providing a list (used to restore old tags here)
@@ -32,6 +32,7 @@
set_tags <- function(x, ...) {
# assert inputs
checkmate::assertClass(x, "safeframe")

orig_class <- class(x)

# For some reason, we cannot remove tags from safeframe objects by setting
@@ -42,11 +43,13 @@ set_tags <- function(x, ...) {
# 3. readding the tags and the safeframe class

new_tags <- rlang::list2(...)
existing_tags <- tags(x)

x <- drop_safeframe(x, remove_tags = TRUE)
old_tags <- tags(x)
x <- drop_safeframe(x)

x <- tag_variables(x, utils::modifyList(existing_tags, new_tags))
x <- tag_variables(
x,
utils::modifyList(old_tags, new_tags, keep.null = TRUE)
)

class(x) <- orig_class

Loading
Loading