Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update functions to use linelist style tagging #68

Merged
merged 21 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ S3method("$<-",safeframe)
S3method("[",safeframe)
S3method("[<-",safeframe)
S3method("[[<-",safeframe)
S3method("names<-",safeframe)
S3method(print,safeframe)
export(get_lost_tags_action)
export(has_tag)
Expand Down
2 changes: 1 addition & 1 deletion R/drop_safeframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ drop_safeframe <- function(x, remove_tags = TRUE) {
if (remove_tags) {
# Set the label attribute to NULL for all variables in x
for (var in names(x)) {
attr(x[[var]], "label") <- NULL
x <- remove_tag(x, var)
}
}
x
Expand Down
14 changes: 5 additions & 9 deletions R/has_tag.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,26 @@
#' @returns A numeric vector containing the position of the columns with the
#' requested tags
#'
#' @note Using this in a pipeline results in a 'safeframe' object, but does not
#' maintain the variable tags at this time. It is primarily useful to make
#' your pipelines human readable.
#'
#' @export
#'
#' @examples
#' ## create safeframe
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' head(x)
#'
#' if (require(dplyr) && require(magrittr)) {
#' x %>%
#' select(has_tag(c("Miles per hour", "Distance in miles"))) %>%
#' select(has_tag(c("mph", "distance"))) %>%
#' head()
#' }
has_tag <- function(tags) {
dat <- tidyselect::peek_data(fn = "has_tag")
dat_tags <- tags(dat)

cols_to_extract <- dat_tags[dat_tags %in% tags]
cols_to_extract <- dat_tags[names(dat_tags) %in% tags]

which(colnames(dat) %in% names(cols_to_extract))
which(colnames(dat) %in% cols_to_extract)
}
16 changes: 7 additions & 9 deletions R/make_safeframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
#'
#' @param x a `data.frame` or a `tibble`
#'
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A named list with variable
#' names in `x` as list names and the tags as list values. Values set to
#' `NULL` remove the tag When specifying tags, please also see
#' `default_values`.
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A series of tags provided as
#' `tag_name = "column_name"`
#'
#' @seealso
#'
Expand All @@ -26,8 +24,8 @@
#' @examples
#'
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#'
#' ## print result - just first few entries
Expand All @@ -38,8 +36,8 @@
#'
#' ## tags can also be passed as a list with the splice operator (!!!)
#' my_tags <- list(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' new_x <- make_safeframe(cars, !!!my_tags)
#'
Expand All @@ -56,6 +54,6 @@ make_safeframe <- function(x,
x <- tag_variables(x, tags)

# shape output and return object
class(x) <- c("safeframe", class(x))
class(x) <- unique(c("safeframe", class(x)))
chartgerink marked this conversation as resolved.
Show resolved Hide resolved
x
}
66 changes: 0 additions & 66 deletions R/names.R

This file was deleted.

8 changes: 4 additions & 4 deletions R/print.safeframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#' @examples
#' ## create safeframe
#' x <- make_safeframe(cars,
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#'
#' ## print object - using only the first few entries
Expand All @@ -25,8 +25,8 @@
#' cars %>%
#' tibble() %>%
#' make_safeframe(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' }
print.safeframe <- function(x, ...) {
Expand Down
24 changes: 12 additions & 12 deletions R/restore_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,34 @@
#'
#' @param x a `data.frame`
#'
#' @param tags a list of tags as returned by [tags()]; if default values
#' are missing, they will be added to the new list of tags. Matches column
#' names with `x` to restore tags. Throws an error if no matches are found.
#' @param tags a list of tags as returned by [tags()].
#'
#' @param lost_action a `character` indicating the behaviour to adopt when
#' tagged variables have been lost: "error" (default) will issue an error;
#' "warning" will issue a warning; "none" will do nothing
#'
#' @noRd
#'
#' @return The function returns a `safeframe` object with updated tags.
#' @return The function returns a `safeframe` object with restored tags.
#'

restore_tags <- function(x, newTags,
restore_tags <- function(x, tags,
lost_action = c("error", "warning", "none")) {
# assertions
checkmate::assertClass(x, "data.frame")
checkmate::assertClass(newTags, "list")
checkmate::assertClass(tags, "list")
lost_action <- match.arg(lost_action)

# Match the remaining variables to the provided tags
common_vars <- intersect(names(x), names(newTags))
common_vars <- intersect(names(x), tags)
if (length(common_vars) == 0 && length(names(x)) > 0) {
stop("No matching tags provided.")
}

lost_vars <- setdiff(names(newTags), names(x))
lost_vars <- setdiff(tags, names(x))

if (lost_action != "none" && length(lost_vars) > 0) {
lost_tags <- lapply(lost_vars, function(tag) newTags[[tag]])
lost_tags <- names(lost_vars)

lost_msg <- vars_tags(lost_vars, lost_tags)
msg <- paste(
Expand All @@ -52,10 +50,12 @@ restore_tags <- function(x, newTags,
stop(errorCondition(msg, class = "safeframe_error"))
}
}
# Get names that are in tags but not in lost_vars
remaining_names <- setdiff(names(tags), names(lost_vars))
# Subset tags with remaining names
tags <- tags[remaining_names]

for (name in common_vars) {
attr(x[[name]], "label") <- newTags[[name]]
}
x <- tag_variables(x, tags)

# Ensure class consistency
if (!inherits(x, "safeframe")) {
Expand Down
21 changes: 11 additions & 10 deletions R/safeframe-package.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Base Tools for tagging and Validating Data
#' Base Tools for Tagging and Validating Data
#'
#' The \pkg{safeframe} package provides tools to help tag and validate data.
#' The 'safeframe' class adds column level attributes to a 'data.frame'.
Expand Down Expand Up @@ -33,7 +33,7 @@
#' pipelines).
#'
#' * `names() <-` (and related functions, such as [dplyr::rename()]) will
#' rename tags as needed
#' rename variables and carry forward the existing tags
#'
#' * `x[...] <-` and `x[[...]] <-` (see [sub_safeframe]): will adopt the
#' desired behaviour when tagged variables are lost
Expand All @@ -43,14 +43,15 @@
#'
#' @note The package does not aim to have complete integration with \pkg{dplyr}
#' functions. For example, [dplyr::mutate()] and [dplyr::bind_rows()] will
#' not preserve tags. We only provide compatibility for [dplyr::rename()].
#' not preserve tags in all cases. We only provide compatibility for
#' [dplyr::rename()].
#'
#' @examples
#'
#' # using base R style
#' x <- make_safeframe(cars[1:50, ],
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' )
#' x
#'
Expand All @@ -70,7 +71,7 @@
#'
#' ## to trigger errors when tags are dropped
#' # lost_tags_action("error")
#' # x[, 2:5]
#' # x[, 1]
#'
#' ## reset default behaviour
#' lost_tags_action()
Expand All @@ -84,17 +85,17 @@
#' x <- cars %>%
#' tibble() %>%
#' make_safeframe(
#' speed = "Miles per hour",
#' dist = "Distance in miles"
#' mph = "speed",
#' distance = "dist"
#' ) %>%
#' mutate(result = if_else(speed > 50, "fast", "slow")) %>%
#' set_tags(result = "Ticket yes/no")
#' set_tags(ticket = "result")
#'
#' head(x)
#'
#' ## extract tagged variables
#' x %>%
#' select(has_tag(c("Ticket yes/no")))
#' select(has_tag(c("ticket")))
#'
#' ## Retrieve all tags
#' x %>%
Expand Down
17 changes: 10 additions & 7 deletions R/set_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
#' @examples
#'
#' ## create a safeframe
#' x <- make_safeframe(cars, speed = "Miles per hour")
#' x <- make_safeframe(cars, mph = "speed")
#' tags(x)
#'
#' ## add new tags and fix an existing one
#' x <- set_tags(x, dist = "Distance")
#' x <- set_tags(x, distance = "dist")
#' tags(x)
#'
#' ## remove tags by setting them to NULL
#' old_tags <- tags(x)
#' x <- set_tags(x, speed = NULL, dist = NULL)
#' x <- set_tags(x, mph = NULL, distance = NULL)
#' tags(x)
#'
#' ## setting tags providing a list (used to restore old tags here)
Expand All @@ -32,6 +32,7 @@
set_tags <- function(x, ...) {
# assert inputs
checkmate::assertClass(x, "safeframe")

orig_class <- class(x)

# For some reason, we cannot remove tags from safeframe objects by setting
Expand All @@ -42,11 +43,13 @@ set_tags <- function(x, ...) {
# 3. readding the tags and the safeframe class

new_tags <- rlang::list2(...)
existing_tags <- tags(x)

x <- drop_safeframe(x, remove_tags = TRUE)
old_tags <- tags(x)
x <- drop_safeframe(x)

x <- tag_variables(x, utils::modifyList(existing_tags, new_tags))
x <- tag_variables(
x,
utils::modifyList(old_tags, new_tags, keep.null = TRUE)
)

class(x) <- orig_class

Expand Down
Loading
Loading