diff --git a/NAMESPACE b/NAMESPACE index 956a590..0cd8403 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,6 @@ S3method("$<-",safeframe) S3method("[",safeframe) S3method("[<-",safeframe) S3method("[[<-",safeframe) -S3method("names<-",safeframe) S3method(print,safeframe) export(get_lost_tags_action) export(has_tag) diff --git a/R/restore_tags.R b/R/restore_tags.R index 904d17e..625c3d3 100644 --- a/R/restore_tags.R +++ b/R/restore_tags.R @@ -24,18 +24,18 @@ restore_tags <- function(x, tags, checkmate::assertClass(x, "data.frame") checkmate::assertClass(tags, "list") lost_action <- match.arg(lost_action) - + # Match the remaining variables to the provided tags common_vars <- intersect(names(x), tags) if (length(common_vars) == 0 && length(names(x)) > 0) { stop("No matching tags provided.") } - + lost_vars <- setdiff(tags, names(x)) - + if (lost_action != "none" && length(lost_vars) > 0) { lost_tags <- names(lost_vars) - + lost_msg <- vars_tags(lost_vars, lost_tags) msg <- paste( "The following tagged variables are lost:\n", @@ -44,7 +44,6 @@ restore_tags <- function(x, tags, if (lost_action == "warning") { # nolint next: condition_call_linter. warning(warningCondition(msg, class = "safeframe_warning")) - } if (lost_action == "error") { # nolint next: condition_call_linter. @@ -55,7 +54,7 @@ restore_tags <- function(x, tags, remaining_names <- setdiff(names(tags), names(lost_vars)) # Subset tags with remaining names tags <- tags[remaining_names] - + x <- tag_variables(x, tags) # Ensure class consistency diff --git a/R/square_bracket.R b/R/square_bracket.R index 80b8781..ec8fdbd 100644 --- a/R/square_bracket.R +++ b/R/square_bracket.R @@ -37,7 +37,7 @@ #' mph = "speed", #' distance = "dist" #' ) %>% -#' mutate(result = if_else(mph > 50, "fast", "slow")) %>% +#' mutate(result = if_else(speed > 50, "fast", "slow")) %>% #' set_tags(ticket = "result") #' x #' @@ -48,7 +48,7 @@ #' x[[2]] <- NULL #' x #' -#' x$age <- NULL +#' x$speed <- NULL #' x #' } `[.safeframe` <- function(x, i, j, drop = FALSE) { @@ -106,7 +106,7 @@ out <- NextMethod() old_tags <- tags(x) out <- restore_tags(out, old_tags, lost_action) - + out } @@ -118,7 +118,7 @@ lost_action <- get_lost_tags_action() old_tags <- tags(x) new_tags <- old_tags - + lost_tags(old_tags, new_tags, lost_action) class(x) <- setdiff(class(x), "safeframe") diff --git a/R/tag_variables.R b/R/tag_variables.R index ef26c37..20d48ad 100644 --- a/R/tag_variables.R +++ b/R/tag_variables.R @@ -38,27 +38,14 @@ tag_variables <- function(x, tags) { }) checkmate::reportAssertions(tag_errors) - # Add the tags to the right location - # Vectorized approach does not work, so we use a for.. loop instead - # for (tag in names(tags)) { - # var <- tags[[tag]] - # if (is.null(var)) { - # # Find the relevant variable for the tag without a variable - # removeVar <- tags(x)[[tag]] - # if (length(removeVar) > 0) { - # # Remove the tag on the var - # x <- remove_tag(x, removeVar[[1]]) - # } - # } else { - # attr(x[[var]], "label") <- tag - # } - # } - # Split tags into NULL and non-NULL cases - if (any(sapply(tags, is.null))) { - null_tags <- names(tags)[sapply(tags, is.null)] - } else { null_tags <- NULL } - non_null_tags <- names(tags)[!sapply(tags, is.null)] + nullIndex <- vapply(tags, is.null, FUN.VALUE = logical(1)) + if (any(nullIndex)) { + null_tags <- names(tags)[nullIndex] + } else { + null_tags <- NULL + } + non_null_tags <- names(tags)[!nullIndex] # Handle NULL cases (tag removals) if (length(null_tags) > 0) { diff --git a/R/tags.R b/R/tags.R index 03aec6c..df55828 100644 --- a/R/tags.R +++ b/R/tags.R @@ -35,7 +35,9 @@ tags <- function(x, show_null = FALSE) { out <- lapply(names(x), FUN = function(var) { tmpLabel <- attr(x[[var]], "label") if (!is.null(tmpLabel)) { - return(setNames(list(var), tmpLabel)) + tmpVar <- list(var) + names(tmpVar) <- tmpLabel + return(tmpVar) } else { return(NULL) } diff --git a/R/type.R b/R/type.R index ba51cb4..8bdebeb 100644 --- a/R/type.R +++ b/R/type.R @@ -18,8 +18,8 @@ #' #' validate_types( #' x, -#' speed = type("numeric"), -#' dist = "numeric" +#' mph = type("numeric"), +#' distance = "numeric" #' ) #' type <- function(x) { diff --git a/R/validate_safeframe.R b/R/validate_safeframe.R index 0dcb539..f628ed1 100644 --- a/R/validate_safeframe.R +++ b/R/validate_safeframe.R @@ -35,23 +35,23 @@ #' #' ## validation #' validate_safeframe(x, -#' speed = c("numeric", "factor"), -#' dist = "numeric" +#' mph = c("numeric", "factor"), +#' distance = "numeric" #' ) #' #' ## the below issues an error #' ## note: tryCatch is only used to avoid a genuine error in the example #' tryCatch(validate_safeframe(x, -#' speed = c("numeric", "factor"), -#' dist = "factor" +#' mph = c("numeric", "factor"), +#' distance = "factor" #' ), error = paste) validate_safeframe <- function(x, ...) { checkmate::assert_class(x, "safeframe") validate_tags(x) validate_types(x, ...) - + message("'", checkmate::vname(x), "' is a valid safeframe object") - + invisible(x) } diff --git a/R/validate_tags.R b/R/validate_tags.R index e9cc833..f1ebecd 100644 --- a/R/validate_tags.R +++ b/R/validate_tags.R @@ -28,8 +28,8 @@ #' #' ## validation requires you to specify the types directly #' validate_safeframe(x, -#' speed = c("integer", "numeric"), -#' dist = "numeric" +#' mph = c("integer", "numeric"), +#' distance = "numeric" #' ) validate_tags <- function(x) { checkmate::assert_class(x, "safeframe") diff --git a/man/has_tag.Rd b/man/has_tag.Rd index 4518212..e4684e5 100644 --- a/man/has_tag.Rd +++ b/man/has_tag.Rd @@ -16,22 +16,17 @@ requested tags \description{ A selector function to use in \pkg{tidyverse} functions } -\note{ -Using this in a pipeline results in a 'safeframe' object, but does not -maintain the variable tags at this time. It is primarily useful to make -your pipelines human readable. -} \examples{ ## create safeframe x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) head(x) if (require(dplyr) && require(magrittr)) { x \%>\% - select(has_tag(c("Miles per hour", "Distance in miles"))) \%>\% + select(has_tag(c("mph", "distance"))) \%>\% head() } } diff --git a/man/make_safeframe.Rd b/man/make_safeframe.Rd index 2f5837f..a14890a 100644 --- a/man/make_safeframe.Rd +++ b/man/make_safeframe.Rd @@ -9,10 +9,8 @@ make_safeframe(x, ...) \arguments{ \item{x}{a \code{data.frame} or a \code{tibble}} -\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with variable -names in \code{x} as list names and the tags as list values. Values set to -\code{NULL} remove the tag When specifying tags, please also see -\code{default_values}.} +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A series of tags provided as +\code{tag_name = "column_name"}} } \value{ The function returns a \code{safeframe} object. @@ -26,8 +24,8 @@ automatically use tagged fields for further data cleaning and analysis. \examples{ x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) ## print result - just first few entries @@ -38,8 +36,8 @@ tags(x) ## tags can also be passed as a list with the splice operator (!!!) my_tags <- list( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) new_x <- make_safeframe(cars, !!!my_tags) diff --git a/man/names-set-.safeframe.Rd b/man/names-set-.safeframe.Rd deleted file mode 100644 index a2e639b..0000000 --- a/man/names-set-.safeframe.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/names.R -\name{names<-.safeframe} -\alias{names<-.safeframe} -\title{Rename columns of a safeframe} -\usage{ -\method{names}{safeframe}(x) <- value -} -\arguments{ -\item{x}{a \code{safeframe} object} - -\item{value}{a \code{character} vector to set the new names of the columns of \code{x}} -} -\value{ -a \code{safeframe} with new column names -} -\description{ -This function can be used to rename the columns a \code{safeframe} (that is, -adjust -variable names). -} -\examples{ -## create safeframe -x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" -) -head(x) - -## change names -names(x)[1] <- "mph" - -## see results: columns have been updated -head(x) - -# This also works with using `dplyr::rename()` because it uses names<-() -# under the hood -if (require(dplyr) && require(magrittr)) { - x <- x \%>\% - rename(speed = "mph") - head(x) - tags(x) -} -} diff --git a/man/print.safeframe.Rd b/man/print.safeframe.Rd index 2505ad0..da6a608 100644 --- a/man/print.safeframe.Rd +++ b/man/print.safeframe.Rd @@ -20,8 +20,8 @@ This function prints safeframe objects. \examples{ ## create safeframe x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) ## print object - using only the first few entries @@ -32,8 +32,8 @@ if (require(tibble) && require(magrittr)) { cars \%>\% tibble() \%>\% make_safeframe( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) } } diff --git a/man/safeframe-package.Rd b/man/safeframe-package.Rd index 81b773e..1584e07 100644 --- a/man/safeframe-package.Rd +++ b/man/safeframe-package.Rd @@ -4,7 +4,7 @@ \name{safeframe-package} \alias{safeframe-package} \alias{safeframe} -\title{Base Tools for tagging and Validating Data} +\title{Base Tools for Tagging and Validating Data} \description{ The \pkg{safeframe} package provides tools to help tag and validate data. The 'safeframe' class adds column level attributes to a 'data.frame'. @@ -14,7 +14,8 @@ making data pipelines more robust and reliable. \note{ The package does not aim to have complete integration with \pkg{dplyr} functions. For example, \code{\link[dplyr:mutate]{dplyr::mutate()}} and \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}} will -not preserve tags. We only provide compatibility for \code{\link[dplyr:rename]{dplyr::rename()}}. +not preserve tags in all cases. We only provide compatibility for +\code{\link[dplyr:rename]{dplyr::rename()}}. } \section{Main functions}{ @@ -41,7 +42,7 @@ alter or lose tagged variables (and may thus break downstream data pipelines). \itemize{ \item \verb{names() <-} (and related functions, such as \code{\link[dplyr:rename]{dplyr::rename()}}) will -rename tags as needed +rename variables and carry forward the existing tags \item \verb{x[...] <-} and \verb{x[[...]] <-} (see \link{sub_safeframe}): will adopt the desired behaviour when tagged variables are lost \item \code{print()}: prints info about the \code{safeframe} in addition to the @@ -53,8 +54,8 @@ desired behaviour when tagged variables are lost # using base R style x <- make_safeframe(cars[1:50, ], - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) x @@ -74,7 +75,7 @@ x[, 2] ## to trigger errors when tags are dropped # lost_tags_action("error") -# x[, 2:5] +# x[, 1] ## reset default behaviour lost_tags_action() @@ -88,17 +89,17 @@ if (require(dplyr) && require(magrittr)) { x <- cars \%>\% tibble() \%>\% make_safeframe( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) \%>\% mutate(result = if_else(speed > 50, "fast", "slow")) \%>\% - set_tags(result = "Ticket yes/no") + set_tags(ticket = "result") head(x) ## extract tagged variables x \%>\% - select(has_tag(c("Ticket yes/no"))) + select(has_tag(c("ticket"))) ## Retrieve all tags x \%>\% diff --git a/man/set_tags.Rd b/man/set_tags.Rd index f749edc..409b21c 100644 --- a/man/set_tags.Rd +++ b/man/set_tags.Rd @@ -9,10 +9,8 @@ set_tags(x, ...) \arguments{ \item{x}{a \code{data.frame} or a \code{tibble}} -\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with variable -names in \code{x} as list names and the tags as list values. Values set to -\code{NULL} remove the tag When specifying tags, please also see -\code{default_values}.} +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A series of tags provided as +\code{tag_name = "column_name"}} } \value{ The function returns a \code{safeframe} object. @@ -24,16 +22,16 @@ syntax as the constructor \code{\link[=make_safeframe]{make_safeframe()}}. \examples{ ## create a safeframe -x <- make_safeframe(cars, speed = "Miles per hour") +x <- make_safeframe(cars, mph = "speed") tags(x) ## add new tags and fix an existing one -x <- set_tags(x, dist = "Distance") +x <- set_tags(x, distance = "dist") tags(x) ## remove tags by setting them to NULL old_tags <- tags(x) -x <- set_tags(x, speed = NULL, dist = NULL) +x <- set_tags(x, mph = NULL, distance = NULL) tags(x) ## setting tags providing a list (used to restore old tags here) diff --git a/man/sub_safeframe.Rd b/man/sub_safeframe.Rd index ff88620..a85399f 100644 --- a/man/sub_safeframe.Rd +++ b/man/sub_safeframe.Rd @@ -52,11 +52,11 @@ if (require(dplyr) && require(magrittr)) { ## create a safeframe x <- cars \%>\% make_safeframe( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) \%>\% mutate(result = if_else(speed > 50, "fast", "slow")) \%>\% - set_tags(result = "Ticket") + set_tags(ticket = "result") x ## dangerous removal of a tagged column setting it to NULL issues warning @@ -66,7 +66,7 @@ if (require(dplyr) && require(magrittr)) { x[[2]] <- NULL x - x$age <- NULL + x$speed <- NULL x } } diff --git a/man/tags_df.Rd b/man/tags_df.Rd index 8e480cc..e59d64f 100644 --- a/man/tags_df.Rd +++ b/man/tags_df.Rd @@ -20,8 +20,8 @@ the \code{safeframe} object) are renamed. Note that the output is no longer a \examples{ x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) ## get a data.frame with variables renamed based on tags diff --git a/man/type.Rd b/man/type.Rd index 44acbe7..46e8301 100644 --- a/man/type.Rd +++ b/man/type.Rd @@ -20,14 +20,14 @@ These can be used to provide comprehensive typesetting when creating a } \examples{ x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) validate_types( x, - speed = type("numeric"), - dist = "numeric" + mph = type("numeric"), + distance = "numeric" ) } diff --git a/man/validate_safeframe.Rd b/man/validate_safeframe.Rd index 9315175..01df642 100644 --- a/man/validate_safeframe.Rd +++ b/man/validate_safeframe.Rd @@ -9,8 +9,8 @@ validate_safeframe(x, ...) \arguments{ \item{x}{a \code{safeframe} object} -\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with variable -names in \code{x} as list names and the related types as list values.} +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with tags in \code{x} +as list names and the related types as list values.} } \value{ If checks pass, a \code{safeframe} object; otherwise issues an error. @@ -34,22 +34,22 @@ The following checks are performed: ## create a valid safeframe x <- cars |> make_safeframe( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) x ## validation validate_safeframe(x, - speed = c("numeric", "factor"), - dist = "numeric" + mph = c("numeric", "factor"), + distance = "numeric" ) ## the below issues an error ## note: tryCatch is only used to avoid a genuine error in the example tryCatch(validate_safeframe(x, - speed = c("numeric", "factor"), - dist = "factor" + mph = c("numeric", "factor"), + distance = "factor" ), error = paste) } \seealso{ diff --git a/man/validate_tags.Rd b/man/validate_tags.Rd index d41514c..3454267 100644 --- a/man/validate_tags.Rd +++ b/man/validate_tags.Rd @@ -21,8 +21,8 @@ checking that: i) tags are present ii) tags is a \code{list} of \code{character} ## create a valid safeframe x <- cars |> make_safeframe( - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) x @@ -32,8 +32,8 @@ tryCatch(validate_safeframe(x), error = paste) ## validation requires you to specify the types directly validate_safeframe(x, - speed = c("integer", "numeric"), - dist = "numeric" + mph = c("integer", "numeric"), + distance = "numeric" ) } \seealso{ diff --git a/man/validate_types.Rd b/man/validate_types.Rd index 436723a..bcb4024 100644 --- a/man/validate_types.Rd +++ b/man/validate_types.Rd @@ -9,8 +9,8 @@ validate_types(x, ...) \arguments{ \item{x}{a \code{safeframe} object} -\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with variable -names in \code{x} as list names and the related types as list values.} +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> A named list with tags in \code{x} +as list names and the related types as list values.} } \value{ A named \code{list}. @@ -22,8 +22,8 @@ those not provided. } \examples{ x <- make_safeframe(cars, - speed = "Miles per hour", - dist = "Distance in miles" + mph = "speed", + distance = "dist" ) x @@ -32,7 +32,7 @@ x tryCatch(validate_types(x), error = paste) ## to allow other types, e.g. gender to be integer, character or factor -validate_types(x, speed = "numeric", dist = c( +validate_types(x, mph = "numeric", distance = c( "integer", "character", "numeric" )) diff --git a/tests/testthat/test-set_tags.R b/tests/testthat/test-set_tags.R index 43dfa62..2ec6fb4 100644 --- a/tests/testthat/test-set_tags.R +++ b/tests/testthat/test-set_tags.R @@ -4,7 +4,7 @@ test_that("tests for set_tags()", { # Check error messages msg <- "Must inherit from class 'safeframe', but has class 'data.frame'." expect_error(set_tags(cars), msg) - + msg <- "Must be element of set {'speed','dist'}, but is" expect_error(set_tags(x, outcome = "toto"), msg, fixed = TRUE) diff --git a/tests/testthat/test-square_bracket.R b/tests/testthat/test-square_bracket.R index 6f83a3f..e234a66 100644 --- a/tests/testthat/test-square_bracket.R +++ b/tests/testthat/test-square_bracket.R @@ -86,11 +86,11 @@ test_that("tests for [[<- operator", { x <- make_safeframe(cars, mph = "speed", distance = "dist") msg <- "The following tagged variables are lost:\n speed - mph" expect_warning(x[[1]] <- NULL, msg) - + lost_tags_action("error", quiet = TRUE) x <- make_safeframe(cars, mph = "speed", distance = "dist") expect_error(x[[1]] <- NULL, msg) - + # functionalities x <- make_safeframe(cars, mph = "speed", distance = "dist") x[[1]] <- 1L @@ -141,4 +141,3 @@ test_that("no warnings when untagged columns are dropped - #55", { expect_silent(x[, "speed"]) }) - diff --git a/tests/testthat/test-tag_variables.R b/tests/testthat/test-tag_variables.R index 8e0c18a..2ebeec1 100644 --- a/tests/testthat/test-tag_variables.R +++ b/tests/testthat/test-tag_variables.R @@ -34,7 +34,7 @@ test_that("tag_variables() works with specification by variable name", { x <- tag_variables(x, list(distance = NULL)) expect_identical(attr(x$dist, "label"), NULL) expect_identical(attr(x$speed, "label"), NULL) - + # Tag multiple variables at once x <- tag_variables(cars, list(distance = "dist", mph = "speed")) expect_identical(tags(x), list(mph = "speed", distance = "dist")) @@ -65,6 +65,6 @@ test_that("resetting tags to NULL", { x <- tag_variables(cars, list(vitesse = 1, distance = 2)) expect_identical(tags(x), list(vitesse = "speed", distance = "dist")) - x <- tag_variables(x, list(vitesse = NULL)) + x <- tag_variables(x, list(vitesse = NULL)) expect_null(attr(x$speed, "label")) }) diff --git a/tests/testthat/test-validate_safeframe.R b/tests/testthat/test-validate_safeframe.R index 24b5d01..e36e60b 100644 --- a/tests/testthat/test-validate_safeframe.R +++ b/tests/testthat/test-validate_safeframe.R @@ -31,13 +31,14 @@ test_that("validate_safeframe() allows valid objects", { # Print a message expect_message( - validate_safeframe(x, id = 'numeric'), + validate_safeframe(x, id = "numeric"), "valid" ) # And returns invisibly... v <- suppressMessages(expect_invisible( - validate_safeframe(x, id = 'numeric'))) + validate_safeframe(x, id = "numeric") + )) # ...an identical object expect_identical(x, v) diff --git a/tests/testthat/test-validate_types.R b/tests/testthat/test-validate_types.R index 9179bb4..d2153d6 100644 --- a/tests/testthat/test-validate_types.R +++ b/tests/testthat/test-validate_types.R @@ -12,14 +12,14 @@ test_that("validate_types() validates types", { x, validate_types(x, mph = "numeric") ) - + # Failed validations x <- make_safeframe(cars, mph = "speed") expect_error( validate_types(x, mph = "factor"), "mph: Must inherit from class 'factor', but has class 'numeric'" ) - + x <- make_safeframe(cars, mph = "speed", distance = "dist") expect_snapshot_error( validate_types(x, mph = "factor", distance = "character") @@ -37,6 +37,6 @@ test_that("ensure validate_types throws error if no types provided", { test_that("validate_types fails if types are provided for non-existent tags", { x <- make_safeframe(cars, mph = "speed") expect_error( - validate_types(x, distance = 'numeric') + validate_types(x, distance = "numeric") ) -}) \ No newline at end of file +})