Merge remote-tracking branch 'remotes/origin/main' into assert_count_…

…true # Conflicts: # NEWS.md
sfirke · Dec 18, 2024 · de85e90 · de85e90
2 parents 5b4c1fe + 7eaa06d
commit de85e90
Show file tree

Hide file tree

Showing 24 changed files with 201 additions and 61 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -8,6 +8,8 @@ on:
 
 name: R-CMD-check
 
+permissions: read-all
+
 jobs:
   R-CMD-check:
     runs-on: ${{ matrix.config.os }}
@@ -29,7 +31,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -49,3 +51,4 @@ jobs:
       - uses: r-lib/actions/check-r-package@v2
         with:
           upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -11,6 +11,8 @@ on:
 
 name: pkgdown
 
+permissions: read-all
+
 jobs:
   pkgdown:
     runs-on: ubuntu-latest
@@ -19,8 +21,10 @@ jobs:
       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -39,7 +43,7 @@ jobs:
 
       - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@v4.4.1
+        uses: JamesIves/github-pages-deploy-action@v4.5.0
         with:
           clean: false
           branch: gh-pages

diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
@@ -6,14 +6,18 @@ on:
 
 name: Style
 
+permissions: read-all
+
 jobs:
   style:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -46,7 +50,7 @@ jobs:
         shell: Rscript {0}
 
       - name: Cache styler
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ${{ steps.styler-location.outputs.location }}
           key: ${{ runner.os }}-styler-${{ github.sha }}

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -8,14 +8,16 @@ on:
 
 name: test-coverage
 
+permissions: read-all
+
 jobs:
   test-coverage:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-r@v2
         with:
@@ -26,27 +28,37 @@ jobs:
           extra-packages: |
             any::sf
             any::covr
+            any::xml2
           needs: coverage
 
       - name: Test coverage
         run: |
-          covr::codecov(
+          cov <- covr::package_coverage(
             quiet = FALSE,
             clean = FALSE,
-            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
           )
+          covr::to_cobertura(cov)
         shell: Rscript {0}
 
+      - uses: codecov/codecov-action@v4
+        with:
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
+          file: ./cobertura.xml
+          plugin: noop
+          disable_search: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+
       - name: Show testthat output
         if: always()
         run: |
           ## --------------------------------------------------------------------
-          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
         shell: bash
 
       - name: Upload test results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: coverage-test-failures
           path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: c(
     person("Ryan", "Knight", , "[email protected]", role = "ctb"),
     person("Malte", "Grosser", , "[email protected]", role = "ctb"),
     person("Jonathan", "Zadra", , "[email protected]", role = "ctb"),
-    person("Olivier", "Roy", role = "ctb")
+    person("Olivier", "Roy", role = "ctb"),
+    person("Josep", family = "Pueyo-Ros", email = "[email protected]", role = "ctb")
   )
 Description: The main janitor functions can: perfectly format data.frame
     column names; provide quick counts of variable combinations (i.e.,

diff --git a/NAMESPACE b/NAMESPACE
@@ -70,6 +70,7 @@ importFrom(lubridate,second)
 importFrom(lubridate,ymd)
 importFrom(lubridate,ymd_hms)
 importFrom(magrittr,"%>%")
+importFrom(rlang,"%||%")
 importFrom(rlang,dots_n)
 importFrom(rlang,expr)
 importFrom(rlang,syms)

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,10 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`.
 
+* When `tabyl()` is called on a data.frame containing labels, it now displays the label attribute as the name of the first column in the resulting `tabyl` object (@olivroy, #394). This may break subsequent code that refers to the output of such a `tabyl` by column name. To maintain the previous behavior of ignoring variable labels, you can remove the labels with a function like `haven::zap_labels()` or `labelled::remove_labels()` before calling `tabyl()`.
+
+* `sas_numeric_to_date()` now warns for timezones other than "UTC" due to the way that SAS loads timezones, and the default timezone for `sas_numeric_to_date()` is now "UTC" instead of "" (#583, @billdenney)
+
 ## New features
 
 * A new function `paste_skip_na()` pastes without including NA values (#537).
@@ -15,7 +19,6 @@ These are all minor breaking changes resulting from enhancements and are not exp
 * The new function `excel_time_to_numeric()` converts times from Excel that do not have accompanying dates into a number of seconds.  (#245, thanks to **@billdenney** for the feature.)
 
 * The new function `assert_count()` verifies that an expected number of values are `TRUE` for quality checks in data pipelines
-
 ## Bug fixes
 
 * `adorn_totals("row")` now succeeds if the new `name` of the totals row is already a factor level of the input data.frame (#529, thanks @egozoglu for reporting).
@@ -24,11 +27,13 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * `get_one_to_one()` no longer errors with near-equal values that become identical factor levels (fix #543, thanks to @olivroy for reporting)
 
+* `clean_names()` for sf objects now works in cases when the sf_column is not the last column name (fix #578, thanks to @ar-puuk for reporting and @billdenney for fixing)
+
 ## Refactoring
 
 * Remove dplyr verbs superseded in dplyr 1.0.0 (#547, @olivroy)
 
-* Restyle the package and vignettes according to the [tidyverse style guide](https://style.tidyverse.org) (#548, olivroy)
+* Restyle the package and vignettes according to the [tidyverse style guide](https://style.tidyverse.org) (#548, @olivroy)
 
 # janitor 2.2.0 (2023-02-02)
 

diff --git a/R/clean_names.R b/R/clean_names.R
@@ -24,6 +24,7 @@
 #' (characters) to "u".
 #'
 #' @param dat The input `data.frame`.
+#' @param set_labels If set to `TRUE`, old names are stored as labels in each column of the returned data.frame.
 #' @inheritDotParams make_clean_names -string
 #' @return A `data.frame` with clean names.
 #'
@@ -32,7 +33,8 @@
 #'   support using `clean_names()` on `sf` and `tbl_graph` (from
 #'   `tidygraph`) objects as well as on database connections through
 #'   `dbplyr`. For cleaning other named objects like named lists
-#'   and vectors, use `make_clean_names()`.
+#'   and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names, 
+#'   stored as column labels, can be restored using `sjlabelled::label_to_colnames()`.
 #'
 #' @export
 #' @family Set names
@@ -71,7 +73,7 @@ clean_names <- function(dat, ...) {
 
 #' @rdname clean_names
 #' @export
-clean_names.default <- function(dat, ...) {
+clean_names.default <- function(dat, ..., set_labels = FALSE) {
   if (is.null(names(dat)) && is.null(dimnames(dat))) {
     stop(
       "`clean_names()` requires that either names or dimnames be non-null.",
@@ -81,14 +83,21 @@ clean_names.default <- function(dat, ...) {
   if (is.null(names(dat))) {
     dimnames(dat) <- lapply(dimnames(dat), make_clean_names, ...)
   } else {
+    if (set_labels){
+      old_names <- names(dat)
+      for (i in seq_along(old_names)){
+        attr(dat[[i]], "label") <- old_names[[i]]
+      }
+    }
     names(dat) <- make_clean_names(names(dat), ...)
+
   }
   dat
 }
 
 #' @rdname clean_names
 #' @export
-clean_names.sf <- function(dat, ...) {
+clean_names.sf <- function(dat, ..., set_labels = FALSE) {
   if (!requireNamespace("sf", quietly = TRUE)) { # nocov start
     stop(
       "Package 'sf' needed for this function to work. Please install it.",
@@ -97,14 +106,20 @@ clean_names.sf <- function(dat, ...) {
   } # nocov end
   # get old names
   sf_names <- names(dat)
-  # identify ending column index to clean
-  n_cols <- length(dat) - 1
+  # Clean the names except for the "sf_column" which is used internally by sf
+  cols_to_rename <- which(!(sf_names %in% attr(dat, "sf_column")))
   # clean all but last column
-  sf_cleaned <- make_clean_names(sf_names[1:n_cols], ...)
+  sf_cleaned <- make_clean_names(sf_names[cols_to_rename], ...)
   # rename original df
-  names(dat)[1:n_cols] <- sf_cleaned
+  names(dat)[cols_to_rename] <- sf_cleaned
+
+  if(set_labels){
+    for (i in seq_along(sf_names[cols_to_rename])){
+      attr(dat[[i]], "label") <- sf_names[[i]]
+    }
+  }
 
-  return(dat)
+  dat
 }
 
 #' @rdname clean_names
@@ -116,6 +131,7 @@ clean_names.tbl_graph <- function(dat, ...) {
       call. = FALSE
     )
   } # nocov end
+
   dplyr::rename_all(dat, .funs = make_clean_names, ...)
 }
 

diff --git a/R/get_dupes.R b/R/get_dupes.R
@@ -22,7 +22,7 @@
 #' mtcars %>% get_dupes(-c(wt, qsec))
 #' mtcars %>% get_dupes(starts_with("cy"))
 #' @importFrom tidyselect eval_select
-#' @importFrom rlang expr dots_n syms
+#' @importFrom rlang expr dots_n syms %||%
 get_dupes <- function(dat, ...) {
   expr <- rlang::expr(c(...))
   pos <- tidyselect::eval_select(expr, data = dat)

diff --git a/R/print_tabyl.R b/R/print_tabyl.R
@@ -1,5 +1,4 @@
 #' @export
-
 print.tabyl <- function(x, ...) {
   print.data.frame(x, row.names = FALSE)
 }
diff --git a/R/remove_empties.R b/R/remove_empties.R
@@ -7,8 +7,8 @@
 #' @param which one of "rows", "cols", or `c("rows", "cols")`. Where no
 #'   value of which is provided, defaults to removing both empty rows and empty
 #'   columns, declaring the behavior with a printed message.
-#' @param cutoff What fraction (>0 to <=1) of rows or columns must be empty to
-#'   be removed?
+#' @param cutoff a row/col should have more than this fraction of non-NA values to be
+#'   retained.  E.g., `cutoff = 0.8` means that rows/cols that are 20% or more missing will be dropped.
 #' @param quiet Should messages be suppressed (`TRUE`) or printed
 #'   (`FALSE`) indicating the summary of empty columns or rows removed?
 #' @return Returns the object without its missing rows or columns.

diff --git a/R/sas_dates.R b/R/sas_dates.R
@@ -16,11 +16,16 @@
 #' sas_numeric_to_date(time_num = 3600) # 01:00:00
 #' @family date-time cleaning
 #' @export
-sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "") {
+sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "UTC") {
   # Confirm that a usable set of input arguments is given
   has_date <- !missing(date_num)
   has_datetime <- !missing(datetime_num)
   has_time <- !missing(time_num)
+  stopifnot(is.character(tz))
+  stopifnot(length(tz) == 1)
+  if (tz != "UTC") {
+    warning("SAS may not properly store timezones other than UTC. Consider confirming the accuracy of the resulting data.")
+  }
   if (has_date & has_datetime) {
     stop("Must not give both `date_num` and `datetime_num`")
   } else if (has_time & has_datetime) {
@@ -37,8 +42,10 @@ sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "") {
     if (!all(mask_na_match)) {
       stop("The same values are not NA for both `date_num` and `time_num`")
     }
-    ret <- as.POSIXct(86400 * date_num + time_num, origin = "1960-01-01", tz = tz)
-  } else if (has_datetime) {
+    datetime_num <- 86400 * date_num + time_num
+    has_datetime <- TRUE
+  }
+  if (has_datetime) {
     ret <- as.POSIXct(datetime_num, origin = "1960-01-01", tz = tz)
   } else if (has_date) {
     ret <- as.Date(date_num, origin = "1960-01-01")