Skip to content

Commit b77958e

Browse files
version 1.0.0 (#84)
* use fr, update codec_check, drivetime standalone * check_* functions can also take a tibble or list * use vroom to test read csv file (this prevents the addition of the "col_types" attribute to the tibble) * add crime risk data * simplify specs documentation; update version * don't use articles * add shiny app Co-authored-by: Andrew Vancil <[email protected]>
1 parent 261a904 commit b77958e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2084
-2612
lines changed

DESCRIPTION

+11-13
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
Package: codec
22
Title: Community Data Explorer for Cincinnati
3-
Version: 0.7.2
3+
Version: 1.0.0
44
Authors@R: c(
55
person("Cole", "Brokamp",
66
email = "[email protected]",
77
role = c("aut", "cre")),
88
person("Erika", "Manning",
9+
role = "aut"),
10+
person("Andrew", "Vancil",
911
role = "aut")
1012
)
1113
Description: codec provides tools for working with metadata in R and storing it alongside data in a YAML file. This package serves as the definition of the CoDEC data specifications and provides helpers to contribute and validate CoDEC data.
@@ -18,33 +20,29 @@ Suggests:
1820
roxygen2,
1921
knitr,
2022
rmarkdown,
23+
dplyr,
2124
curl,
2225
glue,
23-
mapview,
24-
gh,
25-
DT,
26-
leaflet,
2726
callr,
2827
downloadthis,
29-
cincy (>= 1.0.2),
28+
tibble,
3029
bsplus
3130
Remotes:
32-
geomarker-io/cincy
31+
geomarker-io/cincy,
32+
cole-brokamp/fr
3333
Config/testthat/edition: 3
3434
URL: https://github.com/geomarker-io/codec,
3535
http://geomarker.io/codec/
3636
BugReports: https://github.com/geomarker-io/codec/issues
3737
Imports:
38-
dplyr,
39-
forcats,
4038
fs,
4139
purrr (>= 1.0.0),
42-
readr,
43-
rlang (>= 0.4.11),
4440
stringi,
4541
stringr,
46-
tibble,
47-
yaml,
42+
yaml,
43+
vroom,
44+
cincy (>= 1.1.0),
45+
fr (>= 0.4.0),
4846
sf
4947
VignetteBuilder: knitr
5048
Depends:

NAMESPACE

+1-23
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,5 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export(":=")
4-
export(.data)
5-
export(add_attr_from_tdr)
6-
export(add_attrs)
7-
export(add_col_attrs)
8-
export(add_type_attrs)
9-
export(as_label)
10-
export(as_name)
113
export(check_codec_tdr)
124
export(check_codec_tdr_csv)
135
export(check_files)
@@ -16,18 +8,4 @@ export(check_tdr_path)
168
export(codec_colors)
179
export(codec_data)
1810
export(codec_tdr)
19-
export(enquo)
20-
export(enquos)
21-
export(glimpse_attr)
22-
export(glimpse_schema)
23-
export(glimpse_tdr)
24-
export(read_tdr)
25-
export(read_tdr_csv)
26-
export(write_tdr)
27-
export(write_tdr_csv)
28-
importFrom(rlang,":=")
29-
importFrom(rlang,.data)
30-
importFrom(rlang,as_label)
31-
importFrom(rlang,as_name)
32-
importFrom(rlang,enquo)
33-
importFrom(rlang,enquos)
11+
importFrom(cincy,interpolate)

R/attributes.R

-84
This file was deleted.

R/codec_check.R

+38-42
Original file line numberDiff line numberDiff line change
@@ -9,63 +9,51 @@
99
#' - the data contains a year column (or year and month columns)
1010
#' - all fields in the CSV data are described in the metadata and vice-versa
1111
#' See `vignette("codec-specs")` for the CoDEC specifications.
12-
#' @param tdr a codec tabular-data-resource
13-
#' @param tdr_md a codec tabular-data-resource metadata list object
12+
#' @param x a codec fr_tdr object (or a data frame for `check_census_tract_id()` and
13+
#' `check_date()`, or a list for `check_codec_tdr()`)
1414
#' @param path path to tdr folder
1515
#' @param name the name field from tabular-data-resource.yaml
1616
#' @return for `check_codec_tdr_csv`, the fr_tdr object read from
1717
#' `tabular-data-resource.yaml` in `path`, returned invisibly
18+
#' @importFrom cincy interpolate
1819
#' @export
1920
check_codec_tdr_csv <- function(path) {
2021
check_files(path)
21-
tdr <- read_tdr(path)$tdr
22-
check_codec_tdr(tdr)
22+
d <- fr::read_fr_tdr(fs::path(path, "tabular-data-resource.yaml"))
2323

24-
md_fields <- names(tdr$schema$fields)
25-
d_fields <- names(readr::read_csv(read_tdr(path)$csv_file, n_max = 0, show_col_types = FALSE))
26-
if(! all(d_fields %in% md_fields)) {
27-
stop("the metadata does not describe all fields in the data", call. = FALSE)
28-
}
29-
if(! all(md_fields %in% d_fields)) {
30-
stop("the metadata describes fields that are not in the data", call. = FALSE)
31-
}
32-
33-
tdr_d <- read_tdr_csv(path)
34-
check_data(tdr_d)
35-
return(invisible(tdr_d))
36-
}
24+
check_codec_tdr(as.list(d))
25+
check_census_tract_id(as.data.frame(d))
26+
check_date(as.data.frame(d))
3727

38-
#' Check data
39-
#' @rdname check_codec_tdr_csv
40-
check_data <- function(tdr) {
41-
check_census_tract_id(tdr)
42-
check_date(tdr)
28+
return(invisible(d))
4329
}
4430

4531
#' Check census tract id column
4632
#' @rdname check_codec_tdr_csv
47-
check_census_tract_id <- function(tdr) {
33+
check_census_tract_id <- function(x) {
4834
census_tract_id_names <- paste0("census_tract_id", c("_2000", "_2010", "_2020"))
35+
tdr_data <- as.data.frame(x)
36+
tdr_data_names <- names(tdr_data)
4937

5038
# has a census_tract_id_{year} column (year is 2000, 2010, or 2020)
51-
if (!any(names(tdr) %in% census_tract_id_names)) {
39+
if (!any(tdr_data_names %in% census_tract_id_names)) {
5240
stop("must contain a census tract id column called census_tract_id_2000, census_tract_id_2010, or census_tract_id_2020", call. = FALSE)
5341
}
5442

5543
# make sure only one tract column
56-
if (sum(names(tdr) %in% census_tract_id_names) > 1) {
44+
if (sum(tdr_data_names %in% census_tract_id_names) > 1) {
5745
stop("must contain only one census tract id column", call. = FALSE)
5846
}
5947

60-
census_tract_id_name <- census_tract_id_names[census_tract_id_names %in% names(tdr)]
48+
census_tract_id_name <- census_tract_id_names[census_tract_id_names %in% tdr_data_names]
6149
census_tract_id_year <- stringr::str_extract(census_tract_id_name, "[0-9]+")
6250

6351
required_census_tract_ids <-
6452
parse(text = paste0("cincy::tract_tigris_", census_tract_id_year)) |>
6553
eval() |>
6654
purrr::pluck(paste0("census_tract_id_", census_tract_id_year))
6755

68-
if (!all(required_census_tract_ids %in% tdr[[census_tract_id_name]])) {
56+
if (!all(required_census_tract_ids %in% tdr_data[[census_tract_id_name]])) {
6957
stop("the census tract id column, ",
7058
census_tract_id_name,
7159
", does not contain every census tract in ",
@@ -74,28 +62,31 @@ check_census_tract_id <- function(tdr) {
7462
)
7563
}
7664

77-
return(invisible(tdr))
65+
return(invisible(x))
7866
}
7967

80-
#' Check date
68+
#' Check year or year-month column
8169
#' @rdname check_codec_tdr_csv
82-
check_date <- function(tdr) {
70+
check_date <- function(x) {
71+
72+
tdr_data <- as.data.frame(x)
73+
tdr_data_names <- names(tdr_data)
8374

84-
if (! "year" %in% names(tdr)) {
75+
if (! "year" %in% tdr_data_names) {
8576
stop("must contain a 'year' column", call. = FALSE)
8677
}
8778

88-
years <- unique(tdr$year)
89-
if (! identical(years, as.integer(years))) {
90-
stop("the 'year' field must only contain integer years", call. = FALSE)
79+
years <- unique(tdr_data$year)
80+
if (! all(years %in% 1970:2099)) {
81+
stop("the 'year' field must only contain integer years between 1970 and 2099", call. = FALSE)
9182
}
9283

93-
if ("month" %in% names(tdr)) {
94-
if (! all(tdr$month %in% 1:12)) {
84+
if ("month" %in% tdr_data_names) {
85+
if (! all(tdr_data$month %in% 1:12)) {
9586
stop("the 'month' field must only contain integer values 1-12", call. = FALSE)
9687
}
9788
}
98-
return(invisible(tdr))
89+
return(invisible(x))
9990
}
10091

10192
#' Check files
@@ -129,17 +120,18 @@ check_files <- function(path) {
129120

130121
# try to read (first 100 lines of) CSV file
131122
test_read_csv_file <-
132-
purrr::safely(readr::read_csv)(
123+
purrr::safely(vroom::vroom)(
133124
file = tdr_csv,
125+
delim = ",",
134126
n_max = 100,
135127
col_names = TRUE,
136128
show_col_types = FALSE,
137-
locale = readr::locale(
129+
locale = vroom::locale(
138130
encoding = "UTF-8",
139131
decimal_mark = ".",
140132
grouping_mark = "",
141133
),
142-
name_repair = "check_unique",
134+
.name_repair = "check_unique",
143135
)
144136

145137
if (!is.null(test_read_csv_file$error)) {
@@ -152,7 +144,9 @@ check_files <- function(path) {
152144
#' check CoDEC tdr
153145
#' @rdname check_codec_tdr_csv
154146
#' @export
155-
check_codec_tdr <- function(tdr_md) {
147+
check_codec_tdr <- function(x) {
148+
149+
tdr_md <- as.list(x)
156150

157151
# must have "name" and "path" descriptors
158152
if (!purrr::pluck_exists(tdr_md, "name")) stop("`name` property descriptor is required", call. = FALSE)
@@ -212,7 +206,7 @@ check_codec_tdr <- function(tdr_md) {
212206
)
213207
}
214208

215-
return(invisible(tdr_md))
209+
return(invisible(x))
216210
}
217211

218212

@@ -242,6 +236,8 @@ check_tdr_path <- function(path) {
242236
# path ends with .csv
243237
if (! fs::path_ext(path) == "csv") stop("'path' must end with '.csv'", call. = FALSE)
244238
# path can be a URL
239+
240+
is_url <- function(.x) grepl("^((http|ftp)s?|sftp)://", .x)
245241
if (is_url(path)) return(invisible(NULL))
246242
# if not URL, check for absolute path
247243
if (fs::is_absolute_path(path)) stop("'path' must be a relative file path")

0 commit comments

Comments
 (0)