9
9
# ' - the data contains a year (or year and month) column(s)
10
10
# ' - all fields in the CSV data are described in the metadata and vice-versa
11
11
# ' See `vignette("codec-specs")` for the CoDEC specifications.
12
- # ' @param tdr a codec tabular- data-resource
13
- # ' @param tdr_md a codec tabular-data-resource metadata list object
12
+ # ' @param x a codec fr_tdr object (or data frame for check_census_tract_id(), check_date()
13
+ # ' and a list for check_codec_tdr())
14
14
# ' @param path path to tdr folder
15
15
# ' @param name the name field from tabular-data-resource.yaml
16
16
# ' @return for `check_codec_tdr_csv`, a tibble with added
17
17
# ' tabular-data-resource attributes (equivalent to read_tdr_csv with `codec = TRUE`)
18
+ # ' @importFrom cincy interpolate
18
19
# ' @export
19
20
check_codec_tdr_csv <- function (path ) {
20
21
check_files(path )
21
- tdr <- read_tdr(path )$ tdr
22
- check_codec_tdr(tdr )
22
+ d <- fr :: read_fr_tdr(fs :: path(path , " tabular-data-resource.yaml" ))
23
23
24
- md_fields <- names(tdr $ schema $ fields )
25
- d_fields <- names(readr :: read_csv(read_tdr(path )$ csv_file , n_max = 0 , show_col_types = FALSE ))
26
- if (! all(d_fields %in% md_fields )) {
27
- stop(" the metadata does not describe all fields in the data" , call. = FALSE )
28
- }
29
- if (! all(md_fields %in% d_fields )) {
30
- stop(" the metadata describes fields that are not in the data" , call. = FALSE )
31
- }
32
-
33
- tdr_d <- read_tdr_csv(path )
34
- check_data(tdr_d )
35
- return (invisible (tdr_d ))
36
- }
24
+ check_codec_tdr(as.list(d ))
25
+ check_census_tract_id(as.data.frame(d ))
26
+ check_date(as.data.frame(d ))
37
27
38
- # ' Check data
39
- # ' @rdname check_codec_tdr_csv
40
- check_data <- function (tdr ) {
41
- check_census_tract_id(tdr )
42
- check_date(tdr )
28
+ return (invisible (d ))
43
29
}
44
30
45
31
# ' Check census tract id column
46
32
# ' @rdname check_codec_tdr_csv
47
- check_census_tract_id <- function (tdr ) {
33
+ check_census_tract_id <- function (x ) {
48
34
census_tract_id_names <- paste0(" census_tract_id" , c(" _2000" , " _2010" , " _2020" ))
35
+ tdr_data <- as.data.frame(x )
36
+ tdr_data_names <- names(tdr_data )
49
37
50
38
# has census_tract_id_{year} or census_tract_id column
51
- if (! any(names( tdr ) %in% census_tract_id_names )) {
39
+ if (! any(tdr_data_names %in% census_tract_id_names )) {
52
40
stop(" must contain a census tract id column called census_tract_id_2000, census_tract_id_2010, or census_tract_id_2020" , call. = FALSE )
53
41
}
54
42
55
43
# make sure only one tract column
56
- if (sum(names( tdr ) %in% census_tract_id_names ) > 1 ) {
44
+ if (sum(tdr_data_names %in% census_tract_id_names ) > 1 ) {
57
45
stop(" must contain only one census tract id column" , call. = FALSE )
58
46
}
59
47
60
- census_tract_id_name <- census_tract_id_names [census_tract_id_names %in% names( tdr ) ]
48
+ census_tract_id_name <- census_tract_id_names [census_tract_id_names %in% tdr_data_names ]
61
49
census_tract_id_year <- stringr :: str_extract(census_tract_id_name , " [0-9]+" )
62
50
63
51
required_census_tract_ids <-
64
52
parse(text = paste0(" cincy::tract_tigris_" , census_tract_id_year )) | >
65
53
eval() | >
66
54
purrr :: pluck(paste0(" census_tract_id_" , census_tract_id_year ))
67
55
68
- if (! all(required_census_tract_ids %in% tdr [[census_tract_id_name ]])) {
56
+ if (! all(required_census_tract_ids %in% tdr_data [[census_tract_id_name ]])) {
69
57
stop(" the census tract id column, " ,
70
58
census_tract_id_name ,
71
59
" , does not contain every census tract in " ,
@@ -74,28 +62,31 @@ check_census_tract_id <- function(tdr) {
74
62
)
75
63
}
76
64
77
- return (invisible (tdr ))
65
+ return (invisible (x ))
78
66
}
79
67
80
- # ' Check date
68
+ # ' Check year or year-month column
81
69
# ' @rdname check_codec_tdr_csv
82
- check_date <- function (tdr ) {
70
+ check_date <- function (x ) {
71
+
72
+ tdr_data <- as.data.frame(x )
73
+ tdr_data_names <- names(tdr_data )
83
74
84
- if (! " year" %in% names( tdr ) ) {
75
+ if (! " year" %in% tdr_data_names ) {
85
76
stop(" must contain a 'year' column" , call. = FALSE )
86
77
}
87
78
88
- years <- unique(tdr $ year )
89
- if (! identical (years , as.integer( years ) )) {
90
- stop(" the 'year' field must only contain integer years" , call. = FALSE )
79
+ years <- unique(tdr_data $ year )
80
+ if (! all (years %in% 1970 : 2099 )) {
81
+ stop(" the 'year' field must only contain integer years between 1970 and 2099 " , call. = FALSE )
91
82
}
92
83
93
- if (" month" %in% names( tdr ) ) {
94
- if (! all(tdr $ month %in% 1 : 12 )) {
84
+ if (" month" %in% tdr_data_names ) {
85
+ if (! all(tdr_data $ month %in% 1 : 12 )) {
95
86
stop(" the 'month' field must only contain integer values 1-12" , call. = FALSE )
96
87
}
97
88
}
98
- return (invisible (tdr ))
89
+ return (invisible (x ))
99
90
}
100
91
101
92
# ' Check files
@@ -129,17 +120,18 @@ check_files <- function(path) {
129
120
130
121
# try to read (first 100 lines of) CSV file
131
122
test_read_csv_file <-
132
- purrr :: safely(readr :: read_csv )(
123
+ purrr :: safely(vroom :: vroom )(
133
124
file = tdr_csv ,
125
+ delim = " ," ,
134
126
n_max = 100 ,
135
127
col_names = TRUE ,
136
128
show_col_types = FALSE ,
137
- locale = readr :: locale(
129
+ locale = vroom :: locale(
138
130
encoding = " UTF-8" ,
139
131
decimal_mark = " ." ,
140
132
grouping_mark = " " ,
141
133
),
142
- name_repair = " check_unique" ,
134
+ . name_repair = " check_unique" ,
143
135
)
144
136
145
137
if (! is.null(test_read_csv_file $ error )) {
@@ -152,7 +144,9 @@ check_files <- function(path) {
152
144
# ' check CoDEC tdr
153
145
# ' @rdname check_codec_tdr_csv
154
146
# ' @export
155
- check_codec_tdr <- function (tdr_md ) {
147
+ check_codec_tdr <- function (x ) {
148
+
149
+ tdr_md <- as.list(x )
156
150
157
151
# must have "name" and "path" descriptors
158
152
if (! purrr :: pluck_exists(tdr_md , " name" )) stop(" `name` property descriptor is required" , call. = FALSE )
@@ -212,7 +206,7 @@ check_codec_tdr <- function(tdr_md) {
212
206
)
213
207
}
214
208
215
- return (invisible (tdr_md ))
209
+ return (invisible (x ))
216
210
}
217
211
218
212
@@ -242,6 +236,8 @@ check_tdr_path <- function(path) {
242
236
# path ends with .csv
243
237
if (! fs :: path_ext(path ) == " csv" ) stop(" 'path' must end with '.csv'" , call. = FALSE )
244
238
# path can be a URL
239
+
240
+ is_url <- function (.x ) grepl(" ^((http|ftp)s?|sftp)://" , .x )
245
241
if (is_url(path )) return (invisible (NULL ))
246
242
# if not URL, check for absolute path
247
243
if (fs :: is_absolute_path(path )) stop(" 'path' must be a relative file path" )
0 commit comments