Skip to content

Commit 395a872

Browse files
committed
update acs_measures
- added back in more complex calculations; closes #106 - eliminated rounding - less strict about types of variables
1 parent 2fb69bb commit 395a872

File tree

5 files changed

+251
-199
lines changed

5 files changed

+251
-199
lines changed
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# American Community Survey Measures
2+
3+
[![latest github release for acs_measures dpkg](https://img.shields.io/github/v/release/geomarker-io/codec?sort=date&filter=acs_measures-*&display_name=tag&label=%5B%E2%98%B0%5D&labelColor=%238CB4C3&color=%23396175)](https://github.com/geomarker-io/codec/releases?q=acs_measures&expanded=false)
4+
5+
## About
6+
7+
Select population, socioeconomic, and housing measures derived from the U.S. Census American Community Survey (ACS) are generally available from 2013 through 2022 and were selected to cover three domains:
8+
9+
- **Population**: number of kids, number of households (with kids), single-parent households, racial and ethnic composition, limited English speaking households
10+
- **Socioeconomic**: education, income, poverty, employment, health insurance, public income assistance
11+
- **Housing**: vacancy, age, value, tenure, rent to income ratio
12+
13+
## Data
14+
15+
### Types of ACS measures
16+
17+
Measures and their definitions are contained in `hh_acs_measures.R`.
18+
Generally, each of the derived ACS measures are expressed in one of three ways:
19+
20+
1. measures starting with **`n_`** represent a **number** of something, such as `n_household`
21+
22+
2. measures starting with **`prop_`** represent a **proportion** of some total, such as `prop_poverty`
23+
24+
3. measures starting with **`median_`** represent a **median** of a number in a population, such as `median_income`
25+
26+
### Missing Data
27+
28+
Some data are unavailable in certain earlier years because the question was not included in the ACS that year. Additionally, data summaries may be [suppressed ](https://www.census.gov/programs-surveys/acs/technical-documentation/data-suppression.html) by the census bureau.
+222
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
if (tryCatch(read.dcf("DESCRIPTION")[1, "Package"] == "codec", finally = FALSE)) {
2+
devtools::load_all()
3+
} else {
4+
library(codec)
5+
}
6+
message("Using CoDEC, version ", packageVersion("codec"))
7+
8+
library(dplyr, warn.conflicts = FALSE)
9+
10+
all_acs5_variables <-
11+
dpkg::stow("https://api.census.gov/data/2022/acs/acs5/variables.json") |>
12+
jsonlite::read_json()
13+
14+
get_acs_5yr_data <- function(acs_variables, state = "39", county = "061", year = "2022") {
15+
if (Sys.getenv("CENSUS_API_KEY") == "") stop("set CENSUS_API_KEY enviroment variable")
16+
cli::cli_alert_info(glue::glue("getting {paste(acs_variables, collapse = ', ')}; defined as:"))
17+
all_acs5_variables$variables[acs_variables] |>
18+
vapply(\(.) paste(.$concept, .$label, collapse = "!!"), character(1)) |>
19+
stats::setNames(rep_len("*", length(acs_variables))) |>
20+
cli::cli_bullets()
21+
the_resp <-
22+
httr2::request("https://api.census.gov/data") |>
23+
httr2::req_url_path_append(year) |>
24+
httr2::req_url_path_append("acs") |>
25+
httr2::req_url_path_append("acs5") |>
26+
httr2::req_url_query(get = acs_variables, .multi = "comma") |>
27+
## httr2::req_url_query(`for` = "block group:*") |>
28+
## httr2::req_url_query(`in` = glue::glue("state:{state} county:{county} tract:*")) |>
29+
httr2::req_url_query(`for` = "tract:*") |>
30+
httr2::req_url_query(`in` = glue::glue("state:{state} county:{county}")) |>
31+
httr2::req_url_query(`key` = Sys.getenv("CENSUS_API_KEY")) |>
32+
httr2::req_retry() |>
33+
httr2::req_perform() |>
34+
httr2::resp_body_json()
35+
out <-
36+
the_resp |>
37+
purrr::discard_at(1) |>
38+
purrr::list_transpose() |>
39+
stats::setNames(the_resp[[1]]) |>
40+
tibble::as_tibble() |>
41+
dplyr::mutate(dplyr::across(tidyselect::all_of(acs_variables), as.numeric),
42+
census_tract_id_2020 = paste0(state, county, tract),
43+
## census_blockgroup_id_2020 = paste0(state, county, tract, .data$`block group`),
44+
.keep = "none"
45+
)
46+
out <-
47+
out |>
48+
dplyr::mutate(dplyr::across(tidyselect::all_of(acs_variables), \(.) dplyr::na_if(., -666666666)))
49+
return(out)
50+
}
51+
52+
#' make ACS 5 year n data
53+
#' @param X formula: my_acs_var ~ B000000_000
54+
#' @examples
55+
#' make_acs_5y_n_data(n_households ~ B11005_001E)
56+
make_acs_5y_n_data <- function(x) {
57+
var_name <- as.list(as.formula(x))[[2]]
58+
var_census_name <- as.formula(x)[[3]]
59+
out <-
60+
get_acs_5yr_data(as.character(var_census_name)) |>
61+
dplyr::rename({{ var_name }} := {{ var_census_name }}) |>
62+
dplyr::mutate({{ var_name }} := {{ var_name }})
63+
return(out)
64+
}
65+
66+
#' make ACS 5 year percentage data
67+
#' @param x formula: my_acs_var ~ B00000_000 / B00000_000
68+
#' @examples
69+
#' make_acs_5y_prop_data(prcnt_poverty ~ B17001_002E / B17001_001E)
70+
make_acs_5y_prop_data <- function(x) {
71+
vars <- attr(terms(as.formula(x)), "variables")
72+
var_name <- as.list(as.formula(x))[[2]]
73+
var_numerator <- as.formula(x)[[3]][[2]]
74+
var_denominator <- as.formula(x)[[3]][[3]]
75+
out <-
76+
dplyr::left_join(
77+
get_acs_5yr_data(as.character(var_numerator)),
78+
get_acs_5yr_data(as.character(var_denominator)),
79+
by = "census_tract_id_2020"
80+
) |>
81+
dplyr::mutate(
82+
{{ var_name }} := {{ var_numerator }} / {{ var_denominator }},
83+
.keep = "unused"
84+
)
85+
return(out)
86+
}
87+
88+
make_acs_5y_data <- function(x) {
89+
if (inherits(as.formula(x)[[3]], "name")) {
90+
return(make_acs_5y_n_data(x))
91+
}
92+
return(make_acs_5y_prop_data(x))
93+
}
94+
95+
out <-
96+
list(
97+
n_households ~ B11005_001E,
98+
n_households_children ~ B11005_002E,
99+
n_housing_units ~ B25001_001E,
100+
median_home_value ~ B25077_001E,
101+
prop_poverty ~ B17001_002E / B17001_001E,
102+
prop_recieved_public_assistance_income ~ B19058_002E / B19058_001E,
103+
prop_family_households_with_single_householder ~ B11001_004E / B11001_002E,
104+
prop_employment_among_civilian_workforce ~ B23025_004E / B23025_003E,
105+
prop_housing_units_occupied_by_renters ~ B25003_003E / B25003_001E,
106+
prop_median_rent_to_income_ratio_among_renters ~ B25071_001E,
107+
prop_housing_units_vacant ~ B25002_003E / B25002_001E,
108+
prop_white_and_not_hispanic_or_latino ~ B03002_003E / B03002_001E,
109+
prop_black_and_not_hispanic_or_latino ~ B03002_004E / B03002_001E,
110+
prop_white_and_hispanic_or_latino ~ B03002_013E / B03002_001E,
111+
prop_black_and_hispanic_or_latino ~ B03002_014E / B03002_001E
112+
) |>
113+
purrr::map(make_acs_5y_data, .progress = "making acs data")
114+
115+
out$n_persons_under_18 <-
116+
get_acs_5yr_data(c(paste0("B01001_00", 3:6, "E"), paste0("B01001_0", 27:30, "E"))) |>
117+
rowwise() |>
118+
mutate(
119+
n_persons_under_18 = sum(c_across(-census_tract_id_2020)),
120+
.keep = "unused"
121+
) |>
122+
ungroup()
123+
124+
out$prop_health_insurance <-
125+
get_acs_5yr_data(c(
126+
"B27001_001E",
127+
paste0("B27001_00", c(4, 7), "E"),
128+
paste0("B27001_0", seq(10, 28, by = 3), "E"),
129+
paste0("B27001_0", seq(32, 56, by = 3), "E")
130+
)) |>
131+
rowwise() |>
132+
mutate(
133+
prop_health_insurance = sum(c_across(-c(B27001_001E, census_tract_id_2020))) / B27001_001E,
134+
.keep = "unused"
135+
) |>
136+
ungroup()
137+
138+
# rent at least 30% of income
139+
out$rent_burdened <-
140+
get_acs_5yr_data(
141+
c("B25070_001E", paste0("B25070_", c("007", "008", "009", "010"), "E"))
142+
) |>
143+
rowwise() |>
144+
mutate(
145+
prop_rent_burdened = sum(c_across(-c(B25070_001E, census_tract_id_2020))) / B25070_001E,
146+
.keep = "unused"
147+
) |>
148+
ungroup()
149+
150+
out$housing_conditions <-
151+
get_acs_5yr_data(
152+
c(
153+
"B25123_001E",
154+
paste0("B25123_00", 3:6, "E"),
155+
"B25123_009E",
156+
paste0("B25123_0", 10:12, "E")
157+
)
158+
) |>
159+
rowwise() |>
160+
mutate(
161+
prop_housing_conditions = sum(c_across(-c(B25123_001E, census_tract_id_2020))) / B25123_001E,
162+
.keep = "unused"
163+
) |>
164+
ungroup()
165+
166+
out$housing_age <-
167+
get_acs_5yr_data(
168+
c(
169+
"B25034_001E",
170+
paste0("B25034_00", 7:9, "E"),
171+
"B25034_010E"
172+
)
173+
) |>
174+
rowwise() |>
175+
mutate(
176+
prop_built_prior_1980 = sum(c_across(-c(B25034_001E, census_tract_id_2020))) / B25034_001E,
177+
.keep = "unused"
178+
) |>
179+
ungroup()
180+
181+
182+
out$language <-
183+
get_acs_5yr_data(
184+
c(
185+
"C16002_001E",
186+
paste0("C16002_00", c(4, 7), "E"),
187+
paste0("C16002_01", c(0, 3), "E")
188+
)
189+
) |>
190+
rowwise() |>
191+
mutate(
192+
prop_limited_english_speaking = sum(c_across(-c(C16002_001E, census_tract_id_2020))) / C16002_001E,
193+
.keep = "unused"
194+
) |>
195+
ungroup()
196+
197+
# adults with at least high school education
198+
out$edu <-
199+
get_acs_5yr_data(
200+
c("B15003_001E", paste0("B15003_0", 17:25, "E"))
201+
) |>
202+
rowwise() |>
203+
mutate(
204+
prop_adults_hs_edu = sum(c_across(-c(B15003_001E, census_tract_id_2020))) / B15003_001E,
205+
.keep = "unused"
206+
) |>
207+
ungroup()
208+
209+
out_tbl <- purrr::reduce(out, dplyr::left_join, by = "census_tract_id_2020")
210+
211+
out_dpkg <-
212+
out_tbl |>
213+
dplyr::mutate(year = 2022) |>
214+
as_codec_dpkg(
215+
name = "acs_measures",
216+
version = "0.1.0",
217+
title = "American Community Survey Measures",
218+
homepage = "https://github.com/geomarker-io/codec",
219+
description = paste(readLines(fs::path_package("codec", "codec_data", "acs_measures", "README.md")), collapse = "\n")
220+
)
221+
222+
dpkg::dpkg_gh_release(out_dpkg, draft = FALSE)

inst/codec_data/hh_acs_measures/README.md

-53
This file was deleted.

0 commit comments

Comments
 (0)