Plot 1
### Source of all joy from this book
### https://walker-data.com/census-r/
library(tidycensus)
## census_api_key("a8578c37366fe6218e255d7178b4a5bac5f67566", install=TRUE, overwrite=TRUE)
total_population_10 <- get_decennial(
geography = "state",
variables = "P001001",
year = 2010
)
total_population_10
## # A tibble: 52 × 4
## GEOID NAME variable value
## <chr> <chr> <chr> <dbl>
## 1 01 Alabama P001001 4779736
## 2 02 Alaska P001001 710231
## 3 04 Arizona P001001 6392017
## 4 05 Arkansas P001001 2915918
## 5 06 California P001001 37253956
## 6 22 Louisiana P001001 4533372
## 7 21 Kentucky P001001 4339367
## 8 08 Colorado P001001 5029196
## 9 09 Connecticut P001001 3574097
## 10 10 Delaware P001001 897934
## # ℹ 42 more rows
aian_2020 <- get_decennial(
geography = "state",
variables = "P1_005N",
year = 2020,
sumfile = "pl"
)
aian_2020
## # A tibble: 52 × 4
## GEOID NAME variable value
## <chr> <chr> <chr> <dbl>
## 1 42 Pennsylvania P1_005N 31052
## 2 06 California P1_005N 631016
## 3 54 West Virginia P1_005N 3706
## 4 49 Utah P1_005N 41644
## 5 36 New York P1_005N 149690
## 6 11 District of Columbia P1_005N 3193
## 7 02 Alaska P1_005N 111575
## 8 12 Florida P1_005N 94795
## 9 45 South Carolina P1_005N 24303
## 10 38 North Dakota P1_005N 38914
## # ℹ 42 more rows
born_in_mexico <- get_acs(
geography = "state",
variables = "B05006_150",
year = 2020
)
born_in_mexico
## # A tibble: 52 × 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 01 Alabama B05006_150 46927 1846
## 2 02 Alaska B05006_150 4181 709
## 3 04 Arizona B05006_150 510639 8028
## 4 05 Arkansas B05006_150 60236 2182
## 5 06 California B05006_150 3962910 25353
## 6 08 Colorado B05006_150 215778 4888
## 7 09 Connecticut B05006_150 28086 2144
## 8 10 Delaware B05006_150 14616 1065
## 9 11 District of Columbia B05006_150 4026 761
## 10 12 Florida B05006_150 257933 6418
## # ℹ 42 more rows
wi_income <- get_acs(
geography = "county",
variables = "B19013_001",
state = "WI",
year = 2020
)
wi_income
## # A tibble: 72 × 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 55001 Adams County, Wisconsin B19013_001 48906 2387
## 2 55003 Ashland County, Wisconsin B19013_001 47869 3190
## 3 55005 Barron County, Wisconsin B19013_001 52346 2092
## 4 55007 Bayfield County, Wisconsin B19013_001 57257 2496
## 5 55009 Brown County, Wisconsin B19013_001 64728 1419
## 6 55011 Buffalo County, Wisconsin B19013_001 58364 1871
## 7 55013 Burnett County, Wisconsin B19013_001 53555 2513
## 8 55015 Calumet County, Wisconsin B19013_001 76065 2314
## 9 55017 Chippewa County, Wisconsin B19013_001 61215 2064
## 10 55019 Clark County, Wisconsin B19013_001 54463 1089
## # ℹ 62 more rows
bexar_income <- get_acs(
geography = "tract",
variables = "B19013_001",
state = "TX",
county = "Bexar",
year = 2020
)
bexar_income
## # A tibble: 375 × 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 48029110100 Census Tract 1101, Bexar County, Texas B19013_… 52659 14024
## 2 48029110300 Census Tract 1103, Bexar County, Texas B19013_… 43875 19091
## 3 48029110500 Census Tract 1105, Bexar County, Texas B19013_… 10518 2062
## 4 48029110600 Census Tract 1106, Bexar County, Texas B19013_… 16712 2398
## 5 48029110700 Census Tract 1107, Bexar County, Texas B19013_… 18700 8599
## 6 48029111000 Census Tract 1110, Bexar County, Texas B19013_… 48683 6892
## 7 48029111100 Census Tract 1111, Bexar County, Texas B19013_… 55179 10341
## 8 48029120100 Census Tract 1201, Bexar County, Texas B19013_… 80781 16543
## 9 48029120301 Census Tract 1203.01, Bexar County, Texas B19013_… 123212 13430
## 10 48029120302 Census Tract 1203.02, Bexar County, Texas B19013_… 190865 59075
## # ℹ 365 more rows
v16 <- load_variables(2016, "acs5", cache = TRUE)
dim(v16)
## [1] 22816 4
v20 <- load_variables(2020, "acs5", cache = TRUE)
dim(v20)
## [1] 27850 4
tx_sex_hisp <- get_estimates(
geography = "state",
product = "characteristics",
breakdown = c("SEX", "HISP"),
breakdown_labels = TRUE,
state = "TX",
year = 2019
)
tx_sex_hisp
## # A tibble: 9 × 5
## GEOID NAME value SEX HISP
## <chr> <chr> <dbl> <chr> <chr>
## 1 48 Texas 28995881 Both sexes Both Hispanic Origins
## 2 48 Texas 17470303 Both sexes Non-Hispanic
## 3 48 Texas 11525578 Both sexes Hispanic
## 4 48 Texas 14402702 Male Both Hispanic Origins
## 5 48 Texas 8601893 Male Non-Hispanic
## 6 48 Texas 5800809 Male Hispanic
## 7 48 Texas 14593179 Female Both Hispanic Origins
## 8 48 Texas 8868410 Female Non-Hispanic
## 9 48 Texas 5724769 Female Hispanic
bexar_migration <- get_flows(
geography = "county",
state = "TX",
county = "Bexar",
year = 2019
)
bexar_migration
## # A tibble: 4,122 × 7
## GEOID1 GEOID2 FULL1_NAME FULL2_NAME variable estimate moe
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 48029 <NA> Bexar County, Texas Africa MOVEDIN 341 143
## 2 48029 <NA> Bexar County, Texas Africa MOVEDOUT NA NA
## 3 48029 <NA> Bexar County, Texas Africa MOVEDNET NA NA
## 4 48029 <NA> Bexar County, Texas Asia MOVEDIN 5412 1007
## 5 48029 <NA> Bexar County, Texas Asia MOVEDOUT NA NA
## 6 48029 <NA> Bexar County, Texas Asia MOVEDNET NA NA
## 7 48029 <NA> Bexar County, Texas Central America MOVEDIN 3641 1030
## 8 48029 <NA> Bexar County, Texas Central America MOVEDOUT NA NA
## 9 48029 <NA> Bexar County, Texas Central America MOVEDNET NA NA
## 10 48029 <NA> Bexar County, Texas Caribbean MOVEDIN 261 139
## # ℹ 4,112 more rows
library(tidycensus)
ga_wide <- get_acs(
geography = "county",
state = "Texas",
variables = c(medinc = "B19013_001",
medage = "B01002_001"),
output = "wide",
year = 2020
)
library(tidyverse)
options(scipen = 999)
ggplot(ga_wide, aes(x = medincE)) +
geom_histogram()+theme_bw(base_size=16)

metros <- get_acs(
geography = "cbsa",
variables = "DP03_0021P",
summary_var = "B01003_001",
survey = "acs1",
year = 2019
) %>%
slice_max(summary_est, n = 20)
metros %>%
mutate(NAME = str_remove(NAME, "-.*$")) %>%
mutate(NAME = str_remove(NAME, ",.*$")) %>%
ggplot(aes(y = reorder(NAME, estimate), x = estimate)) +
geom_col() +
theme_minimal() +
labs(title = "Public transit commute share",
subtitle = "2019 1-year ACS estimates",
y = "",
x = "ACS estimate",
caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package")

metros <- get_acs(
geography = "cbsa",
variables = "DP03_0021P",
summary_var = "B01003_001",
survey = "acs1",
year = 2018
) %>%
slice_max(summary_est, n = 20)
metros %>%
mutate(NAME = str_remove(NAME, "-.*$")) %>%
mutate(NAME = str_remove(NAME, ",.*$")) %>%
ggplot(aes(y = reorder(NAME, estimate), x = estimate)) +
geom_col() +
theme_minimal() +
labs(title = "Public transit commute share",
subtitle = "2018 1-year ACS estimates",
y = "",
x = "ACS estimate",
caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package")

library(scales)
metros %>%
mutate(NAME = str_remove(NAME, "-.*$")) %>%
mutate(NAME = str_remove(NAME, ",.*$")) %>%
ggplot(aes(y = reorder(NAME, estimate), x = estimate)) +
geom_col(color = "navy", fill = "navy",
alpha = 0.5, width = 0.85) +
theme_minimal(base_size = 12, base_family = "Verdana") +
scale_x_continuous(labels = label_percent(scale = 1)) +
labs(title = "Public transit commute share",
subtitle = "2019 1-year ACS estimates",
y = "",
x = "ACS estimate",
caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package")

tx_income <- get_acs(
state = "Texas",
geography = "county",
variables = c(hhincome = "B19013_001"),
year = 2020
) %>%
mutate(NAME = str_remove(NAME, " County, Texas"))
ggplot(tx_income, aes(x = estimate, y = reorder(NAME, estimate))) +
geom_point(size = 3, color = "darkgreen") +
labs(title = "Median household income",
subtitle = "Counties in TX",
x = "",
y = "ACS estimate") +
theme_minimal(base_size = 12.5) +
scale_x_continuous(labels = label_dollar())

years <- 2005:2019
names(years) <- years
bexar_value <- map_dfr(years, ~{
get_acs(
geography = "county",
variables = "B25077_001",
state = "TX",
county = "Bexar",
year = .x,
survey = "acs1"
)
}, .id = "year")
ggplot(bexar_value, aes(x = year, y = estimate, group = 1)) +
geom_ribbon(aes(ymax = estimate + moe, ymin = estimate - moe),
fill = "navy",
alpha = 0.4) +
geom_line(color = "navy") +
geom_point(color = "navy", size = 2) +
theme_minimal(base_size = 12) +
scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k")) +
labs(title = "Median home value in Bexar County, TX",
x = "Year",
y = "ACS estimate",
caption = "Shaded area represents margin of error around the ACS estimate")

tx1 <- get_estimates(
geography = "state",
state = "TX",
product = "characteristics",
breakdown = c("SEX", "AGEGROUP"),
breakdown_labels = TRUE,
year = 2019
)
tx_filtered <- filter(tx1, str_detect(AGEGROUP, "^Age"),
SEX != "Both sexes") %>%
mutate(value = ifelse(SEX == "Male", -value, value))
ggplot(tx_filtered, aes(x = value, y = AGEGROUP, fill = SEX)) +
geom_col()

housing_val <- get_acs(
geography = "tract",
variables = "B25077_001",
state = "TX",
county = c(
"Comal",
"Bexar",
"Austin",
"Dallas"
),
year = 2020
)
housing_val2 <- separate(
housing_val,
NAME,
into = c("tract", "county", "state"),
sep = ", "
)
housing_val2 %>%
group_by(county) %>%
summarize(min = min(estimate, na.rm = TRUE),
mean = mean(estimate, na.rm = TRUE),
median = median(estimate, na.rm = TRUE),
max = max(estimate, na.rm = TRUE))
## # A tibble: 4 × 5
## county min mean median max
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Austin County 100300 202550 182350 328900
## 2 Bexar County 54900 182630. 155500 909100
## 3 Comal County 83300 285488. 274100 500000
## 4 Dallas County 23600 263838. 181950 2000001
ggplot(housing_val2, aes(x = estimate, fill = county)) +
geom_density(alpha = 0.3)+theme_bw()

ggplot(housing_val2, aes(x = estimate)) +
geom_density(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~county) +
scale_x_continuous(labels = dollar_format(scale = 0.000001,
suffix = "m")) +
theme_minimal(base_size = 14) +
theme(axis.text.y = element_blank(),
axis.text.x = element_text(angle = 45)) +
labs(x = "ACS estimate",
y = "",
title = "Median home values by Census tract, 2015-2019 ACS")

library(ggbeeswarm)
tx_race_income <- get_acs(
geography = "tract",
state = "TX",
county = c("Comal", "Bexar", "Austin", "Dallas"),
variables = c(White = "B03002_003",
Black = "B03002_004",
Asian = "B03002_006",
Hispanic = "B03002_012"),
summary_var = "B19013_001",
year = 2020
) %>%
group_by(GEOID) %>%
filter(estimate == max(estimate, na.rm = TRUE)) %>%
ungroup() %>%
filter(estimate != 0)
ggplot(tx_race_income, aes(x = variable, y = summary_est, color = summary_est)) +
geom_quasirandom(alpha = 0.5) +
coord_flip() +
theme_minimal(base_size = 13) +
scale_color_viridis_c(guide = "none") +
scale_y_continuous(labels = label_dollar()) +
labs(x = "Largest group in Census tract",
y = "Median household income",
title = "Household income distribution by largest racial/ethnic group",
subtitle = "Census tracts, Texas City",
caption = "Data source: 2016-2020 ACS")

library(geofacet)
us_pyramid_data <- get_estimates(
geography = "state",
product = "characteristics",
breakdown = c("SEX", "AGEGROUP"),
breakdown_labels = TRUE,
year = 2019
) %>%
filter(str_detect(AGEGROUP, "^Age"),
SEX != "Both sexes") %>%
group_by(NAME) %>%
mutate(prop = value / sum(value, na.rm = TRUE)) %>%
ungroup() %>%
mutate(prop = ifelse(SEX == "Male", -prop, prop))
ggplot(us_pyramid_data, aes(x = prop, y = AGEGROUP, fill = SEX)) +
geom_col(width = 1) +
theme_minimal() +
scale_fill_manual(values = c("darkred", "navy")) +
facet_geo(~NAME, grid = "us_state_with_DC_PR_grid2",
label = "code") +
theme(axis.text = element_blank(),
strip.text.x = element_text(size = 8)) +
labs(x = "",
y = "",
title = "Population structure by age and sex",
fill = "",
caption = "Data source: US Census Bureau population estimates & tidycensus R package")
