1 awesome tidycensus 01

1.1 Plot 1

### Source of all joy from this book
### https://walker-data.com/census-r/
library(tidycensus)
## census_api_key("a8578c37366fe6218e255d7178b4a5bac5f67566", install=TRUE, overwrite=TRUE)

total_population_10 <- get_decennial(
  geography = "state", 
  variables = "P001001",
  year = 2010
)
total_population_10
## # A tibble: 52 × 4
##    GEOID NAME        variable    value
##    <chr> <chr>       <chr>       <dbl>
##  1 01    Alabama     P001001   4779736
##  2 02    Alaska      P001001    710231
##  3 04    Arizona     P001001   6392017
##  4 05    Arkansas    P001001   2915918
##  5 06    California  P001001  37253956
##  6 22    Louisiana   P001001   4533372
##  7 21    Kentucky    P001001   4339367
##  8 08    Colorado    P001001   5029196
##  9 09    Connecticut P001001   3574097
## 10 10    Delaware    P001001    897934
## # ℹ 42 more rows
aian_2020 <- get_decennial(
  geography = "state",
  variables = "P1_005N",
  year = 2020,
  sumfile = "pl"
)
aian_2020
## # A tibble: 52 × 4
##    GEOID NAME                 variable  value
##    <chr> <chr>                <chr>     <dbl>
##  1 42    Pennsylvania         P1_005N   31052
##  2 06    California           P1_005N  631016
##  3 54    West Virginia        P1_005N    3706
##  4 49    Utah                 P1_005N   41644
##  5 36    New York             P1_005N  149690
##  6 11    District of Columbia P1_005N    3193
##  7 02    Alaska               P1_005N  111575
##  8 12    Florida              P1_005N   94795
##  9 45    South Carolina       P1_005N   24303
## 10 38    North Dakota         P1_005N   38914
## # ℹ 42 more rows
born_in_mexico <- get_acs(
  geography = "state", 
  variables = "B05006_150",
  year = 2020
)
born_in_mexico
## # A tibble: 52 × 5
##    GEOID NAME                 variable   estimate   moe
##    <chr> <chr>                <chr>         <dbl> <dbl>
##  1 01    Alabama              B05006_150    46927  1846
##  2 02    Alaska               B05006_150     4181   709
##  3 04    Arizona              B05006_150   510639  8028
##  4 05    Arkansas             B05006_150    60236  2182
##  5 06    California           B05006_150  3962910 25353
##  6 08    Colorado             B05006_150   215778  4888
##  7 09    Connecticut          B05006_150    28086  2144
##  8 10    Delaware             B05006_150    14616  1065
##  9 11    District of Columbia B05006_150     4026   761
## 10 12    Florida              B05006_150   257933  6418
## # ℹ 42 more rows
wi_income <- get_acs(
  geography = "county", 
  variables = "B19013_001", 
  state = "WI",
  year = 2020
)

wi_income 
## # A tibble: 72 × 5
##    GEOID NAME                       variable   estimate   moe
##    <chr> <chr>                      <chr>         <dbl> <dbl>
##  1 55001 Adams County, Wisconsin    B19013_001    48906  2387
##  2 55003 Ashland County, Wisconsin  B19013_001    47869  3190
##  3 55005 Barron County, Wisconsin   B19013_001    52346  2092
##  4 55007 Bayfield County, Wisconsin B19013_001    57257  2496
##  5 55009 Brown County, Wisconsin    B19013_001    64728  1419
##  6 55011 Buffalo County, Wisconsin  B19013_001    58364  1871
##  7 55013 Burnett County, Wisconsin  B19013_001    53555  2513
##  8 55015 Calumet County, Wisconsin  B19013_001    76065  2314
##  9 55017 Chippewa County, Wisconsin B19013_001    61215  2064
## 10 55019 Clark County, Wisconsin    B19013_001    54463  1089
## # ℹ 62 more rows
bexar_income <- get_acs(
  geography = "tract", 
  variables = "B19013_001", 
  state = "TX", 
  county = "Bexar",
  year = 2020
)
bexar_income
## # A tibble: 375 × 5
##    GEOID       NAME                                      variable estimate   moe
##    <chr>       <chr>                                     <chr>       <dbl> <dbl>
##  1 48029110100 Census Tract 1101, Bexar County, Texas    B19013_…    52659 14024
##  2 48029110300 Census Tract 1103, Bexar County, Texas    B19013_…    43875 19091
##  3 48029110500 Census Tract 1105, Bexar County, Texas    B19013_…    10518  2062
##  4 48029110600 Census Tract 1106, Bexar County, Texas    B19013_…    16712  2398
##  5 48029110700 Census Tract 1107, Bexar County, Texas    B19013_…    18700  8599
##  6 48029111000 Census Tract 1110, Bexar County, Texas    B19013_…    48683  6892
##  7 48029111100 Census Tract 1111, Bexar County, Texas    B19013_…    55179 10341
##  8 48029120100 Census Tract 1201, Bexar County, Texas    B19013_…    80781 16543
##  9 48029120301 Census Tract 1203.01, Bexar County, Texas B19013_…   123212 13430
## 10 48029120302 Census Tract 1203.02, Bexar County, Texas B19013_…   190865 59075
## # ℹ 365 more rows
v16 <- load_variables(2016, "acs5", cache = TRUE)
dim(v16)
## [1] 22816     4
v20 <- load_variables(2020, "acs5", cache = TRUE)
dim(v20)
## [1] 27850     4
tx_sex_hisp <- get_estimates(
  geography = "state",
  product = "characteristics",
  breakdown = c("SEX", "HISP"),
  breakdown_labels = TRUE,
  state = "TX",
  year = 2019
)
tx_sex_hisp
## # A tibble: 9 × 5
##   GEOID NAME     value SEX        HISP                 
##   <chr> <chr>    <dbl> <chr>      <chr>                
## 1 48    Texas 28995881 Both sexes Both Hispanic Origins
## 2 48    Texas 17470303 Both sexes Non-Hispanic         
## 3 48    Texas 11525578 Both sexes Hispanic             
## 4 48    Texas 14402702 Male       Both Hispanic Origins
## 5 48    Texas  8601893 Male       Non-Hispanic         
## 6 48    Texas  5800809 Male       Hispanic             
## 7 48    Texas 14593179 Female     Both Hispanic Origins
## 8 48    Texas  8868410 Female     Non-Hispanic         
## 9 48    Texas  5724769 Female     Hispanic
bexar_migration <- get_flows(
  geography = "county",
  state = "TX",
  county = "Bexar",
  year = 2019
)
bexar_migration
## # A tibble: 4,122 × 7
##    GEOID1 GEOID2 FULL1_NAME          FULL2_NAME      variable estimate   moe
##    <chr>  <chr>  <chr>               <chr>           <chr>       <dbl> <dbl>
##  1 48029  <NA>   Bexar County, Texas Africa          MOVEDIN       341   143
##  2 48029  <NA>   Bexar County, Texas Africa          MOVEDOUT       NA    NA
##  3 48029  <NA>   Bexar County, Texas Africa          MOVEDNET       NA    NA
##  4 48029  <NA>   Bexar County, Texas Asia            MOVEDIN      5412  1007
##  5 48029  <NA>   Bexar County, Texas Asia            MOVEDOUT       NA    NA
##  6 48029  <NA>   Bexar County, Texas Asia            MOVEDNET       NA    NA
##  7 48029  <NA>   Bexar County, Texas Central America MOVEDIN      3641  1030
##  8 48029  <NA>   Bexar County, Texas Central America MOVEDOUT       NA    NA
##  9 48029  <NA>   Bexar County, Texas Central America MOVEDNET       NA    NA
## 10 48029  <NA>   Bexar County, Texas Caribbean       MOVEDIN       261   139
## # ℹ 4,112 more rows
library(tidycensus)

ga_wide <- get_acs(
  geography = "county",
  state = "Texas",
  variables = c(medinc = "B19013_001",
                medage = "B01002_001"),
  output = "wide",
  year = 2020
)


library(tidyverse)
options(scipen = 999)

ggplot(ga_wide, aes(x = medincE)) + 
  geom_histogram()+theme_bw(base_size=16)

metros <-  get_acs(
  geography = "cbsa",
  variables = "DP03_0021P",
  summary_var = "B01003_001",
  survey = "acs1",
  year = 2019
) %>%
  slice_max(summary_est, n = 20)


metros %>%
  mutate(NAME = str_remove(NAME, "-.*$")) %>%
  mutate(NAME = str_remove(NAME, ",.*$")) %>%
  ggplot(aes(y = reorder(NAME, estimate), x = estimate)) + 
  geom_col() +  
  theme_minimal() + 
  labs(title = "Public transit commute share", 
       subtitle = "2019 1-year ACS estimates", 
       y = "", 
       x = "ACS estimate", 
       caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package") 

metros <-  get_acs(
  geography = "cbsa",
  variables = "DP03_0021P",
  summary_var = "B01003_001",
  survey = "acs1",
  year = 2018
) %>%
  slice_max(summary_est, n = 20)


metros %>%
  mutate(NAME = str_remove(NAME, "-.*$")) %>%
  mutate(NAME = str_remove(NAME, ",.*$")) %>%
  ggplot(aes(y = reorder(NAME, estimate), x = estimate)) + 
  geom_col() +  
  theme_minimal() + 
  labs(title = "Public transit commute share", 
       subtitle = "2018 1-year ACS estimates", 
       y = "", 
       x = "ACS estimate", 
       caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package") 

library(scales)

metros %>%
  mutate(NAME = str_remove(NAME, "-.*$")) %>%
  mutate(NAME = str_remove(NAME, ",.*$")) %>%
  ggplot(aes(y = reorder(NAME, estimate), x = estimate)) + 
  geom_col(color = "navy", fill = "navy", 
           alpha = 0.5, width = 0.85) +  
  theme_minimal(base_size = 12, base_family = "Verdana") + 
  scale_x_continuous(labels = label_percent(scale = 1)) + 
  labs(title = "Public transit commute share", 
       subtitle = "2019 1-year ACS estimates", 
       y = "", 
       x = "ACS estimate", 
       caption = "Source: ACS Data Profile variable DP03_0021P via the tidycensus R package") 

tx_income <- get_acs(
  state = "Texas",
  geography = "county",
  variables = c(hhincome = "B19013_001"),
  year = 2020
) %>%
  mutate(NAME = str_remove(NAME, " County, Texas"))

ggplot(tx_income, aes(x = estimate, y = reorder(NAME, estimate))) + 
  geom_point(size = 3, color = "darkgreen") + 
  labs(title = "Median household income", 
       subtitle = "Counties in TX", 
       x = "", 
       y = "ACS estimate") + 
  theme_minimal(base_size = 12.5) + 
  scale_x_continuous(labels = label_dollar())

years <- 2005:2019
names(years) <- years

bexar_value <- map_dfr(years, ~{
  get_acs(
    geography = "county",
    variables = "B25077_001",
    state = "TX",
    county = "Bexar",
    year = .x,
    survey = "acs1"
  )
}, .id = "year")


ggplot(bexar_value, aes(x = year, y = estimate, group = 1)) + 
  geom_ribbon(aes(ymax = estimate + moe, ymin = estimate - moe), 
              fill = "navy",
              alpha = 0.4) + 
  geom_line(color = "navy") + 
  geom_point(color = "navy", size = 2) + 
  theme_minimal(base_size = 12) + 
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k")) + 
  labs(title = "Median home value in Bexar County, TX",
       x = "Year",
       y = "ACS estimate",
       caption = "Shaded area represents margin of error around the ACS estimate")

tx1 <- get_estimates(
  geography = "state",
  state = "TX",
  product = "characteristics",
  breakdown = c("SEX", "AGEGROUP"),
  breakdown_labels = TRUE,
  year = 2019
) 

tx_filtered <- filter(tx1, str_detect(AGEGROUP, "^Age"), 
                        SEX != "Both sexes") %>%
  mutate(value = ifelse(SEX == "Male", -value, value))

ggplot(tx_filtered, aes(x = value, y = AGEGROUP, fill = SEX)) + 
  geom_col()

housing_val <- get_acs(
  geography = "tract", 
  variables = "B25077_001", 
  state = "TX", 
  county = c(
    "Comal", 
    "Bexar", 
    "Austin", 
    "Dallas"
  ),
  year = 2020
)

housing_val2 <- separate(
  housing_val, 
  NAME, 
  into = c("tract", "county", "state"), 
  sep = ", "
)

housing_val2 %>%
  group_by(county) %>%
  summarize(min = min(estimate, na.rm = TRUE), 
            mean = mean(estimate, na.rm = TRUE), 
            median = median(estimate, na.rm = TRUE), 
            max = max(estimate, na.rm = TRUE))
## # A tibble: 4 × 5
##   county           min    mean median     max
##   <chr>          <dbl>   <dbl>  <dbl>   <dbl>
## 1 Austin County 100300 202550  182350  328900
## 2 Bexar County   54900 182630. 155500  909100
## 3 Comal County   83300 285488. 274100  500000
## 4 Dallas County  23600 263838. 181950 2000001
ggplot(housing_val2, aes(x = estimate, fill = county)) + 
  geom_density(alpha = 0.3)+theme_bw()

ggplot(housing_val2, aes(x = estimate)) +
  geom_density(fill = "darkgreen", color = "darkgreen", alpha = 0.5) + 
  facet_wrap(~county) + 
  scale_x_continuous(labels = dollar_format(scale = 0.000001, 
                                            suffix = "m")) + 
  theme_minimal(base_size = 14) + 
  theme(axis.text.y = element_blank(), 
        axis.text.x = element_text(angle = 45)) + 
  labs(x = "ACS estimate",
       y = "",
       title = "Median home values by Census tract, 2015-2019 ACS")

library(ggbeeswarm)

tx_race_income <- get_acs(
  geography = "tract", 
  state = "TX",  
  county = c("Comal", "Bexar", "Austin", "Dallas"),
  variables = c(White = "B03002_003", 
                Black = "B03002_004", 
                Asian = "B03002_006",
                Hispanic = "B03002_012"), 
  summary_var = "B19013_001",
  year = 2020
) %>%
  group_by(GEOID) %>%
  filter(estimate == max(estimate, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(estimate != 0)

ggplot(tx_race_income, aes(x = variable, y = summary_est, color = summary_est)) +
  geom_quasirandom(alpha = 0.5) + 
  coord_flip() + 
  theme_minimal(base_size = 13) + 
  scale_color_viridis_c(guide = "none") + 
  scale_y_continuous(labels = label_dollar()) + 
  labs(x = "Largest group in Census tract", 
       y = "Median household income", 
       title = "Household income distribution by largest racial/ethnic group", 
       subtitle = "Census tracts, Texas City", 
       caption = "Data source: 2016-2020 ACS")

library(geofacet)

us_pyramid_data <- get_estimates(
  geography = "state",
  product = "characteristics",
  breakdown = c("SEX", "AGEGROUP"),
  breakdown_labels = TRUE,
  year = 2019
) %>%
  filter(str_detect(AGEGROUP, "^Age"),
         SEX != "Both sexes") %>%
  group_by(NAME) %>%
  mutate(prop = value / sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(prop = ifelse(SEX == "Male", -prop, prop))

ggplot(us_pyramid_data, aes(x = prop, y = AGEGROUP, fill = SEX)) + 
  geom_col(width = 1) + 
  theme_minimal() + 
  scale_fill_manual(values = c("darkred", "navy")) + 
  facet_geo(~NAME, grid = "us_state_with_DC_PR_grid2",
            label = "code") + 
  theme(axis.text = element_blank(),
        strip.text.x = element_text(size = 8)) + 
  labs(x = "", 
       y = "", 
       title = "Population structure by age and sex", 
       fill = "", 
       caption = "Data source: US Census Bureau population estimates & tidycensus R package")