library("tidycensus")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("terra")
## terra 1.5.21
##
## Attaching package: 'terra'
## The following object is masked from 'package:dplyr':
##
## src
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:ggplot2':
##
## arrow
library("tmap")
library("mapview")
library("rosm")
library("crsuggest")
## Using the EPSG Dataset v10.019, a product of the International Association of Oil & Gas Producers.
## Please view the terms of use at https://epsg.org/terms-of-use.html.
library("tigris")
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library("sf")
## Linking to GEOS 3.9.1, GDAL 3.2.3, PROJ 7.2.1; sf_use_s2() is TRUE
library("leafsync")
library("ggiraph")
library("ggplot2")
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:terra':
##
## rescale
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Variable lookup tables for the "pl" decennial dataset, one per census year.
# cache = TRUE is passed so tidycensus can reuse a previously fetched table.
decennial_2010_vars <- load_variables(year = 2010, dataset = "pl", cache = TRUE)
decennial_2020_vars <- load_variables(year = 2020, dataset = "pl", cache = TRUE)
# Get home value 2011 - 2020
# `years` is named by itself so that map_dfr() can write the year into the
# `year` column of the stacked result via .id.
years <- setNames(2011:2020, 2011:2020)
home_value <- map_dfr(
  years,
  function(acs_year) {
    get_acs(
      geography = "cbsa",
      variables = "B25077_001",
      year = acs_year,
      survey = "acs5"
    )
  },
  .id = "year"
)
## Getting data from the 2007-2011 5-year ACS
## Getting data from the 2008-2012 5-year ACS
## Getting data from the 2009-2013 5-year ACS
## Getting data from the 2010-2014 5-year ACS
## Getting data from the 2011-2015 5-year ACS
## Getting data from the 2012-2016 5-year ACS
## Getting data from the 2013-2017 5-year ACS
## Getting data from the 2014-2018 5-year ACS
## Getting data from the 2015-2019 5-year ACS
## Getting data from the 2016-2020 5-year ACS
# Rank every CBSA/year row by home value estimate, highest first.
home_value1 <- home_value %>%
  arrange(desc(estimate))

# Top 100 CBSAs by home value for the last and first year of the series.
# These must be taken from the sorted table (home_value1): filter() keeps row
# order, so head(100) on the unsorted `home_value` would simply return the
# first 100 rows as downloaded rather than the 100 highest estimates.
# `year` comes from map_dfr(.id = "year") and is therefore a character column.
value1 <- home_value1 %>%
  filter(year == "2020") %>%
  head(100)
value2 <- home_value1 %>%
  filter(year == "2011") %>%
  head(100)
# Get median income
# One ACS 5-year request per year; the names of `years` become the `year`
# column of the row-bound result.
years <- 2011:2020
names(years) <- years
fetch_cbsa_income <- function(acs_year) {
  get_acs(
    geography = "cbsa",
    variables = "B19013_001",
    year = acs_year,
    survey = "acs5"
  )
}
income <- map_dfr(years, fetch_cbsa_income, .id = "year")
## Getting data from the 2007-2011 5-year ACS
## Getting data from the 2008-2012 5-year ACS
## Getting data from the 2009-2013 5-year ACS
## Getting data from the 2010-2014 5-year ACS
## Getting data from the 2011-2015 5-year ACS
## Getting data from the 2012-2016 5-year ACS
## Getting data from the 2013-2017 5-year ACS
## Getting data from the 2014-2018 5-year ACS
## Getting data from the 2015-2019 5-year ACS
## Getting data from the 2016-2020 5-year ACS
# Sort all CBSA/year rows by income estimate (highest first), then keep the
# 100 highest rows for 2020 and for 2011.
income <- arrange(income, desc(estimate))
income1 <- head(filter(income, year == 2020), 100)
income2 <- head(filter(income, year == 2011), 100)
# County-level B25077_001 (home value) from the 2020 5-year ACS, with
# feature geometry attached so it can be mapped.
county_value <- get_acs(
  geography  = "county",
  variables  = "B25077_001",
  survey     = "acs5",
  year       = 2020,
  geometry   = TRUE,
  resolution = "20m"
)
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 19%
|
|============== | 20%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 31%
|
|======================= | 33%
|
|========================= | 35%
|
|========================= | 36%
|
|=========================== | 38%
|
|============================ | 41%
|
|============================== | 43%
|
|================================= | 48%
|
|=================================== | 50%
|
|===================================== | 53%
|
|========================================= | 58%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================= | 71%
|
|=================================================== | 73%
|
|====================================================== | 77%
|
|======================================================== | 81%
|
|============================================================= | 87%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|======================================================================| 100%
# Reposition geometries with shift_geometry() and build the "NAME: estimate"
# label used as the hover tooltip.
county_value_shifted <- shift_geometry(county_value, position = "outside")
county_value_shifted <- mutate(
  county_value_shifted,
  tooltip = paste0(NAME, ": ", estimate)
)

# Choropleth of 2020 county home values (dollar-formatted legend).
gg <- ggplot(data = county_value_shifted, mapping = aes(fill = estimate)) +
  geom_sf_interactive(
    mapping = aes(tooltip = tooltip, data_id = NAME),
    size = 0.1
  ) +
  scale_fill_viridis_c(
    option = "viridis",
    labels = scales::dollar,
    direction = -1
  ) +
  labs(
    title = "Median home value by County, 2020",
    caption = "Data source: 2020 5-year ACS, US Census Bureau",
    fill = "ACS estimate"
  ) +
  theme_void()
gg
#girafe(ggobj = gg) %>%
#girafe_options(opts_hover(css = "fill:cyan;"),
#opts_zoom(max = 10))
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave("homevalue.png")
## Saving 7 x 5 in image
# County-level B19013_001 (income) from the 2020 5-year ACS, with feature
# geometry attached so it can be mapped.
county_income <- get_acs(
  geography  = "county",
  variables  = "B19013_001",
  survey     = "acs5",
  year       = 2020,
  geometry   = TRUE,
  resolution = "20m"
)
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Export the raw 2020 county income table.
# NOTE(review): county_income was requested with geometry = TRUE, so it still
# carries its geometry column here - confirm that is wanted in the CSV.
write_csv(county_income, "county_income2020.csv")

# Reposition geometries with shift_geometry() and build the "NAME: estimate"
# hover label.
county_income_shifted <- shift_geometry(county_income, position = "outside")
county_income_shifted <- mutate(
  county_income_shifted,
  tooltip = paste0(NAME, ": ", estimate)
)

# Choropleth of 2020 county income with fixed legend breaks every $25,000.
gg <- ggplot(data = county_income_shifted, mapping = aes(fill = estimate)) +
  geom_sf_interactive(
    mapping = aes(tooltip = tooltip, data_id = NAME),
    size = 0.1
  ) +
  scale_fill_viridis_c(
    option = "viridis",
    labels = scales::dollar,
    direction = -1,
    breaks = seq(25000, 125000, by = 25000)
  ) +
  labs(
    title = "Median income by County, 2020",
    caption = "Data source: 2020 5-year ACS, US Census Bureau",
    fill = "ACS estimate"
  ) +
  theme_void()
gg
# No plot argument is given, so ggsave() writes the most recent plot.
ggsave("income.png")
## Saving 7 x 5 in image
# County-level B25077_001 (home value) from the 2010 5-year ACS, with
# feature geometry; the 2010 counterpart of `county_value`.
county_value1 <- get_acs(
  geography  = "county",
  variables  = "B25077_001",
  survey     = "acs5",
  year       = 2010,
  geometry   = TRUE,
  resolution = "20m"
)
## Getting data from the 2006-2010 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|========= | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 18%
|
|============= | 19%
|
|============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 35%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================== | 48%
|
|=================================== | 50%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|========================================== | 60%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|======================================================================| 100%
# Reposition geometries with shift_geometry() and build the "NAME: estimate"
# hover label for the 2010 home value data.
county_value_shifted1 <- shift_geometry(county_value1, position = "outside")
county_value_shifted1 <- mutate(
  county_value_shifted1,
  tooltip = paste0(NAME, ": ", estimate)
)

# Choropleth of 2010 county home values (dollar-formatted legend).
gg <- ggplot(data = county_value_shifted1, mapping = aes(fill = estimate)) +
  geom_sf_interactive(
    mapping = aes(tooltip = tooltip, data_id = NAME),
    size = 0.1
  ) +
  scale_fill_viridis_c(
    option = "viridis",
    labels = scales::dollar,
    direction = -1
  ) +
  labs(
    title = "Median home value by County, 2010",
    caption = "Data source: 2010 5-year ACS, US Census Bureau",
    fill = "ACS estimate"
  ) +
  theme_void()
gg
# County-level B19013_001 (income) from the 2010 5-year ACS, with feature
# geometry; the 2010 counterpart of `county_income`.
county_income1 <- get_acs(
  geography  = "county",
  variables  = "B19013_001",
  survey     = "acs5",
  year       = 2010,
  geometry   = TRUE,
  resolution = "20m"
)
## Getting data from the 2006-2010 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Reposition geometries with shift_geometry() and build the "NAME: estimate"
# hover label for the 2010 income data.
county_income_shifted1 <- county_income1 %>%
  shift_geometry(position = "outside") %>%
  mutate(tooltip = paste(NAME, estimate, sep = ": "))

# Choropleth of 2010 county income with fixed legend breaks every $25,000.
# The title says "income": this map plots B19013_001, not home value (the
# previous title was copied from the home value map).
gg <- ggplot(county_income_shifted1, aes(fill = estimate)) +
  geom_sf_interactive(
    aes(tooltip = tooltip, data_id = NAME),
    size = 0.1
  ) +
  scale_fill_viridis_c(
    option = "viridis",
    labels = scales::dollar,
    direction = -1,
    breaks = c(25000, 50000, 75000, 100000, 125000)
  ) +
  labs(
    title = "Median income by County, 2010",
    caption = "Data source: 2010 5-year ACS, US Census Bureau",
    fill = "ACS estimate"
  ) +
  theme_void()
gg
# ---- Income change 2010 -> 2020 ------------------------------------------
# Plain (geometry-free) per-county income tables, one per ACS vintage.
re_income2010 <- county_income1 %>%
  rename("income_2010" = "estimate") %>%
  select("GEOID", "NAME", "income_2010") %>%
  st_drop_geometry()
re_income2020 <- county_income %>%
  rename("income_2020" = "estimate") %>%
  select("GEOID", "NAME", "income_2020") %>%
  st_drop_geometry()

# Joining on GEOID duplicates NAME as NAME.x / NAME.y; keep only NAME.x.
# `percent` is the percentage change in income from 2010 to 2020.
df <- inner_join(re_income2010, re_income2020, by = "GEOID") %>%
  select(!"NAME.y") %>%
  mutate(percent = (income_2020 - income_2010) / income_2010 * 100) %>%
  arrange(desc(percent))

# ---- Home value change 2010 -> 2020 --------------------------------------
re_value2010 <- county_value1 %>%
  rename("value_2010" = "estimate") %>%
  select("GEOID", "NAME", "value_2010") %>%
  st_drop_geometry()
re_value2020 <- county_value %>%
  rename("value_2020" = "estimate") %>%
  select("GEOID", "NAME", "value_2020") %>%
  st_drop_geometry()

# `percent2` is the percentage change in home value from 2010 to 2020.
df2 <- inner_join(re_value2010, re_value2020, by = "GEOID") %>%
  select(!"NAME.y") %>%
  mutate(percent2 = (value_2020 - value_2010) / value_2010 * 100) %>%
  arrange(desc(percent2))

# Combined table. Both inputs carry NAME.x, so the join yields NAME.x.x and
# NAME.x.y; the second copy is dropped.
df3 <- inner_join(df, df2, by = "GEOID") %>%
  select(!"NAME.x.y")

# ---- Ranked exports --------------------------------------------------------
# arrange() always sorts missing values to the end, even with desc(). The
# "top" tables are unaffected, but for the "bottom" tables rows with a missing
# ranking value must be removed first; otherwise tail(100) returns the
# counties with NA estimates instead of the 100 lowest values.

# 100 largest home value increases.
df4 <- df3 %>%
  arrange(desc(percent2)) %>%
  head(100)
write_csv(df4, "head_value.csv")

# 100 smallest home value increases (or largest decreases).
df5 <- df3 %>%
  filter(!is.na(percent2)) %>%
  arrange(desc(percent2)) %>%
  tail(100)
write_csv(df5, "tail_value.csv")

# 100 highest 2020 home values.
df6 <- df3 %>%
  arrange(desc(value_2020)) %>%
  head(100)
write_csv(df6, "home_value_2020.csv")

# 100 lowest 2020 home values.
df7 <- df3 %>%
  filter(!is.na(value_2020)) %>%
  arrange(desc(value_2020)) %>%
  tail(100)
write_csv(df7, "home_value_2020_tail.csv")
# Race
# 2020 decennial counts per county, one column per group (output = "wide"),
# with P2_001N attached as `summary_value` to serve as the denominator.
race_variables <- c(
  Hispanic = "P2_002N",
  White = "P2_005N",
  Black = "P2_006N",
  Native = "P2_007N",
  Asian = "P2_008N"
)
race <- get_decennial(
  geography = "county",
  variables = race_variables,
  summary_var = "P2_001N",
  year = 2020,
  output = "wide"
)
race <- mutate(race, percent_white = 100 * (White / summary_value))
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data summary file
## Note: 2020 decennial Census data use differential privacy, a technique that
## introduces errors into data to preserve respondent confidentiality.
## ℹ Small counts should be interpreted with caution.
## ℹ See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
## This message is displayed once per session.
#race 2010
# race1 <- get_decennial(
# geography = "county",
# variables = c(
# Hispanic = "P002002",
# White = "P002005",
# Black = "P002006",
# Native = "P002007",
# Asian = "P002008"
# ),
# summary_var = "P002001",
# year = 2010,
# output = "wide"
# ) #%>%
#mutate(percent_white = 100 * (White / summary_value))
# Attach 2020 home values to the county race table, order counties by home
# value (highest first) and add each group's share of `summary_value` in
# percent. The join duplicates NAME as NAME.x / NAME.y; NAME.y is dropped.
race_home <- inner_join(race, re_value2020, by = "GEOID") %>%
  select(!"NAME.y") %>%
  arrange(desc(value_2020)) %>%
  mutate(
    percent_asian = (Asian / summary_value) * 100,
    percent_black = (Black / summary_value) * 100,
    percent_hispanic = (Hispanic / summary_value) * 100
  )