This is a walkthrough of the material at https://walkerke.github.io/tidycensus/articles/basic-usage.html
First, get a Census API key and load the required libraries. Make sure that the tidycensus and viridis packages are installed on your computer.
library(tidyverse)
## ── Attaching packages ─────────────────
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidycensus)
library(viridis)
## Loading required package: viridisLite
# census_api_key("YOUR_CENSUS_API_KEY")  # supply your own key (e.g. via the CENSUS_API_KEY environment variable); never commit a real key
Get the median gross rent by state from the 1990 decennial census.
# State-level values of variable H043A001 (median gross rent) from the 1990
# decennial census.
# NOTE(review): recent tidycensus releases may no longer serve year = 1990 —
# confirm against the installed version.
m90 <- get_decennial(
  year = 1990,
  variables = "H043A001",
  geography = "state"
)
## Getting data from the 1990 decennial Census
# Preview the first rows of the result.
head(m90)
Do Walker’s graph. Play with fig.width and fig.height in the R chunk spec to get something readable.
# Dot plot of the 1990 values: one point per state, with the states ordered
# along the y axis by their value.
ggplot(data = m90, mapping = aes(x = value, y = reorder(NAME, value))) +
  geom_point()
The statistical data is not in the decennial data after 1990, so we have to use the most recent 5-year ACS data instead.
Let’s get the list of variables for the most recent ACS.
# Variable lookup table for the 2016 5-year ACS (cache = TRUE is passed so the
# table can be reused by later calls).
v16 <- load_variables(year = 2016, dataset = "acs5", cache = TRUE)
# Show the first rows, then the column structure.
head(v16)
str(v16)
## Classes 'tbl_df', 'tbl' and 'data.frame': 22815 obs. of 3 variables:
## $ name : chr "B00001_001" "B00002_001" "B01001_001" "B01001_002" ...
## $ label : chr "Estimate!!Total" "Estimate!!Total" "Estimate!!Total" "Estimate!!Total!!Male" ...
## $ concept: chr "UNWEIGHTED SAMPLE COUNT OF THE POPULATION" "UNWEIGHTED SAMPLE HOUSING UNITS" "SEX BY AGE" "SEX BY AGE" ...
Inspect v16 with View(v16). Also consult American FactFinder and the ACS manual for the meaning of the variables.
Let’s get the most recent 5-year ACS variables.
# Repeats the load_variables() call made earlier in this walkthrough and
# prints the structure of the resulting table.
v16 <- load_variables(
  year = 2016,
  dataset = "acs5",
  cache = TRUE
)
str(v16)
## Classes 'tbl_df', 'tbl' and 'data.frame': 22815 obs. of 3 variables:
## $ name : chr "B00001_001" "B00002_001" "B01001_001" "B01001_002" ...
## $ label : chr "Estimate!!Total" "Estimate!!Total" "Estimate!!Total" "Estimate!!Total!!Male" ...
## $ concept: chr "UNWEIGHTED SAMPLE COUNT OF THE POPULATION" "UNWEIGHTED SAMPLE HOUSING UNITS" "SEX BY AGE" "SEX BY AGE" ...
Walker’s Vermont Example - Change to WA
# Median income (B19013_001, renamed to "medincome") for every county in
# Washington.
# year and survey are pinned to the 2012-2016 5-year ACS reported in the
# output below; left unset, the result depends on the default year of the
# installed tidycensus version.
wa <- get_acs(
  geography = "county",
  variables = c(medincome = "B19013_001"),
  state = "WA",
  year = 2016,
  survey = "acs5"
)
## Getting data from the 2012-2016 5-year ACS
wa
# Median income (B19013_001, renamed to "medincome") for every state.
# year and survey are pinned to the 2012-2016 5-year ACS reported in the
# output below; left unset, the result depends on the default year of the
# installed tidycensus version.
med_state <- get_acs(
  geography = "state",
  variables = c(medincome = "B19013_001"),
  year = 2016,
  survey = "acs5"
)
## Getting data from the 2012-2016 5-year ACS
med_state
Let’s make a Cleveland dotplot of median income of Washington counties by modifying Walker’s code.
# Cleveland dot plot of the county estimates: counties are ordered by estimate,
# each point is the estimate and the horizontal bar spans estimate +/- moe.
wa %>%
  # Strip the " County, Washington" suffix so only the county name is shown.
  mutate(
    NAME = gsub(pattern = " County, Washington", replacement = "", x = NAME)
  ) %>%
  ggplot(mapping = aes(x = estimate, y = reorder(NAME, estimate))) +
  geom_errorbarh(
    mapping = aes(xmin = estimate - moe, xmax = estimate + moe)
  ) +
  geom_point(size = 1, colour = "red") +
  ggtitle(
    label = "Household income by county in Washington",
    subtitle = "2012-2016 American Community Survey"
  ) +
  xlab("ACS estimate (bars represent margin of error)") +
  ylab("")
Review “Spatial Data” under “Articles” on the website. Replicate the Orange County Example then repeat it for Thurston and King Counties.
# Turn on the tigris_use_cache option before requesting geometry.
options(tigris_use_cache = TRUE)
# Tract-level B19013_001 for Thurston County, WA, with the tract geometry
# attached (geometry = TRUE).
# year and survey are pinned to the 2012-2016 5-year ACS reported in the
# output below; left unset, the result depends on the default year of the
# installed tidycensus version.
thurston <- get_acs(
  geography = "tract",
  variables = "B19013_001",
  state = "WA",
  county = "Thurston",
  year = 2016,
  survey = "acs5",
  geometry = TRUE
)
## Getting data from the 2012-2016 5-year ACS
str(thurston)
## Classes 'sf' and 'data.frame': 50 obs. of 6 variables:
## $ GEOID : chr "53067010100" "53067010200" "53067010300" "53067010400" ...
## $ NAME : chr "Census Tract 101, Thurston County, Washington" "Census Tract 102, Thurston County, Washington" "Census Tract 103, Thurston County, Washington" "Census Tract 104, Thurston County, Washington" ...
## $ variable: chr "B19013_001" "B19013_001" "B19013_001" "B19013_001" ...
## $ estimate: num 28938 71696 57820 89118 37393 ...
## $ moe : num 3629 9941 8664 8991 6174 ...
## $ geometry:sfc_MULTIPOLYGON of length 50; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:29, 1:2] -123 -123 -123 -123 -123 ...
## ..- attr(*, "class")= chr "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA
## ..- attr(*, "names")= chr "GEOID" "NAME" "variable" "estimate" ...
Note the strategy for storing substantive data and map data in the same structure: the geometry column of the tbl is a list-column that holds the polygon for each tract.
library(viridis)
# Choropleth of the tract-level estimate for Thurston County.
thurston %>%
  ggplot(aes(fill = estimate)) +
  # The outline colour must be set on the layer; passed to ggplot() it is
  # silently ignored.
  geom_sf(color = "black") +
  # EPSG 26910 is NAD83 / UTM zone 10N, the zone containing Thurston County
  # (26911 is zone 11N, carried over from the Orange County example).
  coord_sf(crs = 26910) +
  scale_fill_viridis(option = "magma")
# Variables requested for King County; defined here so this chunk does not
# depend on a definition that appears later in the document.
racevars <- c(
  White = "P005003",
  Black = "P005004",
  Asian = "P005006",
  Hispanic = "P004003"
)
# Tract-level counts for King County, WA, with geometry and the summary
# variable P001001 used as the denominator below. The year is pinned to 2010
# so the result does not depend on the installed tidycensus default.
king <- get_decennial(
  geography = "tract",
  variables = racevars,
  state = "WA",
  county = "King County",
  year = 2010,
  geometry = TRUE,
  summary_var = "P001001"
)
head(king)
# One map per variable, shaded by that variable's share of the summary value.
king %>%
  mutate(pct = 100 * (value / summary_value)) %>%
  ggplot(aes(fill = pct, color = pct)) +
  facet_wrap(~variable) +
  geom_sf() +
  # EPSG 26910 is NAD83 / UTM zone 10N, the zone containing King County
  # (26915 is zone 15N).
  coord_sf(crs = 26910) +
  scale_fill_viridis() +
  scale_color_viridis()
# The Data
# Variables requested for Pierce County.
racevars <- c(
  White = "P005003",
  Black = "P005004",
  Asian = "P005006",
  Hispanic = "P004003"
)
# Tract-level counts for Pierce County, WA, with geometry and the summary
# variable P001001. The year is pinned to the 2010 decennial census reported
# in the output below, so the result does not depend on the installed
# tidycensus default.
pierce <- get_decennial(
  geography = "tract",
  variables = racevars,
  state = "WA",
  county = "Pierce County",
  year = 2010,
  geometry = TRUE,
  summary_var = "P001001"
)
## Getting data from the 2010 decennial Census
##
|
| | 0%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 5%
|
|==== | 5%
|
|===== | 7%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|========= | 14%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 19%
|
|============= | 19%
|
|============== | 21%
|
|=============== | 22%
|
|=============== | 23%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 31%
|
|===================== | 32%
|
|====================== | 34%
|
|======================= | 35%
|
|======================== | 36%
|
|========================= | 38%
|
|========================== | 39%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================== | 46%
|
|=============================== | 47%
|
|================================ | 49%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================== | 89%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
# The Map
# Share of the "Asian" variable in each Pierce County tract, as a percentage
# of the tract's summary value.
pierce %>%
  filter(variable == "Asian") %>%
  mutate(pct = 100 * (value / summary_value)) %>%
  ggplot(aes(fill = pct, color = pct)) +
  geom_sf() +
  # EPSG 26910 is NAD83 / UTM zone 10N, the zone containing Pierce County
  # (26915 is zone 15N).
  coord_sf(crs = 26910) +
  scale_fill_viridis() +
  scale_color_viridis()