Census data provide a valuable resource for understanding and analyzing demographic, social, and economic trends in the US population. In this code-through, we will explore the basics of handling census data in R, using packages like ‘tidyverse’ for data wrangling and ‘ggplot2’ for creating informative visualizations.
# Register the Census API key for this session (the key string is left blank here).
census_api_key("")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Load the variable catalogue for the 5-year ACS ("acs5") ending in 2017,
# i.e. the 2013-2017 period, caching it for later calls.
CenDF <- load_variables(year = 2017, dataset = "acs5", cache = TRUE)
# Convert the variable labels to upper case.
CenDF[["label"]] <- toupper(CenDF[["label"]])
head(CenDF)
## # A tibble: 6 × 4
## name label concept geography
## <chr> <chr> <chr> <chr>
## 1 B00001_001 ESTIMATE!!TOTAL UNWEIGHTED SAMPLE … block gr…
## 2 B00002_001 ESTIMATE!!TOTAL UNWEIGHTED SAMPLE … block gr…
## 3 B01001A_001 ESTIMATE!!TOTAL SEX BY AGE (WHITE … tract
## 4 B01001A_002 ESTIMATE!!TOTAL!!MALE SEX BY AGE (WHITE … tract
## 5 B01001A_003 ESTIMATE!!TOTAL!!MALE!!UNDER 5 YEARS SEX BY AGE (WHITE … tract
## 6 B01001A_004 ESTIMATE!!TOTAL!!MALE!!5 TO 9 YEARS SEX BY AGE (WHITE … tract
# ACS variables to request. The vector names become the values of the
# `variable` column, which later turn into column names when reshaped.
# NOTE(review): B06011_001 looks like median *individual* income rather than
# median household income (which would be B19013_001) -- confirm against the
# ACS variable list before relying on the "HHIncome" name.
M_Home <- c(
  Median_House_Value = "B25077_001",
  HHIncome = "B06011_001"
)
# State-level estimates from the 2017 5-year ACS, with geometry attached for
# mapping. TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
CenDF <- get_acs(
  geography = "state",
  year = 2017,
  survey = "acs5",
  variables = M_Home,
  geometry = TRUE
)
head(CenDF)
# Reshape to one row per state and derive the income-to-house-value ratio.
CenDF <- CenDF %>%
  # Drop the margin-of-error column.
  select(-moe) %>%
  # Long to wide: each value of `variable` becomes a column filled from `estimate`.
  spread(key = variable, value = estimate) %>%
  # New column: income as a percentage of house value, rounded to two decimals.
  mutate(
    HHInc_HousePrice_Ratio_2017 = round(HHIncome / Median_House_Value * 100, digits = 2)
  )
# Render the result as an interactive table.
datatable(CenDF)
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
head(CenDF)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1489 ymin: 30.22333 xmax: 179.7785 ymax: 71.36516
## Geodetic CRS: NAD83
## GEOID NAME HHIncome Median_House_Value geometry
## 1 01 Alabama 24476 132100 MULTIPOLYGON (((-88.05338 3...
## 2 02 Alaska 32940 261900 MULTIPOLYGON (((-166.5772 5...
## 3 04 Arizona 27517 193200 MULTIPOLYGON (((-114.8163 3...
## 4 05 Arkansas 23789 118500 MULTIPOLYGON (((-94.61783 3...
## 5 06 California 29454 443400 MULTIPOLYGON (((-118.6044 3...
## 6 08 Colorado 32401 286100 MULTIPOLYGON (((-109.0603 3...
## HHInc_HousePrice_Ratio_2017
## 1 18.53
## 2 12.58
## 3 14.24
## 4 20.08
## 5 6.64
## 6 11.33
# Add the two-letter postal abbreviation for each row.
# The abbreviation is looked up by state name instead of being assigned by row
# position, so it no longer depends on the rows being in a particular order.
# The previous positional vector also had "CL" in the slot that belongs to the
# District of Columbia ("DC").
# Base R's `state.name` / `state.abb` cover only the 50 states, so DC and
# Puerto Rico are appended explicitly.
state_lookup <- c(
  setNames(state.abb, state.name),
  "District of Columbia" = "DC",
  "Puerto Rico" = "PR"
)
CenDF <- CenDF %>%
  mutate(state = unname(state_lookup[NAME]))
# Choropleth of Median_House_Value by state; the fill is divided by 1000 so
# the legend reads in thousands.
ggplot(CenDF, aes(fill = Median_House_Value / 1000)) +
  geom_sf(color = "white") +
  # Limit the visible area to this longitude/latitude window.
  coord_sf(xlim = c(-130, -60), ylim = c(25, 50)) +
  theme_void() +
  theme(panel.grid.major = element_line(colour = "transparent")) +
  scale_fill_distiller(palette = "Blues", direction = 1, name = "Estimate in thousand") +
  # The mapped variable is Median_House_Value, so the title says "House"
  # rather than "Household".
  labs(title = "Median House Value by State", caption = "Source: US Census/ACS5 2017")
# ACS variables to request (repeated here so this section can run on its own).
# NOTE(review): B06011_001 looks like median *individual* income rather than
# median household income (which would be B19013_001) -- confirm against the
# ACS variable list before relying on the "HHIncome" name.
M_Home <- c(
  Median_House_Value = "B25077_001",
  HHIncome = "B06011_001"
)
# County-level estimates for California only. Passing `state = "CA"` makes the
# request return just California's counties, instead of downloading every US
# county (with geometry) and filtering on the NAME suffix afterwards.
CenDF2 <- get_acs(
  geography = "county",
  state = "CA",
  year = 2017,
  survey = "acs5",
  variables = M_Home,
  geometry = TRUE
)
CenDF2 <- CenDF2 %>%
  # Drop the margin-of-error column.
  select(-moe) %>%
  # Long to wide: one column per requested variable.
  spread(variable, estimate) %>%
  # Income as a percentage of house value, rounded to two decimals.
  mutate(HHInc_HousePrice_Ratio_2017 = round(HHIncome / Median_House_Value * 100, 2))
head(CenDF2)
# Choropleth of Median_House_Value for the California counties in CenDF2; the
# fill is divided by 1000 so the legend reads in thousands.
ggplot(CenDF2, aes(fill = Median_House_Value / 1000)) +
  geom_sf(color = "white") +
  theme_void() +
  theme(
    plot.background = element_rect(fill = "white"), # Set the plot background color
    panel.grid.major = element_line(colour = "transparent"),
    plot.margin = margin(20, 20, 20, 20, "pt") # Adjust margins (top, right, bottom, left)
  ) +
  # This map is county-level for California, so the title must not say
  # "by State" (it was copied from the state map).
  labs(
    title = "Median House Value by County, California",
    caption = "Source: US Census/ACS5 2017"
  ) +
  scale_fill_distiller(palette = "Oranges", direction = 1, name = "Estimate in thousand")
# Bar chart with one bar per NAME value; bar length is the
# income-to-house-value ratio taken directly from the data (no counting).
ggplot(CenDF2, aes(x = HHInc_HousePrice_Ratio_2017, y = NAME)) +
  geom_col(fill = "darkblue", color = "white") +
  # Tilt the x-axis tick labels.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "HHInc_HousePrice_Ratio_2017", y = "COUNTY")