This example will use R to downloard American Coummunity Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R and to illustrate how to create basic descriptive maps of attributes.
The example will use data from San Antonio, Texas from the 2015 American Community Survey summary file.
Obtain one at http://api.census.gov/data/key_signup.html
use census_api_key(key = "yourkeyhere", install = T)
one time to install your key for use in tidycensus
library(tidycensus); library(tidyverse); library(sf)
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.5
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
v15_Profile <- load_variables(2015 , "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#
View(v15_tables)
#Search for variables by
v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label")]
## # A tibble: 38 x 2
## name label
## <chr> <chr>
## 1 DP03_0119E Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 2 DP03_0119PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## 3 DP03_0120E Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 4 DP03_0120PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## 5 DP03_0121E Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 6 DP03_0121PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## 7 DP03_0122E Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 8 DP03_0122PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## 9 DP03_0123E Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 10 DP03_0123PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## # ... with 28 more rows
v15_Profile[grep(x = v15_Profile$label, "enrolled"), c("name", "label")]
## # A tibble: 24 x 2
## name label
## <chr> <chr>
## 1 DP02_0052E Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 2 DP02_0052PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## 3 DP02_0053E Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 4 DP02_0053PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## 5 DP02_0054E Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 6 DP02_0054PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## 7 DP02_0055E Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 8 DP02_0055PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## 9 DP02_0056E Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 10 DP02_0056PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## # ... with 14 more rows
The data profile tables are very useful because they contain lots of pre-calculated variables.
sa_acs<-get_acs(geography = "tract", state="TX", county = c("Bexar"), year = 2015,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP04_0078PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE", "DP05_0072PE", "DP02_0113PE") ,
summary_var = "B01001_001",
geometry = T, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
#create a county FIPS code - 5 digit
sa_acs$county<-substr(sa_acs$GEOID, 1, 5)
#rename variables and filter missing cases
sa_acs2<-sa_acs%>%
mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE,
pblack=DP05_0073PE , phisp=DP05_0066PE, pfemhh=DP02_0008PE,
phsormore=DP02_0066PE,punemp=DP03_0009PE, medhhinc=DP03_0062E,
ppov=DP03_0119PE, pforn=DP02_0092PE,plep=DP02_0113PE) %>%
st_transform(crs = 102740)%>%
na.omit()
class(sa_acs2)
sf::st_write(sa_acs2,dsn="C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data",layer="sa_tract_dp", driver="ESRI Shapefile", delete_layer=T, update=T)
## Deleting layer `sa_tract_dp' using driver `ESRI Shapefile'
## Updating layer `sa_tract_dp' to data source `C:\Users\ozd504\Google Drive\classes\dem7093\GIS_class_2018\data' using driver `ESRI Shapefile'
## features: 362
## fields: 65
## geometry type: Multi Polygon
Here I generate a quantile break for % black in census tracts and compare it to a Jenks break
library(classInt)
library(patchwork)
pblack_map<-sa_acs2 %>%
mutate(cblack=cut(pblack,breaks = quantile(pblack, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T),
jblack = cut(pblack,breaks=data.frame(classIntervals(var=sa_acs2$pblack, n=5, style="jenks")[2])[,1], include.lowest = T))
p1<-ggplot(pblack_map, aes(fill = cblack, color = cblack)) +
geom_sf() +
ggtitle("Proportion African American",
subtitle = "Bexar County Texas, 2015 - Quantile Breaks")+
coord_sf(crs = 102009) +
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())
p2<-ggplot(pblack_map, aes(fill = jblack, color = jblack)) +
geom_sf() +
ggtitle("Proportion African American",
subtitle = "Bexar County Texas, 2015 - Jenks Breaks")+
coord_sf(crs = 102009) +
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())
p1/ p2
library(mapview)
library(RColorBrewer)
pal <- colorRampPalette(brewer.pal(6, "Blues")) #set colors
mapview(pblack_map["jblack"], col.regions=pal, legend=T,map.types="OpenStreetMap", layer.name="% African American")
Spatial Representation of the data
Basic spatial clustering of data
Descriptive analysis
OLS regression models
Map of model residuals
Analysis of residual clustering