This example will use R to downloard American Coummunity Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R and to illustrate how to create basic descriptive maps of attributes.

The example will use data from San Antonio, Texas from the 2015 American Community Survey summary file.

Get a Census developer API Key

Obtain one at http://api.census.gov/data/key_signup.html

Save your API key to your working directory

use census_api_key(key = "yourkeyhere", install = T)

one time to install your key for use in tidycensus

Look at available ACS variables

library(tidycensus);  library(tidyverse); library(sf)
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000     v purrr   0.2.4     
## v tibble  1.4.2          v dplyr   0.7.5     
## v tidyr   0.8.1          v stringr 1.3.1     
## v readr   1.1.1          v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
v15_Profile <- load_variables(2015 , "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#

View(v15_tables)

#Search for variables by 
v15_Profile[grep(x = v15_Profile$label, "POVERTY"), c("name", "label")]
## # A tibble: 38 x 2
##    name        label                                                      
##    <chr>       <chr>                                                      
##  1 DP03_0119E  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
##  2 DP03_0119PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
##  3 DP03_0120E  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
##  4 DP03_0120PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
##  5 DP03_0121E  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
##  6 DP03_0121PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
##  7 DP03_0122E  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
##  8 DP03_0122PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
##  9 DP03_0123E  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME I~
## 10 DP03_0123PE Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN~
## # ... with 28 more rows
v15_Profile[grep(x = v15_Profile$label, "enrolled"), c("name", "label")]
## # A tibble: 24 x 2
##    name        label                                                      
##    <chr>       <chr>                                                      
##  1 DP02_0052E  Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
##  2 DP02_0052PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
##  3 DP02_0053E  Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
##  4 DP02_0053PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
##  5 DP02_0054E  Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
##  6 DP02_0054PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
##  7 DP02_0055E  Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
##  8 DP02_0055PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
##  9 DP02_0056E  Estimate!!SCHOOL ENROLLMENT!!Population 3 years and over e~
## 10 DP02_0056PE Percent!!SCHOOL ENROLLMENT!!Population 3 years and over en~
## # ... with 14 more rows

Extract from ACS summary file data profile variables from 2015 for Bexar County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

sa_acs<-get_acs(geography = "tract", state="TX", county = c("Bexar"), year = 2015,
                variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
                            "DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
                            "DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
                            "DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
                            "DP03_0119PE","DP04_0046PE","DP04_0078PE","DP05_0072PE","DP05_0073PE",
                            "DP05_0066PE", "DP05_0072PE", "DP02_0113PE") ,
                summary_var = "B01001_001",
                geometry = T, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
#create a county FIPS code - 5 digit
sa_acs$county<-substr(sa_acs$GEOID, 1, 5)

#rename variables and filter missing cases
sa_acs2<-sa_acs%>%
  mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE, 
         pblack=DP05_0073PE , phisp=DP05_0066PE, pfemhh=DP02_0008PE,
         phsormore=DP02_0066PE,punemp=DP03_0009PE, medhhinc=DP03_0062E,
         ppov=DP03_0119PE, pforn=DP02_0092PE,plep=DP02_0113PE) %>%
  st_transform(crs = 102740)%>%
  na.omit()

class(sa_acs2)

Write data out to shapefile

sf::st_write(sa_acs2,dsn="C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data",layer="sa_tract_dp", driver="ESRI Shapefile", delete_layer=T, update=T)
## Deleting layer `sa_tract_dp' using driver `ESRI Shapefile'
## Updating layer `sa_tract_dp' to data source `C:\Users\ozd504\Google Drive\classes\dem7093\GIS_class_2018\data' using driver `ESRI Shapefile'
## features:       362
## fields:         65
## geometry type:  Multi Polygon

Some basic mapping of variables

Here I generate a quantile break for % black in census tracts and compare it to a Jenks break

library(classInt)
library(patchwork)

pblack_map<-sa_acs2 %>%
  mutate(cblack=cut(pblack,breaks = quantile(pblack, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T),
           jblack = cut(pblack,breaks=data.frame(classIntervals(var=sa_acs2$pblack, n=5, style="jenks")[2])[,1], include.lowest = T))

p1<-ggplot(pblack_map, aes(fill = cblack, color = cblack)) + 
  geom_sf() + 
  ggtitle("Proportion African American", 
          subtitle = "Bexar County Texas, 2015 - Quantile Breaks")+
  coord_sf(crs =  102009) + 
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())

  p2<-ggplot(pblack_map, aes(fill = jblack, color = jblack)) + 
  geom_sf() + 
  ggtitle("Proportion African American", 
          subtitle = "Bexar County Texas, 2015 - Jenks Breaks")+
  coord_sf(crs =  102009) + 
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())
  
p1/ p2

Interactive map with mapview

library(mapview)
library(RColorBrewer)
pal <- colorRampPalette(brewer.pal(6, "Blues")) #set colors
mapview(pblack_map["jblack"], col.regions=pal, legend=T,map.types="OpenStreetMap", layer.name="% African American")

Map median household income

Spatial Representation of the data

Basic spatial clustering of data

Descriptive analysis

OLS regression models

Map of model residuals

Analysis of residual clustering