This example will use R to downloard American Coummunity Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R and to illustrate how to create basic descriptive maps of attributes.

The example will use data from San Antonio, Texas from the 2015 American Community Survey summary file.

Get a Census developer API Key

Obtain one at http://api.census.gov/data/key_signup.html

Save your API key to your working directory

use census_api_key(key = "yourkeyhere", install = T)

one time to install your key for use in tidycensus

Look at available ACS variables

library(tidycensus);  library(tidyverse); library(sf)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000     v purrr   0.2.4     
## v tibble  1.4.2          v dplyr   0.7.5     
## v tidyr   0.8.0          v stringr 1.3.1     
## v readr   1.1.1          v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
v15_Profile <- load_variables(2015, "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#

View(v15_Profile)

#Search for variables by 
v15_Profile[grep(x = v15_Profile$label, "HighSchool"), c("name", "label")]
## # A tibble: 0 x 2
## # ... with 2 variables: name <chr>, label <chr>
v15_Profile[grep(x = v15_Profile$label, "Built 2000 to 2009"), c("name", "label")]
## # A tibble: 4 x 2
##   name        label                                                       
##   <chr>       <chr>                                                       
## 1 DP04_0019E  YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 2 DP04_0019M  YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 3 DP04_0019PE YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 4 DP04_0019PM YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~

Extract from ACS summary file data profile variables from 2015 for Bexar County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

sa_acs<-get_acs(geography = "tract", state="TX", county = c("Bexar"), year = 2015,
                variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
                            "DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
                            "DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
                            "DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
                            "DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
                            "DP05_0066PE", "DP05_0072PE", "DP02_0113PE") ,
                summary_var = "B01001_001",
                geometry = T, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
#create a county FIPS code - 5 digit
sa_acs$county<-substr(sa_acs$GEOID, 1, 5)

#rename variables and filter missing cases
sa_acs2<-sa_acs%>%
  mutate(totpop= DP05_0001E,tpop_cv=(DP05_0001M/1.645)/DP05_0001E ,
         phsormore=DP02_0066PE,phsormore_cv=(DP02_0066PM/1.645)/DP02_0066PE, medhhinc=DP03_0062E,ppov=DP03_0119PE ) %>%
#  st_transform(crs = 102740)%>%
  filter(complete.cases(totpop, phsormore, medhhinc))

#(acs.sub$medhhinc_ME/1.645) / acs.sub$medhhinc) * 100

class(sa_acs2)

Write data out to shapefile

#change the directory
sf::st_write(sa_acs2,dsn="C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data",layer="sa_tract_dp", driver="ESRI Shapefile", delete_layer=T, update=T)

mapping of errors in variables

Here I generate a quantile break for the coefficient of variation in census tract population estimates

library(classInt)
library(dplyr)

cv_map<-sa_acs2 %>%
  mutate(cv_cut=cut(tpop_cv,breaks = quantile(tpop_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T),
         ed_cut=cut(phsormore_cv,breaks = quantile(phsormore_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T) )

library(ggsn)
p1<-ggplot(cv_map, aes(fill = cv_cut, color = cv_cut)) + 
  geom_sf() + 
  ggtitle("Coefficent of Variation in Population Estimate", 
          subtitle = "Bexar County Texas, 2015 ACS")+
    scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(cv_map)+
  scalebar(cv_map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)
p1

p2<-ggplot(cv_map, aes(fill = ed_cut, color = ed_cut)) + 
  geom_sf() + 
  ggtitle("Coefficent of Variation in Estimate of % Greater than High School", 
          subtitle = "Bexar County Texas, 2015 ACS")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(cv_map)+
  scalebar(cv_map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)
p2

Change map between two time points

When we have data that are collected over time on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.

In the ACS, we can compare two estimates if the years used to produce the esimates do not overlap. For instance, we could compare the 2005-2009 estimates to the 2011-2015 estimates, but we could not compare the 2010-2014 to the 2011-2015, becuase they share years of data. See this for the official position on the subject.

There is a fix that will let us do this. see below

compare poverty rates over time

Here we take the poverty rate in tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2011-2015 ACS

#v11_Profile <- load_variables(2011 ,dataset =  "acs5") #demographic profile 

options(tigris_use_cache = TRUE)
#2009 ACS

acs_11<-get_acs(geography = "tract", state="TX", county = "029", year = 2011, variables="B17001_002",summary_var = "B17001_001", geometry = TRUE, output = "wide")
## Getting data from the 2007-2011 5-year ACS
#2015 ACS
acs_15<-get_acs(geography = "tract", state="TX", county = "Bexar", year = 2015, variables="B17001_002",summary_var = "B17001_001",geometry = TRUE, output = "wide")
## Getting data from the 2011-2015 5-year ACS
#create variables with nice names
acs_11<-acs_11%>%
  mutate(pov11=B17001_002E/summary_est, poverr11=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
  select(GEOID, NAME, pov11, poverr11)
#create variables with nice names
acs_15<-acs_15%>%
  mutate(pov15=B17001_002E/summary_est, poverr15=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
  select(GEOID, NAME, pov15, poverr15)

#merge the two years worth of data
mdat<-left_join(acs_11, as.data.frame(acs_15), by="GEOID", st_join=FALSE)

head(mdat)
## Simple feature collection with 6 features and 7 fields
## Active geometry column: geometry.x
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -98.56778 ymin: 29.32196 xmax: -98.41912 ymax: 29.44661
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
##         GEOID                                    NAME.x     pov11
## 1 48029170401 Census Tract 1704.01, Bexar County, Texas 0.4730151
## 2 48029170402 Census Tract 1704.02, Bexar County, Texas 0.2710789
## 3 48029160702 Census Tract 1607.02, Bexar County, Texas 0.3448651
## 4 48029160200    Census Tract 1602, Bexar County, Texas 0.1369106
## 5 48029141600    Census Tract 1416, Bexar County, Texas 0.1108108
## 6 48029141200    Census Tract 1412, Bexar County, Texas 0.1447313
##     poverr11                                    NAME.y     pov15
## 1 0.09238926 Census Tract 1704.01, Bexar County, Texas 0.5043601
## 2 0.06925421 Census Tract 1704.02, Bexar County, Texas 0.2909091
## 3 0.11298489 Census Tract 1607.02, Bexar County, Texas 0.3492222
## 4 0.08311340    Census Tract 1602, Bexar County, Texas 0.1575614
## 5 0.06370955    Census Tract 1416, Bexar County, Texas 0.1166735
## 6 0.07850766    Census Tract 1412, Bexar County, Texas 0.3406908
##     poverr15                     geometry.x                     geometry.y
## 1 0.09935877 POLYGON ((-98.5251 29.44358... MULTIPOLYGON (((-98.54106 2...
## 2 0.08782205 POLYGON ((-98.53594 29.4405... MULTIPOLYGON (((-98.54196 2...
## 3 0.07324105 POLYGON ((-98.55955 29.3849... MULTIPOLYGON (((-98.56773 2...
## 4 0.06412144 POLYGON ((-98.51153 29.3992... MULTIPOLYGON (((-98.52883 2...
## 5 0.05433554 POLYGON ((-98.43569 29.3351... MULTIPOLYGON (((-98.45349 2...
## 6 0.14070293 POLYGON ((-98.44832 29.3826... MULTIPOLYGON (((-98.44833 2...
##                   geometry
## 1 GEOMETRYCOLLECTION EMPTY
## 2 GEOMETRYCOLLECTION EMPTY
## 3 GEOMETRYCOLLECTION EMPTY
## 4 GEOMETRYCOLLECTION EMPTY
## 5 GEOMETRYCOLLECTION EMPTY
## 6 GEOMETRYCOLLECTION EMPTY

Here I create a function that implements the testing procedure used by the Census for comparing estimates across year

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
  crit<-qnorm(1-alpha/2)
  pval<-2*pnorm(abs(test),lower.tail=F)
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

Here I use the function I just made to do the comparisons

  diff0915<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pov11, est2 = mdat$pov15, err1 = mdat$poverr11, err2=mdat$poverr15, alpha = .1, yr1 = 2011, yr2=2015, span = 5)

head(diff0915)
##                                        name       geoid      est1
## 1 Census Tract 1704.01, Bexar County, Texas 48029170401 0.4730151
## 2 Census Tract 1704.02, Bexar County, Texas 48029170402 0.2710789
## 3 Census Tract 1607.02, Bexar County, Texas 48029160702 0.3448651
## 4    Census Tract 1602, Bexar County, Texas 48029160200 0.1369106
## 5    Census Tract 1416, Bexar County, Texas 48029141600 0.1108108
## 6    Census Tract 1412, Bexar County, Texas 48029141200 0.1447313
##        est2        se1        se2   difference        test
## 1 0.5043601 0.07209172 0.07753006 -0.031345011 -0.36261649
## 2 0.2909091 0.05403935 0.06852791 -0.019830215 -0.27829175
## 3 0.3492222 0.08816258 0.05715029 -0.004357117 -0.05079068
## 4 0.1575614 0.06485373 0.05003422 -0.020650793 -0.30877316
## 5 0.1166735 0.04971283 0.04239825 -0.005862667 -0.10989527
## 6 0.3406908 0.06125985 0.10979108 -0.195959568 -1.90892748
##                 result       pval
## 1 insignificant change 0.71689139
## 2 insignificant change 0.78078841
## 3 insignificant change 0.95949232
## 4 insignificant change 0.75749409
## 5 insignificant change 0.91249244
## 6 significant increase 0.05627145
options(scipen=999)
acs_merge<-left_join(sa_acs, diff0915, by=c("GEOID"="geoid"))
## Warning: Column `GEOID`/`geoid` joining character vector and factor,
## coercing into character vector
brks<-quantile(c(acs_11$pov11, acs_15$pov15),  probs = seq(0,1,.2), na.rm=T)
acs_11map<-acs_11%>%
  mutate(pov_q = cut(pov11,breaks = brks, include.lowest = T))

p1<-ggplot(acs_11map, aes( fill=pov_q, color=pov_q))+
  geom_sf() + 
  ggtitle("Poverty Rate 2011 ", 
          subtitle = "Bexar County Texas")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  #scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_11map)+
  scalebar(acs_11map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)

acs_15map<-acs_15%>%
  mutate(pov_q = cut(pov15,breaks = brks, include.lowest = T))

p2<-ggplot(acs_15map, aes( fill=pov_q, color=pov_q))+
  geom_sf() + 
  ggtitle("Poverty Rate 2015 ", 
          subtitle = "Bexar County Texas")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  #scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_15map)+
  scalebar(acs_15map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)


myfil<-c("grey", "red", "blue")
p3<-ggplot(acs_merge, aes( fill=result))+
  geom_sf() + 
  ggtitle("Comparison of Poverty Rate 2011 to 2015", 
          subtitle = "Bexar County Texas")+
  #scale_fill_brewer(palette = "Accent") + 
  #scale_color_brewer(palette = "Accent")+
  scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_merge)+
  scalebar(acs_merge, dist = 5,  dd2km =T, model="GRS80", st.size = 2)


library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
out<-grid.arrange(p1, p2, p3, nrow = 2)

out
## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]