This example will use R to downloard American Coummunity Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R and to illustrate how to create basic descriptive maps of attributes.

The example will use data from San Antonio, Texas from the 2015 American Community Survey summary file.

Get a Census developer API Key

Obtain one at http://api.census.gov/data/key_signup.html

Save your API key to your working directory

use census_api_key(key = "yourkeyhere", install = T)

one time to install your key for use in tidycensus

Look at available ACS variables

library(tidycensus);  library(tidyverse); library(sf)
## -- Attaching packages ------ tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000     v purrr   0.2.4     
## v tibble  1.4.2          v dplyr   0.7.5     
## v tidyr   0.8.1          v stringr 1.3.1     
## v readr   1.1.1          v forcats 0.3.0
## -- Conflicts --------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
v15_Profile <- load_variables(2015, "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#

View(v15_Profile)

#Search for variables by 
v15_Profile[grep(x = v15_Profile$label, "HighSchool"), c("name", "label")]
## # A tibble: 0 x 2
## # ... with 2 variables: name <chr>, label <chr>
v15_Profile[grep(x = v15_Profile$label, "Built 2000 to 2009"), c("name", "label")]
## # A tibble: 2 x 2
##   name        label                                                       
##   <chr>       <chr>                                                       
## 1 DP04_0019E  Estimate!!YEAR STRUCTURE BUILT!!Total housing units!!Built ~
## 2 DP04_0019PE Percent!!YEAR STRUCTURE BUILT!!Total housing units!!Built 2~

Extract from ACS summary file data profile variables from 2015 for Bexar County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

sa_acs<-get_acs(geography = "tract", state="TX", county = c("Bexar"), year = 2015,
                variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
                            "DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
                            "DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
                            "DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
                            "DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
                            "DP05_0066PE", "DP05_0072PE", "DP02_0113PE") ,
                summary_var = "B01001_001",
                geometry = T, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
#create a county FIPS code - 5 digit
sa_acs$county<-substr(sa_acs$GEOID, 1, 5)

#rename variables and filter missing cases
sa_acs2<-sa_acs%>%
  mutate(totpop= DP05_0001E,tpop_cv=(DP05_0001M/1.645)/DP05_0001E ,
         phsormore=DP02_0066PE,phsormore_cv=(DP02_0066PM/1.645)/DP02_0066PE, medhhinc=DP03_0062E,ppov=DP03_0119PE ) %>%
#  st_transform(crs = 102740)%>%
  filter(complete.cases(totpop, phsormore, medhhinc))

#(acs.sub$medhhinc_ME/1.645) / acs.sub$medhhinc) * 100

class(sa_acs2)

Write data out to shapefile

#change the directory
sf::st_write(sa_acs2,dsn="C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data",layer="sa_tract_dp", driver="ESRI Shapefile", delete_layer=T, update=T)

mapping of errors in variables

Here I generate a quantile break for the coefficient of variation in census tract population estimates

library(classInt)
library(dplyr)

cv_map<-sa_acs2 %>%
  mutate(cv_cut=cut(tpop_cv,breaks = quantile(tpop_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T),
         ed_cut=cut(phsormore_cv,breaks = quantile(phsormore_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T) )

library(ggsn)
p1<-ggplot(cv_map, aes(fill = cv_cut, color = cv_cut)) + 
  geom_sf() + 
  ggtitle("Coefficent of Variation in Population Estimate", 
          subtitle = "Bexar County Texas, 2015 ACS")+
    scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(cv_map)+
  scalebar(cv_map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)
p1

p2<-ggplot(cv_map, aes(fill = ed_cut, color = ed_cut)) + 
  geom_sf() + 
  ggtitle("Coefficent of Variation in Estimate of % Greater than High School", 
          subtitle = "Bexar County Texas, 2015 ACS")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(cv_map)+
  scalebar(cv_map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)
p2

Change map between two time points

When we have data that are collected over time on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.

In the ACS, we can compare two estimates if the years used to produce the esimates do not overlap. For instance, we could compare the 2005-2009 estimates to the 2011-2015 estimates, but we could not compare the 2010-2014 to the 2011-2015, becuase they share years of data. See this for the official position on the subject.

compare median household incomes

Here we take the median houshold income in tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2011-2015 ACS

#v11_Profile <- load_variables(2011 ,dataset =  "acs5") #demographic profile 

options(tigris_use_cache = TRUE)
#2009 ACS

acs_11<-get_acs(geography = "tract", state="TX", county = "029", year = 2011, variables="B19013_001",geometry = TRUE, output = "wide")
## Getting data from the 2007-2011 5-year ACS
#2015 ACS
acs_15<-get_acs(geography = "tract", state="TX", county = "Bexar", year = 2015, variables="B19013_001",geometry = TRUE, output = "wide")
## Getting data from the 2011-2015 5-year ACS
#create variables with nice names
acs_11<-acs_11%>%
  mutate(inc11=B19013_001E, inc11err=B19013_001M)%>%
  select(GEOID, NAME, inc11, inc11err)
#create variables with nice names
acs_15<-acs_15%>%
  mutate(inc15=B19013_001E, inc15err=B19013_001M)%>%
  select(GEOID, NAME, inc15, inc15err)

#merge the two years worth of data
mdat<-left_join(acs_11, as.data.frame(acs_15), by="GEOID", st_join=FALSE)

head(mdat)
## Simple feature collection with 6 features and 7 fields
## Active geometry column: geometry.x
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -98.56778 ymin: 29.32196 xmax: -98.41912 ymax: 29.44661
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
##         GEOID                                    NAME.x inc11 inc11err
## 1 48029170401 Census Tract 1704.01, Bexar County, Texas 17796     3070
## 2 48029170402 Census Tract 1704.02, Bexar County, Texas 27433     3465
## 3 48029160702 Census Tract 1607.02, Bexar County, Texas 25455     5946
## 4 48029160200    Census Tract 1602, Bexar County, Texas 34821     9070
## 5 48029141600    Census Tract 1416, Bexar County, Texas 56184     7383
## 6 48029141200    Census Tract 1412, Bexar County, Texas 39897     3871
##                                      NAME.y inc15 inc15err
## 1 Census Tract 1704.01, Bexar County, Texas 18310     2821
## 2 Census Tract 1704.02, Bexar County, Texas 24722     4437
## 3 Census Tract 1607.02, Bexar County, Texas 22076     4843
## 4    Census Tract 1602, Bexar County, Texas 36458     7223
## 5    Census Tract 1416, Bexar County, Texas 53534     4526
## 6    Census Tract 1412, Bexar County, Texas 32088     7034
##                       geometry.x                     geometry.y
## 1 POLYGON ((-98.5251 29.44358... MULTIPOLYGON (((-98.54106 2...
## 2 POLYGON ((-98.53594 29.4405... MULTIPOLYGON (((-98.54196 2...
## 3 POLYGON ((-98.55955 29.3849... MULTIPOLYGON (((-98.56773 2...
## 4 POLYGON ((-98.51153 29.3992... MULTIPOLYGON (((-98.52883 2...
## 5 POLYGON ((-98.43569 29.3351... MULTIPOLYGON (((-98.45349 2...
## 6 POLYGON ((-98.44832 29.3826... MULTIPOLYGON (((-98.44833 2...
##                   geometry
## 1 GEOMETRYCOLLECTION EMPTY
## 2 GEOMETRYCOLLECTION EMPTY
## 3 GEOMETRYCOLLECTION EMPTY
## 4 GEOMETRYCOLLECTION EMPTY
## 5 GEOMETRYCOLLECTION EMPTY
## 6 GEOMETRYCOLLECTION EMPTY

Here I create a function that implements the testing procedure used by the Census for comparing estimates across year

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
  crit<-qnorm(1-alpha/2)
  pval<-2*pnorm(abs(test),lower.tail=F)
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test > 0]<- "significant increase"
  result[pval < alpha & test < 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

Here I use the function I just made to do the comparisons

diff0915<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$inc11*1.07, est2 = mdat$inc15, err1 = mdat$inc11err*1.12, err2=mdat$inc15err, alpha = .1, yr1 = 2011, yr2=2015, span = 5)

head(diff0915)
##                                        name       geoid     est1  est2
## 1 Census Tract 1704.01, Bexar County, Texas 48029170401 19041.72 18310
## 2 Census Tract 1704.02, Bexar County, Texas 48029170402 29353.31 24722
## 3 Census Tract 1607.02, Bexar County, Texas 48029160702 27236.85 22076
## 4    Census Tract 1602, Bexar County, Texas 48029160200 37258.47 36458
## 5    Census Tract 1416, Bexar County, Texas 48029141600 60116.88 53534
## 6    Census Tract 1412, Bexar County, Texas 48029141200 42689.79 32088
##        se1      se2 difference      test               result       pval
## 1 2682.998 2201.238     731.72 0.2582299 insignificant change 0.79622949
## 2 3028.204 3462.209    4631.31 1.2331710 insignificant change 0.21751198
## 3 5196.451 3779.013    5160.85 0.9837296 insignificant change 0.32524846
## 4 7926.642 5636.137     800.47 0.1007977 insignificant change 0.91971109
## 5 6452.304 3531.657    6582.88 1.0960834 insignificant change 0.27304230
## 6 3383.024 5488.659   10601.79 2.0138800 significant increase 0.04402213
options(scipen=999)
acs_merge<-left_join(sa_acs, diff0915, by=c("GEOID"="geoid"))
## Warning: Column `GEOID`/`geoid` joining character vector and factor,
## coercing into character vector
brks<-quantile(c(acs_11$inc11, acs_15$inc15),  probs = seq(0,1,.2), na.rm=T)
acs_11map<-acs_11%>%
  mutate(inc_q = cut(inc11,breaks = brks, include.lowest = T))

p1<-ggplot(acs_11map, aes( fill=inc_q, color=inc_q))+
  geom_sf() + 
  ggtitle("Median Household Income 2011 ", 
          subtitle = "Bexar County Texas")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  #scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_11map)+
  scalebar(acs_11map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)

acs_15map<-acs_15%>%
  mutate(inc_q = cut(inc15,breaks = brks, include.lowest = T))

p2<-ggplot(acs_15map, aes( fill=inc_q, color=inc_q))+
  geom_sf() + 
  ggtitle("Median Household Income 2015 ", 
          subtitle = "Bexar County Texas")+
  scale_fill_brewer(palette = "Blues") + 
  scale_color_brewer(palette = "Blues")+
  #scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_15map)+
  scalebar(acs_15map, dist = 5,  dd2km =T, model="GRS80", st.size = 2)


myfil<-c("grey", "red", "blue")
p3<-ggplot(acs_merge, aes( fill=result))+
  geom_sf() + 
  ggtitle("Comparison of Median Household Income 2011 to 2015", 
          subtitle = "Bexar County Texas")+
  #scale_fill_brewer(palette = "Accent") + 
  #scale_color_brewer(palette = "Accent")+
  scale_fill_manual(values=myfil)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
  north(acs_merge)+
  scalebar(acs_merge, dist = 5,  dd2km =T, model="GRS80", st.size = 2)


library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
out<-grid.arrange(p1, p2, p3, nrow = 2)

out
## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]