This example will use R to download American Community Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R, how to create basic descriptive maps of attributes and how to construct a change map between two time periods.

The example will use data from San Antonio, Texas from the American Community Survey summary file.

Get a Census developer API Key

Obtain one at http://api.census.gov/data/key_signup.html

Save your API key to your working directory

use census_api_key(key = "yourkeyhere", install = T)

one time to install your key for use in tidycensus

Load Libraries

library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)

You should always check the variable name of your ACS variable, you cannot assume that the variable name stays the same across years.

v10_Profile <- load_variables(2010,
                              "acs5/profile",
                              cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
                              "acs5/profile",
                              cache = TRUE) #demographic 

#Search for variables by using grep()

v10_Profile[grep(x = v10_Profile$label,
                 "BELOW THE POVERTY LEVEL",
                 ignore.case = TRUE), 
            c("name", "label")]
## # A tibble: 38 x 2
##    name       label                                                             
##    <chr>      <chr>                                                             
##  1 DP03_0119  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  2 DP03_0119P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  3 DP03_0120  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  4 DP03_0120P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  5 DP03_0121  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  6 DP03_0121P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  7 DP03_0122  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  8 DP03_0122P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  9 DP03_0123  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
## 10 DP03_0123P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
## # ... with 28 more rows
v19_Profile[grep(x = v19_Profile$label,
                 "BELOW THE POVERTY LEVEL",
                 ignore.case = TRUE),
            c("name", "label")]
## # A tibble: 38 x 2
##    name       label                                                             
##    <chr>      <chr>                                                             
##  1 DP03_0119  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  2 DP03_0119P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  3 DP03_0120  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  4 DP03_0120P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  5 DP03_0121  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  6 DP03_0121P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  7 DP03_0122  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
##  8 DP03_0122P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
##  9 DP03_0123  Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE P~
## 10 DP03_0123P Percent!!PERCENTAGE OF FAMILIES AND PEOPLE WHOSE INCOME IN THE PA~
## # ... with 28 more rows

Extract from ACS summary file data profile variables from 2010 and 2019 for Bexar County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

pov10<-get_acs(geography = "tract",
                state="TX",
                county = "Bexar",
                year = 2010,
                variables="DP03_0119PE" ,
                geometry = T,
                output = "wide")
## Getting data from the 2006-2010 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
pov10 <- pov10%>%
  mutate(ppov = DP03_0119PE,
         ppov_er = DP03_0119PM/1.645,
         ppov_cv =100* (ppov_er/ppov)) %>%
  filter(complete.cases(ppov), is.finite(ppov_cv)==T)%>%
  select(GEOID, ppov, ppov_er,ppov_cv)

head(pov10)

Alternative break strategies

Two common ways to break a continuous variable into discrete bins are quantile breaks and Jenks/Fisher breaks.

tm_shape(pov10)+
  tm_polygons(c("ppov"),
              title=c("% in Poverty"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="San Antonio Poverty Rate Estimates - Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

tm_shape(pov10)+
  tm_polygons(c("ppov"),
              title=c("% in Poverty"),
              palette="Blues",
              style="jenks",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="San Antonio Poverty Rate Estimates - Jenks Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

Mapping of errors in estimates

Here I generate a quantile break for the coefficient of variation in census tract poverty estimates.

If you don’t remember, the coefficient of variation is:

\[CV =\frac {\sigma }{\mu}\]

and is a measure of the variability in the estimate, relative to the estimate itself. If the CV is greater than 100, the estimate is very imprecise. The lower the value, the more precise the estimate. This is very important when using small area estimates from the ACS.

When presenting data from the ACS, you should always examine the coefficients of variation, as the ACS is based off a survey, and the estimates, especially for small or rare groups can be very imprecise.

p1<-tm_shape(pov10)+
  tm_polygons(c("ppov"),
              title=c("% in Poverty"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="San Antonio Poverty Rate Estimates",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p2<-tm_shape(pov10)+
  tm_polygons(c("ppov_cv"),
              title=c("CV Poverty"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", title="San Antonio Poverty Rate CV", legend.outside=T)+
  tm_layout(title="San Antonio Poverty Rate CV",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))



tmap_arrange(p1, p2)

plot(pov10$ppov, pov10$ppov_cv, main = "Error in Estimates vs Estimate Size")

Change mapping between two time points

When we have data that are collected over times on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.

In the ACS, we can compare two estimates if the years used to produce the estimates do not overlap. For instance, we could compare the 2006-2010 estimates to the 2015-2019 estimates, but we could not compare the 2006-2010 to the 2008-2012, because they share years of data.

See this for the official position on the subject.

There is a fix that will let us do this. see below

Compare poverty rates over time

Here we take the poverty rate in tracts derived from the 2006-2010 ACS and compare it to the estimate from the 2015-2019 ACS

# get the 2019 estimates
pov19<-get_acs(geography = "tract",
                state="TX",
                county = "Bexar",
                year = 2019,
                variables="DP03_0119P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
pov19<- pov19%>%
  mutate(ppov19 = DP03_0119PE,
         ppov19_er = DP03_0119PM/1.645,
         ppov19_cv =100* (ppov19_er/ppov19)) %>%
  filter(complete.cases(ppov19), is.finite(ppov19_cv)==T)%>%
  select(GEOID, ppov19, ppov19_er,ppov19_cv)

head(pov19)
#merge the two years worth of data
st_geometry(pov19)<-NULL #strip the geometry from the 2019 data

mdat<-left_join(pov10, pov19, by=c("GEOID"="GEOID"))

head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -98.5152 ymin: 29.40677 xmax: -98.456 ymax: 29.58043
## Geodetic CRS:  NAD83
##         GEOID ppov   ppov_er  ppov_cv ppov19 ppov19_er  ppov19_cv
## 1 48029110100 15.4 10.151976 65.92192   24.7 14.650456  59.313587
## 2 48029110300 29.9  8.085106 27.04049   31.3 12.401216  39.620498
## 3 48029110500 57.5  9.057751 15.75261   69.1  4.498480   6.510102
## 4 48029120300  2.3  1.519757 66.07638    2.6  2.370821  91.185410
## 5 48029120702  7.4  4.802432 64.89772    1.6  1.702128 106.382979
## 6 48029121108  3.5  2.127660 60.79027    6.6  3.282675  49.737497
##                         geometry
## 1 MULTIPOLYGON (((-98.49928 2...
## 2 MULTIPOLYGON (((-98.48687 2...
## 3 MULTIPOLYGON (((-98.51411 2...
## 4 MULTIPOLYGON (((-98.45928 2...
## 5 MULTIPOLYGON (((-98.48187 2...
## 6 MULTIPOLYGON (((-98.47468 2...

Here I create a function that implements the testing procedure used by the Census for comparing estimates across year. The test for two estimates is:

\[\text{z = }\left| \frac{est_1 - est_2}{(1-C)*\sqrt{se_1^2 + se_2^2}}\right|\]

Where \(C\) is the number of years overlap in the estimates.

Here is my function:

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
  crit<-qnorm(1-alpha/2)
  pval<-1-pnorm(abs(test))
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

A very similar function significance() from the tidycensus package does a similar thing, but with less output.

significance(est1=mdat$ppov,
             est2=mdat$ppov19,
             moe1=mdat$ppov_er,
             moe2 = mdat$ppov19_er,
             clevel = .9)
##   [1] FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
##  [13]  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
##  [37]  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
##  [49]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE
##  [61]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73]  TRUE FALSE FALSE FALSE FALSE FALSE    NA  TRUE  TRUE FALSE  TRUE  TRUE
##  [85] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
##  [97] FALSE  TRUE    NA FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE
## [109]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
## [145]  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
## [157]  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
## [169] FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [181]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
## [193]  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE
## [205]  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [217] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [229] FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [241] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
## [253]  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [265]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE
## [277] FALSE FALSE  TRUE    NA FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [289] FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [301]  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
## [313] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
## [325] FALSE FALSE    NA FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
## [337] FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE
mdat$signif<- significance(est1=mdat$ppov,
                           est2=mdat$ppov19,
                           moe1=mdat$ppov_er,
                           moe2 = mdat$ppov19_er,
                           clevel = .9)

Here I use the function I just made to do the comparisons

  diff1019<-acstest(names = mdat$GEOID,
                    geoid = mdat$GEOID,
                    est1 = mdat$ppov,
                    est2 = mdat$ppov19,
                    err1 = mdat$ppov_er,
                    err2=mdat$ppov19_er,
                    alpha = .1,
                    yr1 = 2010, yr2=2019,
                    span = 5)

head(diff1019)
##          name       geoid est1 est2      se1       se2 difference       test
## 1 48029110100 48029110100 15.4 24.7 7.921629 11.431812       -9.3 -0.6686694
## 2 48029110300 48029110300 29.9 31.3 6.308842  9.676720       -1.4 -0.1211949
## 3 48029110500 48029110500 57.5 69.1 7.067800  3.510183      -11.6 -1.4699429
## 4 48029120300 48029120300  2.3  2.6 1.185873  1.849961       -0.3 -0.1365238
## 5 48029120702 48029120702  7.4  1.6 3.747357  1.328177        5.8  1.4588372
## 6 48029121108 48029121108  3.5  6.6 1.660222  2.561485       -3.1 -1.0155728
##                 result       pval
## 1 insignificant change 0.25185318
## 2 insignificant change 0.45176833
## 3 significant increase 0.07078861
## 4 insignificant change 0.44570360
## 5 significant decrease 0.07230497
## 6 insignificant change 0.15491643
table(diff1019$result)
## 
## insignificant change significant decrease significant increase 
##                  220                   71                   52

Compare to tidycensus

table(mdat$signif) #
## 
## FALSE  TRUE 
##   220   123

Make a map layout

acs_merge<-left_join(mdat, diff1019, by=c("GEOID"="geoid"))

tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
  tm_polygons(c("ppov"),
              title=c("% in Poverty  2010"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="San Antonio Poverty Rate Estimates 2010",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

p2<-tm_shape(acs_merge)+
  tm_polygons(c("ppov19"),
              title=c("% in Poverty 2019"),
              palette="Blues", 
              style="quantile",
              n=5)+
  #tm_format("World", title="San Antonio Poverty Rate CV", legend.outside=T)+
  tm_layout(title="San Antonio Poverty Rate Estimate 2019",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p3  <- tm_shape(acs_merge)+
  tm_polygons(c("result"),
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="San Antonio Poverty Rate CV", legend.outside=T)+
  tm_layout(title="San Antonio Poverty Rate Estimate Changes",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()
  

tmap_arrange(p1, p2,  p3, nrow=2)

Make an interactive map

tmap_mode("view")
## tmap mode set to interactive viewing
#osmtile <- tmaptools::read_osm(pov10, mergeTiles = T)

#tm_shape(osmtile)+
 # tm_rgb()+
tm_shape(acs_merge)+
  tm_polygons("result",
              alpha = .7,
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="San Antonio Poverty Rate CV", legend.outside=T)+
  tm_layout(title="San Antonio Poverty Rate Estimate Changes",
            title.size =1.5)+
  tm_scale_bar()
  #tm_compass()+
st_write(acs_merge, dsn="C:/Users/ozd504/Documents/GitHub/DEM7093/data/lab3_dat.gpkg",
             layer="lab3dat",
             driver="GPKG" )