This task uses R to download American Community Survey summary file tables using the tidycensus package. The goal of the exercise is to a) find ACS variable for the percent vacant housing units, b)create maps using quantile breaks for the variable, c) calculate the difference between the years ( 2011 and 2019), and d)create a map showing differences by census tract for 2011 and 2019.

Load libraries

library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)

Check the ACS variable name

v11_Profile <- load_variables(2011,
                              "acs5/profile",
                              cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
                              "acs5/profile",
                              cache = TRUE) #demographic 

Search variables

v11_Profile[grep(x = v11_Profile$label,
                 "VACANT HOUSING UNITS",
                 ignore.case = TRUE), 
            c("name", "label")]
## # A tibble: 2 x 2
##   name       label                                            
##   <chr>      <chr>                                            
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label,
                 "VACANT HOUSING UNITS",
                 ignore.case = TRUE),
            c("name", "label")]
## # A tibble: 2 x 2
##   name       label                                                              
##   <chr>      <chr>                                                              
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~

Extract from ACS summary file data profile variable for Harris County, TX census tracts for 2011 and 2019

VAC11<-get_acs(geography = "tract",
               state="TX",
               county = "201",
               year = 2011,
               variables="DP04_0003P" ,
               geometry = T,
               output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile

#rename variables and filter missing cases

VAC11 <- VAC11%>%
     mutate(pvac = DP04_0003PE,
            pvac_er = DP04_0003PM/1.645,
            pvac_cv =100* (pvac_er/pvac)) %>%
     filter(complete.cases(pvac), is.finite(pvac_cv)==T)%>%
     select(GEOID, pvac, pvac_er,pvac_cv)
 head(VAC11)
## Simple feature collection with 6 features and 4 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS:  NAD83
##         GEOID pvac  pvac_er  pvac_cv                       geometry
## 1 48201222100 12.5 3.647416 29.17933 POLYGON ((-95.34437 29.8805...
## 2 48201221400 16.3 3.768997 23.12268 POLYGON ((-95.3931 29.84572...
## 3 48201221300 11.0 4.133739 37.57944 POLYGON ((-95.3618 29.84628...
## 4 48201530500 24.0 3.586626 14.94428 POLYGON ((-95.38838 29.8306...
## 5 48201530400 20.5 4.680851 22.83342 POLYGON ((-95.38838 29.8306...
## 6 48201530300 22.4 4.984802 22.25358 POLYGON ((-95.37563 29.8140...

Break continuous variable into discrete bins by Quantile breaks

tm_shape(VAC11)+
  tm_polygons(c("pvac"),
              title=c("% of Vacant Units"),
              palette="Blues",
              style="quantile",
              n=4)+
 
  tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position=c('.02','.09'))+
  tm_layout(title="Houston Vacant Housing Rate Estimates,2011",
            title.size =1.7,
            legend.frame = TRUE,
            title.position = c('left', 'bottom'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c(".02", ".2"),
            main.title.position =c("center"))

Mapping of errors in estimate

p1<-tm_shape(VAC11)+
  tm_polygons(c("pvac"),
              title=c("% of Vacant Units"),
              palette="Blues",
              style="quantile",
              n=4)+
 
  tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position= c('.02','.09'))+
  tm_layout(title="Houston Vacant Housing Rate Estimates",
            title.size =1.7, 
            legend.frame = TRUE,
            title.position = c('left', 'bottom'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c(".02", ".2"),
            main.title.position =c("center"))


p2<-tm_shape(VAC11)+
  tm_polygons(c("pvac_cv"),
              title=c("CV Vacant Housing Units"),
              palette="Blues",
              style="quantile",
              n=4)+
  
  tm_layout(title="Houston Vacant Housing Rate CV",
            title.size =1.7,
            legend.frame = TRUE,
            title.position = c('left', 'bottom'))+
  tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position= c('.02','.09'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c(".02", ".2"),
            main.title.position =c("center"))


tmap_arrange(p1, p2)

plot(VAC11$pvac, VAC11$pvac_cv, main = "Error in Estimates vs Estimate Size")

Get 2019 estimates

vac19<-get_acs(geography = "tract",
                state="TX",
                county = "201",
                year = 2019,
                variables="DP04_0003P" ,
                geometry = T,
               output="wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
# rename variables and filter missing cases
vac19<- vac19%>%
  mutate(pvac19 = DP04_0003PE,
         pvac19_er = DP04_0003PM/1.645,
         pvac19_cv =100* (pvac19_er/pvac19)) %>%
  filter(complete.cases(pvac19), is.finite(pvac19_cv)==T)%>%
  select(GEOID, pvac19, pvac19_er,pvac19_cv)

head(vac19)
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -95.65378 ymin: 29.70875 xmax: -95.12716 ymax: 30.00429
## Geodetic CRS:  NAD83
##         GEOID pvac19 pvac19_er pvac19_cv                       geometry
## 1 48201311900   10.5  3.890578  37.05312 MULTIPOLYGON (((-95.33319 2...
## 2 48201450200    7.2  3.708207  51.50287 MULTIPOLYGON (((-95.59029 2...
## 3 48201450400   27.9  4.924012  17.64879 MULTIPOLYGON (((-95.62377 2...
## 4 48201554502    1.5  1.094225  72.94833 MULTIPOLYGON (((-95.65325 2...
## 5 48201550603    6.4  2.857143  44.64286 MULTIPOLYGON (((-95.48031 2...
## 6 48201252200   15.4  3.100304  20.13184 MULTIPOLYGON (((-95.18517 2...

Merge 2011 and 2019 data

st_geometry(vac19)<-NULL #strip the geometry from the 2019 data

mdat<-left_join(VAC11, vac19, by=c("GEOID"="GEOID"))

head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS:  NAD83
##         GEOID pvac  pvac_er  pvac_cv pvac19 pvac19_er pvac19_cv
## 1 48201222100 12.5 3.647416 29.17933    9.3  3.221884  34.64392
## 2 48201221400 16.3 3.768997 23.12268   13.4  3.465046  25.85855
## 3 48201221300 11.0 4.133739 37.57944    3.2  2.188450  68.38906
## 4 48201530500 24.0 3.586626 14.94428    8.3  2.492401  30.02893
## 5 48201530400 20.5 4.680851 22.83342   10.6  2.978723  28.10116
## 6 48201530300 22.4 4.984802 22.25358   21.9  5.167173  23.59440
##                         geometry
## 1 POLYGON ((-95.34437 29.8805...
## 2 POLYGON ((-95.3931 29.84572...
## 3 POLYGON ((-95.3618 29.84628...
## 4 POLYGON ((-95.38838 29.8306...
## 5 POLYGON ((-95.38838 29.8306...
## 6 POLYGON ((-95.37563 29.8140...

Test for 2011 and 2019 estimates

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
  crit<-qnorm(1-alpha/2)
  pval<-1-pnorm(abs(test))
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

A similar function significance()from the tidycensus package does a similar thing, but with less output.

significance(est1=mdat$pvac,
             est2=mdat$pvac19,
             moe1=mdat$pvac_er,
             moe2 = mdat$pvac19_er,
             clevel = .9)
mdat$signif<- significance(est1=mdat$pvac,
                           est2=mdat$pvac19,
                           moe1=mdat$pvac_er,
                           moe2 = mdat$pvac19_er,
                           clevel = .9)
  diff1119<-acstest(names = mdat$GEOID,
                    geoid = mdat$GEOID,
                    est1 = mdat$pvac,
                    est2 = mdat$pvac19,
                    err1 = mdat$pvac_er,
                    err2=mdat$pvac19_er,
                    alpha = .1,
                    yr1 = 2011, yr2=2019,
                    span = 5)
head(diff1119)
##          name       geoid est1 est2      se1      se2 difference      test
## 1 48201222100 48201222100 12.5  9.3 2.846094 2.514050        3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789        2.9 0.7259137
## 3 48201221300 48201221300 11.0  3.2 3.225573 1.707656        7.8 2.1371536
## 4 48201530500 48201530500 24.0  8.3 2.798659 1.944831       15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310        9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967        0.5 0.0892484
##                 result         pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01
table(diff1119$result)
## 
## insignificant change significant decrease significant increase 
##                  393                  257                  101

Compare to tidycensus

table(mdat$signif)
## 
## FALSE  TRUE 
##   393   358

Make a map layout

acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid"))

tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
     tm_polygons(c("pvac"),
                 title=c("% of Vacant Units"),
                palette="Blues",
                 style="quantile",
                 n=4)+
     
     tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+
     tm_layout(title="Houston Vacant Housing Rate Estimates 2011",
               title.size =1.1,
               legend.frame = FALSE,
               title.position = c('left', 'bottom'))+     tm_compass(position=c('.9','.1'))+tm_credits("Source: American      Community Survey", size=.7, position=c('.02','.08'))+
     tm_format("World",
              legend.position =  c(".02", ".2"),legend.title.size=.9,main.title.position =c("center"))
 p2<-tm_shape(acs_merge)+
    tm_polygons(c("pvac19"),
                 title=c("% of Vacant Units"),
                 palette="Blues", 
                 style="quantile",
                 n=4)+
     
     tm_layout(title="Houston Vacant Housing Rate Estimate 2019",
               title.size =1.1,
               legend.frame = FALSE,
               title.position = c('left', 'bottom'))+
     tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+
     tm_compass(position=c('.9','.1'))+tm_credits("Source: American Community Survey", size=.7,position=c('.02','.08'))+
     tm_format("World",
               legend.position =  c(".02", ".2"),legend.title.size=.9,
               main.title.position =c("center"))  
 p3  <- tm_shape(acs_merge)+
     tm_polygons(c("result"),
                 title=c("Changes"),
                 palette = "Set3")+
     
     tm_layout(title="Houston Vacant Housing Rate Estimate Changes (2011-2019)",
               title.size =1.1, 
              legend.frame = FALSE,
               title.position = c('left', 'bottom'))+
     tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+tm_credits("Source: American Community Survey", size=.7, position=c('.02','.08'))+
     tm_compass()+tm_format("World",legend.position =  c(".02", ".2"),legend.title.size=.9,main.title.position =c("center"))
 tmap_arrange(p1, p2,  p3, nrow=2)