Load Libraries

library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(tmap)
v11_Profile <- load_variables(2011, "acs5/profile", cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019, "acs5/profile", cache = TRUE) #demographic 

#Search for variables by using grep()

v11_Profile[grep(x = v11_Profile$label, "Vacant", ignore.case = TRUE), c("name", "label")]
## # A tibble: 2 x 2
##   name       label                                            
##   <chr>      <chr>                                            
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label, "Vacant", ignore.case = TRUE), c("name", "label")]
## # A tibble: 2 x 2
##   name       label                                                              
##   <chr>      <chr>                                                              
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~

Extract from ACS summary file data profile variables from 2011 and 2019 for Dallas County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

vac11<-get_acs(geography = "tract",
                state="TX",
                county = "Dallas",
                year = 2011,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
vac19<-get_acs(geography = "tract",
                state="TX",
                county = "Dallas",
                year = 2019,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile

Data Processing

#rename variables and filter missing cases
vac11 <- vac11%>%
  mutate(pvac = DP04_0003PE,
         pvac_er = DP04_0003PM/1.645,
         pvac_cv =100* (pvac_er/pvac)) %>%
  filter(complete.cases(pvac), is.finite(pvac_cv)==T)

vac19 <- vac19%>%
  mutate(pvac = DP04_0003PE,
         pvac_er = DP04_0003PM/1.645,
         pvac_cv =100* (pvac_er/pvac)) %>%
  filter(complete.cases(pvac), is.finite(pvac_cv)==T)

Maps with Quantile Breaks

vac11M <- tm_shape(vac11)+
  tm_polygons(c("pvac"), title=c("% Housing Vacant 2011"), palette="Blues", style="quantile", n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Dallas Vacant Percent Estimates 2011 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()

vac19M <- tm_shape(vac19)+
  tm_polygons(c("pvac"), title=c("% Housing Vacant 2019"), palette="Blues", style="quantile", n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Dallas Vacant Percent Estimates 2019 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()

tmap_arrange(vac11M, vac19M)

Comparing 2011 and 2019

#merge the two years worth of data
mdat<-tigris::geo_join(vac11, as.data.frame(vac19), by_sp="GEOID", by_df="GEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(mdat)
## Simple feature collection with 6 features and 14 fields
## Active geometry column: geometry.x
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -96.80067 ymin: 32.83427 xmax: -96.57525 ymax: 32.94708
## geographic CRS: NAD83
##         GEOID                                    NAME.x DP04_0003PE.x
## 1 48113018120 Census Tract 181.20, Dallas County, Texas          10.2
## 2 48113012702 Census Tract 127.02, Dallas County, Texas           5.2
## 3 48113012800    Census Tract 128, Dallas County, Texas           2.9
## 4 48113012900    Census Tract 129, Dallas County, Texas           7.4
## 5 48113013004 Census Tract 130.04, Dallas County, Texas           5.7
## 6 48113019400    Census Tract 194, Dallas County, Texas           6.5
##   DP04_0003PM.x pvac.x pvac_er.x pvac_cv.x
## 1           6.4   10.2  3.890578  38.14292
## 2           5.7    5.2  3.465046  66.63549
## 3           3.0    2.9  1.823708  62.88649
## 4           4.9    7.4  2.978723  40.25302
## 5           4.6    5.7  2.796353  49.05882
## 6           4.8    6.5  2.917933  44.89128
##                                      NAME.y DP04_0003PE.y DP04_0003PM.y pvac.y
## 1 Census Tract 181.20, Dallas County, Texas           4.6           4.2    4.6
## 2 Census Tract 127.02, Dallas County, Texas           3.8           3.6    3.8
## 3    Census Tract 128, Dallas County, Texas           1.1           1.4    1.1
## 4    Census Tract 129, Dallas County, Texas           6.4           5.5    6.4
## 5 Census Tract 130.04, Dallas County, Texas           3.2           3.6    3.2
## 6    Census Tract 194, Dallas County, Texas           7.8           5.4    7.8
##   pvac_er.y pvac_cv.y rank                     geometry.x
## 1 2.5531915  55.50416    1 POLYGON ((-96.57525 32.9365...
## 2 2.1884498  57.59079    1 POLYGON ((-96.68648 32.8400...
## 3 0.8510638  77.36944    1 POLYGON ((-96.68664 32.8402...
## 4 3.3434650  52.24164    1 POLYGON ((-96.71915 32.8612...
## 5 2.1884498  68.38906    1 POLYGON ((-96.73348 32.8677...
## 6 3.2826748  42.08557    1 POLYGON ((-96.78691 32.8507...
##                       geometry.y
## 1 MULTIPOLYGON (((-96.61445 3...
## 2 MULTIPOLYGON (((-96.68511 3...
## 3 MULTIPOLYGON (((-96.70107 3...
## 4 MULTIPOLYGON (((-96.7251 32...
## 5 MULTIPOLYGON (((-96.73332 3...
## 6 MULTIPOLYGON (((-96.80067 3...

Here I create a function that implements the testing procedure used by the Census for comparing estimates across year

acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
  crit<-qnorm(1-alpha/2)
  pval<-2*pnorm(abs(test),lower.tail=F)
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}

Here I use the function I just made to do the comparisons

  diff1119<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pvac.x, est2 = mdat$pvac.y, err1 = mdat$pvac_er.x, err2=mdat$pvac_er.y,alpha = .1, yr1 = 2011, yr2=2019, span = 5)

head(diff1119)
##                                        name       geoid est1 est2      se1
## 1 Census Tract 181.20, Dallas County, Texas 48113018120 10.2  4.6 3.035834
## 2 Census Tract 127.02, Dallas County, Texas 48113012702  5.2  3.8 2.703789
## 3    Census Tract 128, Dallas County, Texas 48113012800  2.9  1.1 1.423047
## 4    Census Tract 129, Dallas County, Texas 48113012900  7.4  6.4 2.324310
## 5 Census Tract 130.04, Dallas County, Texas 48113013004  5.7  3.2 2.182006
## 6    Census Tract 194, Dallas County, Texas 48113019400  6.5  7.8 2.276875
##         se2 difference       test               result      pval
## 1 1.9922659        5.6  1.5422018 insignificant change 0.1230246
## 2 1.7076565        1.4  0.4377872 insignificant change 0.6615405
## 3 0.6640886        1.8  1.1462233 insignificant change 0.2517028
## 4 2.6089196        1.0  0.2861950 insignificant change 0.7747287
## 5 1.7076565        2.5  0.9022720 insignificant change 0.3669124
## 6 2.5614847       -1.3 -0.3793238 insignificant change 0.7044474
table(diff1119$result)
## 
## insignificant change significant decrease significant increase 
##                  342                  117                   43

Maping Difference

acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid")) #adding difference info to merged data

diffmap <- tm_shape(acs_merge) +
  tm_polygons(col = "difference", style="quantile", n=5, title=c("% Housing Vacant Change"))+
  tm_scale_bar()+
  tm_layout(title="Dallas Vacant Percent Change from 2011 to 2019 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()
diffmap
## Variable(s) "difference" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.