library(tidycensus)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)

1 Find the ACS variable for the Percent Vacant housing units for 2011 and 2019 for Harris County, TX

v11_Profile <- load_variables(2011,
                              "acs5/profile",
                              cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
                              "acs5/profile",
                              cache = TRUE) #demographic 

#Search for variables by using grep()

v11_Profile[grep(x = v11_Profile$label,
                 "Vacant housing units",
                 ignore.case = TRUE), 
            c("name", "label")]
## # A tibble: 2 × 2
##   name       label                                            
##   <chr>      <chr>                                            
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label,
                 "Vacant housing units",
                 ignore.case = TRUE),
            c("name", "label")]
## # A tibble: 2 × 2
##   name       label                                                              
##   <chr>      <chr>                                                              
## 1 DP04_0003  Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u…
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un…

Extract from ACS summary file data profile variables from 2011 and 2019 for Harris County, TX Census Tracts

The data profile tables are very useful because they contain lots of pre-calculated variables.

For 2011

house11<-get_acs(geography = "tract",
                state="TX",
                county = "Harris County",
                year = 2011,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
house11<- house11%>%
  mutate( house = DP04_0003PE,
         house_er = DP04_0003PM/1.645,
         house_cv =100* (house_er/house)) %>%
  filter(complete.cases(house), is.finite(house_cv)==T)%>%
  select(GEOID, house, house_er,house_cv)

head(house11)

2 Create a map using quantile breaks of this variable for each year

Breaking the continuous variable into discrete bins with quantile breaks

tm_shape(house11)+
  tm_polygons(c("house"),
              title=c("% in Vacant housing units"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant housing Rate Estimates in 2011- Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

p1<-tm_shape(house11)+
  tm_polygons(c("house"),
              title=c("% in Vacant Housing"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant Housing Rate Estimates",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p2<-tm_shape(house11)+
  tm_polygons(c("house_cv"),
              title=c("CV Vacant Housing"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate CV",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))



tmap_arrange(p1, p2)

plot(house11$house, house11$house_cv, main = "Error in Estimates vs Estimate Size")

Compare Vacant Housing rates over time

house19<-get_acs(geography = "tract",
                state="TX",
                county = "Harris County",
                year = 2019,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
house19<- house19%>%
  mutate( house = DP04_0003PE,
         house_er = DP04_0003PM/1.645,
         house_cv =100* (house_er/house)) %>%
  filter(complete.cases(house), is.finite(house_cv)==T)%>%
  select(GEOID, house, house_er,house_cv)

head(house19)
tm_shape(house19)+
  tm_polygons(c("house"),
              title=c("% in Vacant housing units"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant housing Rate Estimates in 2019- Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

3 Calculate the differences between the two years, including the test for significance of the differnces

#merge the two years worth of data
st_geometry(house19)<-NULL #strip the geometry from the 2019 data

mdat<-left_join(house11, house19, by=c("GEOID"="GEOID"))

head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS:  NAD83
##         GEOID house.x house_er.x house_cv.x house.y house_er.y house_cv.y
## 1 48201222100    12.5   3.647416   29.17933     9.3   3.221884   34.64392
## 2 48201221400    16.3   3.768997   23.12268    13.4   3.465046   25.85855
## 3 48201221300    11.0   4.133739   37.57944     3.2   2.188450   68.38906
## 4 48201530500    24.0   3.586626   14.94428     8.3   2.492401   30.02893
## 5 48201530400    20.5   4.680851   22.83342    10.6   2.978723   28.10116
## 6 48201530300    22.4   4.984802   22.25358    21.9   5.167173   23.59440
##                         geometry
## 1 POLYGON ((-95.34437 29.8805...
## 2 POLYGON ((-95.3931 29.84572...
## 3 POLYGON ((-95.3618 29.84628...
## 4 POLYGON ((-95.38838 29.8306...
## 5 POLYGON ((-95.38838 29.8306...
## 6 POLYGON ((-95.37563 29.8140...
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  se1<-err1/qnorm(.90)
  se2<-err2/qnorm(.90)
  yrs1<-seq(yr1, to=yr1-span)
  yrs2<-seq(yr2, to=yr2-span)

  C<-mean(yrs2%in%yrs1)
  diff<- (est1-est2)
  test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
  crit<-qnorm(1-alpha/2)
  pval<-1-pnorm(abs(test))
  result<-NULL
  result[pval > alpha]<-"insignificant change"
  result[pval < alpha & test < 0]<- "significant increase"
  result[pval < alpha & test > 0]<-"significant decrease" 
  
  data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
significance(est1=mdat$house.x,
             est2=mdat$house.y,
             moe1=mdat$house_er.x,
             moe2 = mdat$house_er.y,
             clevel = .9)
##   [1] FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
##  [13]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
##  [37] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
##  [49] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE    NA FALSE FALSE
##  [73] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
##  [85]  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE
##  [97] FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
## [109]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121] FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [133] FALSE  TRUE    NA  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE
## [145]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE
## [157] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [169]  TRUE  TRUE FALSE  TRUE    NA  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [181]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
## [193] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [205] FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
## [217]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
## [229]  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [241] FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
## [253]  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [265] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [277]  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE
## [289] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [301] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [313] FALSE  TRUE FALSE    NA  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE
## [325] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [337] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [349] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
## [361] FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [373]  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE
## [385] FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [397] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [409]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [421]  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE
## [433] FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [445]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE
## [457]  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
## [469]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [481]  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
## [493]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [505]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [517]  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [529] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [541]  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [553]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
## [565]  TRUE FALSE FALSE    NA  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [577]    NA  TRUE    NA  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE
## [589]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
## [601] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [613]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [625]  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [637]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE    NA
## [649] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [661]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
## [673] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE
## [685] FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
## [697]  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [709] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
## [721]  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [733] FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE
## [745] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [757]    NA  TRUE FALSE FALSE
mdat$signif<- significance(est1=mdat$house.x,
                           est2=mdat$house.y,
                           moe1=mdat$house_er.x,
                           moe2 = mdat$house_er.y,
                           clevel = .9)
  diff1119<-acstest(names = mdat$GEOID,
                    geoid = mdat$GEOID,
                    est1 = mdat$house.x,
                    est2 = mdat$house.y,
                    err1 = mdat$house_er.x,
                    err2=mdat$house_er.y,
                    alpha = .1,
                    yr1 = 2011, yr2=2019,
                    span = 5)

head(diff1119)
##          name       geoid est1 est2      se1      se2 difference      test
## 1 48201222100 48201222100 12.5  9.3 2.846094 2.514050        3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789        2.9 0.7259137
## 3 48201221300 48201221300 11.0  3.2 3.225573 1.707656        7.8 2.1371536
## 4 48201530500 48201530500 24.0  8.3 2.798659 1.944831       15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310        9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967        0.5 0.0892484
##                 result         pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01
table(diff1119$result)
## 
## insignificant change significant decrease significant increase 
##                  393                  257                  101

Compare to tidycensus

table(mdat$signif) #
## 
## FALSE  TRUE 
##   393   358

4 Create a map showing the differences by census tract

Make a map layout

acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid"))

tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
  tm_polygons(c("house.x"),
              title=c("% in Vacant Housing in  2011"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Harris County Vacant Housing Rate Estimates 2011",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

p2<-tm_shape(acs_merge)+
  tm_polygons(c("house.y"),
              title=c("% in Harris County Vacant Housing Rate Estimates in 2019"),
              palette="Blues", 
              style="quantile",
              n=5)+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate Estimate 2019",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))


p3  <- tm_shape(acs_merge)+
  tm_polygons(c("result"),
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="Harris County Vacant Housing Rate CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate Estimate Changes",
            title.size =1.5, 
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()
  

tmap_arrange(p1, p2,  p3, nrow=2)

Make an interactive map

tmap_mode("view")
## tmap mode set to interactive viewing
#osmtile <- tmaptools::read_osm(house11, mergeTiles = T)

#tm_shape(osmtile)+
 # tm_rgb()+
tm_shape(acs_merge)+
  tm_polygons("result",
              alpha = .7,
              title=c("Changes"),
              palette = "Set2")+
  #tm_format("World", title="Harris County Vacant Housing Rate  CV", legend.outside=T)+
  tm_layout(title="Harris County Vacant Housing Rate Estimate Changes",
            title.size =1.5)+
  tm_scale_bar()
  #tm_compass()+