Homework 4

Load Libraries

library(tidycensus)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.5     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(tmap)

Search for ACS Variables

v11_Profile <- load_variables(2011, "acs5/profile", cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019, "acs5/profile", cache = TRUE) #demographic profile tables

#Search for variables by using grep()

v11_Profile[grep(x = v11_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")]

#Percent Vacant = DP04_0003P 

v19_Profile[grep(x = v19_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")]

Extract data profile variables for 2011 and 2019 from the ACS summary file for Dallas County, TX census tracts

#Pull 2011 Data
vac11<-get_acs(geography = "tract",
                state="TX",
                county = "Dallas",
                year = 2011,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#Pull 2019 Data
vac19<-get_acs(geography = "tract",
                state="TX",
                county = "Dallas",
                year = 2019,
                variables="DP04_0003P" ,
                geometry = T,
                output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#compute percent vacant, its standard error, and its CV; drop tracts with missing or undefined values
vac11 <- vac11 %>%
  mutate(pvac11 = DP04_0003PE,                   #percent vacant estimate
         pvac_er11 = DP04_0003PM/1.645,          #standard error from the 90% margin of error
         pvac_cv11 = 100*(pvac_er11/pvac11)) %>% #coefficient of variation (%)
  filter(complete.cases(pvac11), is.finite(pvac_cv11))

vac19 <- vac19 %>%
  mutate(pvac19 = DP04_0003PE,                   #percent vacant estimate
         pvac_er19 = DP04_0003PM/1.645,          #standard error from the 90% margin of error
         pvac_cv19 = 100*(pvac_er19/pvac19)) %>% #coefficient of variation (%)
  filter(complete.cases(pvac19), is.finite(pvac_cv19))

#Merge Two Years of Data
mdat<-tigris::geo_join(vac11, as.data.frame(vac19), by_sp="GEOID", by_df="GEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
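
The geo_join() call above triggers a deprecation warning from dplyr. An equivalent merge can be done with sf and dplyr directly; the sketch below reuses the vac11/vac19 objects above, and mdat_alt is just an illustrative name.

#keep the 2011 geometry and attach the 2019 attributes by GEOID;
#st_drop_geometry() converts vac19 to a plain data frame, since only one sf object can go into the join
mdat_alt <- dplyr::left_join(vac11, sf::st_drop_geometry(vac19), by = "GEOID")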

Alternative break strategies

map11<-tm_shape(mdat)+
  tm_polygons(col="pvac11", title=c("% Vacant 2011 - Quantile"), palette="YlGnBu", border.col="#FFFFFF", style="quantile", n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Dallas County Vacancy Rate, 2011", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()

map19<-tm_shape(mdat)+
  tm_polygons(col="pvac19", title=c("% Vacant 2019 - Quantile"), palette="YlGnBu", border.col="#FFFFFF", style="quantile", n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Dallas County Vacancy Rate, 2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()
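
Other classification schemes are available through the style argument of tm_polygons(). As a sketch reusing the objects above (map11_jenks is just an illustrative name), the 2011 map could use Jenks natural breaks instead of quantiles:

#same 2011 vacancy map, but with Jenks natural breaks; only style= changes relative to map11
map11_jenks <- tm_shape(mdat)+
  tm_polygons(col="pvac11", title=c("% Vacant 2011 - Jenks"), palette="YlGnBu", border.col="#FFFFFF", style="jenks", n=5)+
  tm_scale_bar()+
  tm_layout(title="Dallas County Vacancy Rate, 2011", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_compass()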

Mapping of errors in estimates

Here I generate quantile breaks for the coefficient of variation of the census tract vacancy estimates.

If you don’t remember, the coefficient of variation is:

\[CV = \frac{\sigma}{\mu}\]

and is a measure of the variability in an estimate relative to the size of the estimate itself (expressed here as a percentage). If the CV is greater than 100, the estimate is very imprecise; the lower the value, the more precise the estimate. This is very important when using small-area estimates from the ACS.
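
As a quick toy example (the numbers are hypothetical, not a tract from the data above): an estimate of 10 percent vacant with a 90 percent margin of error of 5 points works out like this:

est <- 10            #percent vacant estimate (hypothetical)
moe <- 5             #90% margin of error (hypothetical)
se <- moe/1.645      #standard error, as in the pvac_er variables above
100*(se/est)         #CV of about 30 - a fairly noisy estimate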

cov11<-tm_shape(mdat)+
  tm_polygons(col="pvac_cv11", title=c("CV Vacancy Rate"), palette="Purples", style="quantile", n=5)+
  #tm_format("World", title="Vacancy Rate CV, 2011", legend.outside=T)+
  tm_layout(title="Vacancy Rate CV, 2011", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()

cov19<-tm_shape(mdat)+
  tm_polygons(col="pvac_cv19", title=c("CV Vacancy Rate"), palette="Purples", style="quantile", n=5)+
  #tm_format("World", title="Vacancy Rate CV, 2019", legend.outside=T)+
  tm_layout(title="Vacancy Rate CV, 2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()


tmap_arrange(map11, cov11)

tmap_arrange(map19, cov19)

#scatterplots of the CV against the estimate for each year
plot(mdat$pvac11, mdat$pvac_cv11)

plot(mdat$pvac19, mdat$pvac_cv19)

Change map between two time points

When we have data that are collected over time on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.

In the ACS, we can compare two estimates if the years used to produce the estimates do not overlap. For instance, we could compare the 2006-2010 estimates to the 2015-2019 estimates, but we could not compare the 2006-2010 estimates to the 2008-2012 estimates, because they share years of data.

See the Census Bureau’s guidance on comparing ACS estimates for the official position on the subject.

There is an adjustment that accounts for the shared years and lets us make such comparisons; see below.
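
To see what the adjustment does (a toy calculation, not part of the assignment data): the 2006-2010 and 2008-2012 periods share three of their five years, and the standard error of the difference is scaled by sqrt(1 - C), as in the acstest() function below.

yrs_a <- 2006:2010
yrs_b <- 2008:2012
C <- mean(yrs_b %in% yrs_a)   #fraction of shared years: 3/5
sqrt(1 - C)                   #about 0.63 - the factor applied to the SE of the difference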

Compare vacancy rates over time

Here we take the vacancy rate in tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2015-2019 ACS.

Here I create a function that implements the testing procedure used by the Census Bureau for comparing estimates across years.

#test for a significant difference between two ACS estimates, following the
#Census Bureau's procedure, including the correction for overlapping sample years
acstest <- function(names, geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
  
  #scale the supplied error terms for use as standard errors in the test
  se1 <- err1/qnorm(.90)
  se2 <- err2/qnorm(.90)
  
  #years of data contributing to each multi-year estimate
  yrs1 <- seq(yr1, to = yr1 - span + 1)
  yrs2 <- seq(yr2, to = yr2 - span + 1)

  #C = proportion of years shared by the two estimates; the standard error of the
  #difference is multiplied by sqrt(1 - C) when the periods overlap
  C <- mean(yrs2 %in% yrs1)
  diff <- (est1 - est2)
  test <- (est1 - est2)/(sqrt(1 - C)*sqrt((se1^2 + se2^2)))
  
  #two-sided critical value and p-value
  crit <- qnorm(1 - alpha/2)
  pval <- 2*pnorm(abs(test), lower.tail = F)
  
  #classify each tract; test < 0 means the estimate grew between the two periods
  result <- NULL
  result[pval > alpha] <- "insignificant change"
  result[pval < alpha & test < 0] <- "significant increase"
  result[pval < alpha & test > 0] <- "significant decrease" 
  
  data.frame(name = names, geoid = geoid, est1 = est1, est2 = est2, se1 = se1, se2 = se2,
             difference = diff, test = test, result = result, pval = pval)
}

Here I use the function I just made to do the comparisons.

diff1019 <- acstest(names = mdat$NAME.x, geoid = mdat$GEOID,
                    est1 = mdat$pvac11, est2 = mdat$pvac19,
                    err1 = mdat$pvac_er11, err2 = mdat$pvac_er19,
                    alpha = .1, yr1 = 2011, yr2 = 2019, span = 5)

table(diff1019$result)
## 
## insignificant change significant decrease significant increase 
##                  342                  117                   43
acs_merge<-left_join(mdat, diff1019, by=c("GEOID"="geoid"))

tmap_mode("plot")
## tmap mode set to plotting
sig11_19  <- tm_shape(acs_merge)+
  tm_polygons(c("result"), title=c("Changes"), palette = "Dark2")+
  #tm_format("World", title="Change", legend.outside=T)+
  tm_layout(title="Dallas Vacancy Rate Change, 2011-2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
  tm_scale_bar()+
  tm_compass()
  

tmap_arrange(map11, map19, sig11_19)