library(tidycensus)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.5 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(tmap)
v11_Profile <- load_variables(2011, "acs5/profile", cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019, "acs5/profile", cache = TRUE) #demographic
#Search for variables by using grep()
v11_Profile[grep(x = v11_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")]
#Percent Vacant = DP04_0003P
v19_Profile[grep(x = v19_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")]
#Pull 2011 Data
vac11<-get_acs(geography = "tract",
state="TX",
county = "Dallas",
year = 2011,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#Pull 2019 Data
vac19<-get_acs(geography = "tract",
state="TX",
county = "Dallas",
year = 2019,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac11 <- vac11%>%
mutate(pvac11 = DP04_0003PE,
pvac_er11 = DP04_0003PM/1.645,
pvac_cv11 =100* (pvac_er11/pvac11)) %>%
filter(complete.cases(pvac11), is.finite(pvac_cv11)==T)
vac19 <- vac19%>%
mutate(pvac19 = DP04_0003PE,
pvac_er19 = DP04_0003PM/1.645,
pvac_cv19 =100* (pvac_er19/pvac19)) %>%
filter(complete.cases(pvac19), is.finite(pvac_cv19)==T)
#Merge Two Years of Data
mdat<-tigris::geo_join(vac11, as.data.frame(vac19), by_sp="GEOID", by_df="GEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
map11<-tm_shape(mdat)+
tm_polygons(col="pvac11", title=c("% Vacant 2011 - Quantile"), palette="YlGnBu", border.col="#FFFFFF", style="quantile", n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Dallas County Vacancy Rate, 2011", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass()
map19<-tm_shape(mdat)+
tm_polygons(col="pvac19", title=c("% Vacant 2019 - Quantile"), palette="YlGnBu", border.col="#FFFFFF", style="quantile", n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Dallas County Vacancy Rate, 2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass()
Here I generate a quantile break for the coefficient of variation in census tract poverty estimates.
If you don’t remeber, the coefficient of variation is:
\[CV =\frac {\sigma }{\mu}\]
and is a measure of the variability in the estimate, relative to the estimate itself. If the CV is greater than 100, the estimate is very imprecise. The lower the value, the more precise the estimate. This is very important when using small area estimates from the ACS.
cov11<-tm_shape(mdat)+
tm_polygons(col="pvac_cv11", title=c("CV Vacancy Rate"), palette="Purples", style="quantile", n=5)+
#tm_format("World", title="Vacancy Rate CV, 2011", legend.outside=T)+
tm_layout(title="Vacancy Rate CV, 2011", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()
cov19<-tm_shape(mdat)+
tm_polygons(col="pvac_cv19", title=c("CV Vacancy Rate"), palette="Purples", style="quantile", n=5)+
#tm_format("World", title="Vacancy Rate CV, 2019", legend.outside=T)+
tm_layout(title="Vacancy Rate CV, 2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()
tmap_arrange(map11, cov11)
tmap_arrange(map19, cov19)
plot(mdat$pvac11, mdat$pvac_cv11)
plot(mdat$pvac19, mdat$pvac_cv19)
When we have data that are collected over times on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.
In the ACS, we can compare two estimates if the years used to produce the estimates do not overlap. For instance, we could compare the 2006-2010 estimates to the 2015-2019 estimates, but we could not compare the 2006-2010 to the 2008-22012, because they share years of data.
See this for the official position on the subject.
There is a fix that will let us do this. see below
Here we take the poverty rate in tracts derived from the 2006-2010 ACS and compare it to the estimate from the 2015-2019 ACS
Here I create a function that implements the testing procedure used by the Census for comparing estimates across year
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
crit<-qnorm(1-alpha/2)
pval<-2*pnorm(abs(test),lower.tail=F)
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
Here I use the function I just made to do the comparisons
diff1019<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pvac11, est2 = mdat$pvac19, err1 = mdat$pvac_er11, err2=mdat$pvac_er19,alpha = .1, yr1 = 2011, yr2=2019, span = 5)
table(diff1019$result)
##
## insignificant change significant decrease significant increase
## 342 117 43
acs_merge<-left_join(mdat, diff1019, by=c("GEOID"="geoid"))
tmap_mode("plot")
## tmap mode set to plotting
sig11_19 <- tm_shape(acs_merge)+
tm_polygons(c("result"), title=c("Changes"), palette = "Dark2")+
#tm_format("World", title="Change", legend.outside=T)+
tm_layout(title="Dallas Vacancy Rate Change, 2011-2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()
tmap_arrange(map11, map19,sig11_19)