options(Ncores = 6)
library(tidycensus)
library(tidyverse)
library(sf)
library(tmap)
v10_Profile <- load_variables(2010, "acs5/profile", cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019, "acs5/profile", cache = TRUE) #demographic profile tables
#Search for variables by using grep()
v10_Profile[grep(x = v10_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")] #DP04_0003P
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label, "VACANT", ignore.case = TRUE), c("name", "label")] #DP04_0003P
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~
# pull 2010 ACS data for Harris County
vac10<-get_acs(geography = "tract",
state = "TX",
county = "Harris County",
year = 2010,
variables = "DP04_0003P",
geometry = T,
output = "wide")
#rename variables and filter missing cases
vac10 <- vac10 %>%
mutate(pvac = DP04_0003PE,
pvac_er = DP04_0003PM/1.645,
pvac_cv = 100*(pvac_er/pvac)) %>%
filter(complete.cases(pvac), is.finite(pvac_cv)==T) %>%
select(GEOID, pvac, pvac_er, pvac_cv)
head(vac10)
# get the 2019 estimates
vac19<-get_acs(geography = "tract",
state = "TX",
county = "Harris County",
year = 2019,
variables = "DP04_0003P",
geometry = T,
output = "wide")
#rename variables and filter missing cases
vac19<- vac19%>%
mutate(pvac19 = DP04_0003PE,
pvac19_er = DP04_0003PM/1.645,
pvac19_cv = 100*(pvac19_er/pvac19)) %>%
filter(complete.cases(pvac19), is.finite(pvac19_cv)==T)%>%
select(GEOID, pvac19, pvac19_er, pvac19_cv)
head(vac19)
#Merge 2010 and 2019 data
st_geometry(vac19) <- NULL
mdat <- left_join(vac10, vac19, by = c("GEOID"="GEOID"))
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -95.37805 ymin: 29.74486 xmax: -95.29166 ymax: 29.8197
## Geodetic CRS: NAD83
## GEOID pvac pvac_er pvac_cv pvac19 pvac19_er pvac19_cv
## 1 48201100000 32.8 4.255319 12.97353 21.6 3.100304 14.35326
## 2 48201210900 5.5 3.890578 70.73777 21.1 5.167173 24.48897
## 3 48201211000 26.0 6.443769 24.78373 18.3 4.559271 24.91405
## 4 48201211600 9.9 3.586626 36.22855 12.5 2.674772 21.39818
## 5 48201211900 15.8 3.647416 23.08491 7.8 2.978723 38.18876
## 6 48201220200 11.3 5.167173 45.72720 17.4 3.829787 22.01027
## geometry
## 1 MULTIPOLYGON (((-95.37348 2...
## 2 MULTIPOLYGON (((-95.32851 2...
## 3 MULTIPOLYGON (((-95.324 29....
## 4 MULTIPOLYGON (((-95.3159 29...
## 5 MULTIPOLYGON (((-95.3032 29...
## 6 MULTIPOLYGON (((-95.35169 2...
#Function created by C Sparks to get differences in estimates between two years and test for significant change
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff <- (est1-est2)
test <- (est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2))) #this is a t-test
crit <- qnorm(1-alpha/2)
pval <- 1-pnorm(abs(test)) #2*pnorm(abs(test),lower.tail=F)
result <- NULL
result[pval > alpha] <- "insignificant change"
result[pval < alpha & test < 0] <- "significant increase"
result[pval < alpha & test > 0] <- "significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
Run the function above to get comparison of percent housing units vacant and test for significant changes
diff1019 <- acstest(names = mdat$GEOID,
geoid = mdat$GEOID,
est1 = mdat$pvac,
est2 = mdat$pvac19,
err1 = mdat$pvac_er,
err2 = mdat$pvac19_er,
alpha = .1,
yr1 = 2010, yr2=2019,
span = 5)
head(diff1019)
## name geoid est1 est2 se1 se2 difference test
## 1 48201100000 48201100000 32.8 21.6 3.320443 2.419180 11.2 2.726217
## 2 48201210900 48201210900 5.5 21.1 3.035834 4.031967 -15.6 -3.090895
## 3 48201211000 48201211000 26.0 18.3 5.028100 3.557618 7.7 1.250118
## 4 48201211600 48201211600 9.9 12.5 2.798659 2.087136 -2.6 -0.744725
## 5 48201211900 48201211900 15.8 7.8 2.846094 2.324310 8.0 2.177109
## 6 48201220200 48201220200 11.3 17.4 4.031967 2.988399 -6.1 -1.215456
## result pval
## 1 significant decrease 0.0032032428
## 2 significant increase 0.0009977724
## 3 insignificant change 0.1056282015
## 4 insignificant change 0.2282189992
## 5 significant decrease 0.0147362201
## 6 insignificant change 0.1120961479
table(diff1019$result)
##
## insignificant change significant decrease significant increase
## 396 257 94
acs_merge<-left_join(mdat, diff1019, by=c("GEOID"="geoid"))
tmap_mode("plot")
p1<-tm_shape(acs_merge)+
tm_polygons(c("pvac"), title=c("% Housing Units Vacant 2010"), palette="Blues", style="quantile", n=5)+
tm_scale_bar()+
tm_layout(title="Houston Estimated Percent Housing Units Vacant 2010", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass() +
tm_format("World",
legend.position = c("left", "bottom"),
legend.frame = FALSE,
legend.title.size = 1.3,
legend.text.size = 1,
main.title.position =c("center"))
p2<-tm_shape(acs_merge)+
tm_polygons(c("pvac19"), title=c("% Housing Units Vacant 2019"), palette="Blues", style="quantile", n=5)+
tm_layout(title="Houston Estimated Percent Housing Units Vacant 2019", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass() +
tm_format("World",
legend.position = c("left", "bottom"),
legend.frame = FALSE,
legend.title.size = 1.3,
legend.text.size = 1,
main.title.position =c("center"))
p3 <- tm_shape(acs_merge)+
tm_polygons(c("result"), title=c("Change from 2010 to 2019"), palette = "Set2")+
tm_layout(title="Houston Vacant Housing Units Estimate Changes", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass() +
tm_format("World",
legend.position = c("left", "bottom"),
legend.frame = FALSE,
legend.title.size = 1.3,
legend.text.size = 1,
main.title.position =c("center"))
tmap_arrange(p1, p2, p3)