This task uses R to download American Community Survey summary file tables using the tidycensus
package. The goal of the exercise is to a) find ACS variable for the percent vacant housing units, b)create maps using quantile breaks for the variable, c) calculate the difference between the years ( 2011 and 2019), and d)create a map showing differences by census tract for 2011 and 2019.
Load libraries
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
Check the ACS variable name
v11_Profile <- load_variables(2011,
"acs5/profile",
cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
"acs5/profile",
cache = TRUE) #demographic
Search variables
v11_Profile[grep(x = v11_Profile$label,
"VACANT HOUSING UNITS",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label,
"VACANT HOUSING UNITS",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~
Extract from ACS summary file data profile variable for Harris County, TX census tracts for 2011 and 2019
VAC11<-get_acs(geography = "tract",
state="TX",
county = "201",
year = 2011,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
VAC11 <- VAC11%>%
mutate(pvac = DP04_0003PE,
pvac_er = DP04_0003PM/1.645,
pvac_cv =100* (pvac_er/pvac)) %>%
filter(complete.cases(pvac), is.finite(pvac_cv)==T)%>%
select(GEOID, pvac, pvac_er,pvac_cv)
head(VAC11)
## Simple feature collection with 6 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS: NAD83
## GEOID pvac pvac_er pvac_cv geometry
## 1 48201222100 12.5 3.647416 29.17933 POLYGON ((-95.34437 29.8805...
## 2 48201221400 16.3 3.768997 23.12268 POLYGON ((-95.3931 29.84572...
## 3 48201221300 11.0 4.133739 37.57944 POLYGON ((-95.3618 29.84628...
## 4 48201530500 24.0 3.586626 14.94428 POLYGON ((-95.38838 29.8306...
## 5 48201530400 20.5 4.680851 22.83342 POLYGON ((-95.38838 29.8306...
## 6 48201530300 22.4 4.984802 22.25358 POLYGON ((-95.37563 29.8140...
Break continuous variable into discrete bins by Quantile breaks
tm_shape(VAC11)+
tm_polygons(c("pvac"),
title=c("% of Vacant Units"),
palette="Blues",
style="quantile",
n=4)+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position=c('.02','.09'))+
tm_layout(title="Houston Vacant Housing Rate Estimates,2011",
title.size =1.7,
legend.frame = TRUE,
title.position = c('left', 'bottom'))+
tm_compass()+
tm_format("World",
legend.position = c(".02", ".2"),
main.title.position =c("center"))
Mapping of errors in estimate
p1<-tm_shape(VAC11)+
tm_polygons(c("pvac"),
title=c("% of Vacant Units"),
palette="Blues",
style="quantile",
n=4)+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position= c('.02','.09'))+
tm_layout(title="Houston Vacant Housing Rate Estimates",
title.size =1.7,
legend.frame = TRUE,
title.position = c('left', 'bottom'))+
tm_compass()+
tm_format("World",
legend.position = c(".02", ".2"),
main.title.position =c("center"))
p2<-tm_shape(VAC11)+
tm_polygons(c("pvac_cv"),
title=c("CV Vacant Housing Units"),
palette="Blues",
style="quantile",
n=4)+
tm_layout(title="Houston Vacant Housing Rate CV",
title.size =1.7,
legend.frame = TRUE,
title.position = c('left', 'bottom'))+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.85, position= c('.02','.09'))+
tm_compass()+
tm_format("World",
legend.position = c(".02", ".2"),
main.title.position =c("center"))
tmap_arrange(p1, p2)
plot(VAC11$pvac, VAC11$pvac_cv, main = "Error in Estimates vs Estimate Size")
Get 2019 estimates
vac19<-get_acs(geography = "tract",
state="TX",
county = "201",
year = 2019,
variables="DP04_0003P" ,
geometry = T,
output="wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
# rename variables and filter missing cases
vac19<- vac19%>%
mutate(pvac19 = DP04_0003PE,
pvac19_er = DP04_0003PM/1.645,
pvac19_cv =100* (pvac19_er/pvac19)) %>%
filter(complete.cases(pvac19), is.finite(pvac19_cv)==T)%>%
select(GEOID, pvac19, pvac19_er,pvac19_cv)
head(vac19)
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -95.65378 ymin: 29.70875 xmax: -95.12716 ymax: 30.00429
## Geodetic CRS: NAD83
## GEOID pvac19 pvac19_er pvac19_cv geometry
## 1 48201311900 10.5 3.890578 37.05312 MULTIPOLYGON (((-95.33319 2...
## 2 48201450200 7.2 3.708207 51.50287 MULTIPOLYGON (((-95.59029 2...
## 3 48201450400 27.9 4.924012 17.64879 MULTIPOLYGON (((-95.62377 2...
## 4 48201554502 1.5 1.094225 72.94833 MULTIPOLYGON (((-95.65325 2...
## 5 48201550603 6.4 2.857143 44.64286 MULTIPOLYGON (((-95.48031 2...
## 6 48201252200 15.4 3.100304 20.13184 MULTIPOLYGON (((-95.18517 2...
Merge 2011 and 2019 data
st_geometry(vac19)<-NULL #strip the geometry from the 2019 data
mdat<-left_join(VAC11, vac19, by=c("GEOID"="GEOID"))
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS: NAD83
## GEOID pvac pvac_er pvac_cv pvac19 pvac19_er pvac19_cv
## 1 48201222100 12.5 3.647416 29.17933 9.3 3.221884 34.64392
## 2 48201221400 16.3 3.768997 23.12268 13.4 3.465046 25.85855
## 3 48201221300 11.0 4.133739 37.57944 3.2 2.188450 68.38906
## 4 48201530500 24.0 3.586626 14.94428 8.3 2.492401 30.02893
## 5 48201530400 20.5 4.680851 22.83342 10.6 2.978723 28.10116
## 6 48201530300 22.4 4.984802 22.25358 21.9 5.167173 23.59440
## geometry
## 1 POLYGON ((-95.34437 29.8805...
## 2 POLYGON ((-95.3931 29.84572...
## 3 POLYGON ((-95.3618 29.84628...
## 4 POLYGON ((-95.38838 29.8306...
## 5 POLYGON ((-95.38838 29.8306...
## 6 POLYGON ((-95.37563 29.8140...
Test for 2011 and 2019 estimates
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
crit<-qnorm(1-alpha/2)
pval<-1-pnorm(abs(test))
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
A similar function significance()
from the tidycensus
package does a similar thing, but with less output.
significance(est1=mdat$pvac,
est2=mdat$pvac19,
moe1=mdat$pvac_er,
moe2 = mdat$pvac19_er,
clevel = .9)
mdat$signif<- significance(est1=mdat$pvac,
est2=mdat$pvac19,
moe1=mdat$pvac_er,
moe2 = mdat$pvac19_er,
clevel = .9)
diff1119<-acstest(names = mdat$GEOID,
geoid = mdat$GEOID,
est1 = mdat$pvac,
est2 = mdat$pvac19,
err1 = mdat$pvac_er,
err2=mdat$pvac19_er,
alpha = .1,
yr1 = 2011, yr2=2019,
span = 5)
head(diff1119)
## name geoid est1 est2 se1 se2 difference test
## 1 48201222100 48201222100 12.5 9.3 2.846094 2.514050 3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789 2.9 0.7259137
## 3 48201221300 48201221300 11.0 3.2 3.225573 1.707656 7.8 2.1371536
## 4 48201530500 48201530500 24.0 8.3 2.798659 1.944831 15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310 9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967 0.5 0.0892484
## result pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01
table(diff1119$result)
##
## insignificant change significant decrease significant increase
## 393 257 101
Compare to tidycensus
table(mdat$signif)
##
## FALSE TRUE
## 393 358
Make a map layout
acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid"))
tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
tm_polygons(c("pvac"),
title=c("% of Vacant Units"),
palette="Blues",
style="quantile",
n=4)+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+
tm_layout(title="Houston Vacant Housing Rate Estimates 2011",
title.size =1.1,
legend.frame = FALSE,
title.position = c('left', 'bottom'))+ tm_compass(position=c('.9','.1'))+tm_credits("Source: American Community Survey", size=.7, position=c('.02','.08'))+
tm_format("World",
legend.position = c(".02", ".2"),legend.title.size=.9,main.title.position =c("center"))
p2<-tm_shape(acs_merge)+
tm_polygons(c("pvac19"),
title=c("% of Vacant Units"),
palette="Blues",
style="quantile",
n=4)+
tm_layout(title="Houston Vacant Housing Rate Estimate 2019",
title.size =1.1,
legend.frame = FALSE,
title.position = c('left', 'bottom'))+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+
tm_compass(position=c('.9','.1'))+tm_credits("Source: American Community Survey", size=.7,position=c('.02','.08'))+
tm_format("World",
legend.position = c(".02", ".2"),legend.title.size=.9,
main.title.position =c("center"))
p3 <- tm_shape(acs_merge)+
tm_polygons(c("result"),
title=c("Changes"),
palette = "Set3")+
tm_layout(title="Houston Vacant Housing Rate Estimate Changes (2011-2019)",
title.size =1.1,
legend.frame = FALSE,
title.position = c('left', 'bottom'))+
tm_scale_bar(breaks=c(0,5,10,15,20), size=.7, position=c('.02','.11'))+tm_credits("Source: American Community Survey", size=.7, position=c('.02','.08'))+
tm_compass()+tm_format("World",legend.position = c(".02", ".2"),legend.title.size=.9,main.title.position =c("center"))
tmap_arrange(p1, p2, p3, nrow=2)