library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
You should always check the variable name of your ACS variable, you cannot assume that the variable name stays the same across years.
v11_Profile <- load_variables(2011,
"acs5/profile",
cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
"acs5/profile",
cache = TRUE) #demographic
#Search for variables by using grep()
v11_Profile[grep(x = v11_Profile$label,
"VACANT",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label,
"VACANT",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~
vac11<-get_acs(geography = "tract",
state="TX",
county = "Harris County",
year = 2011,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac11 <- vac11%>%
mutate(pvac = DP04_0003PE,
pvac_er = DP04_0003PM/1.645,
pvac_cv =100* (pvac_er/pvac)) %>%
filter(complete.cases(pvac), is.finite(pvac_cv)==T)%>%
select(GEOID, pvac, pvac_er,pvac_cv)
vac19<-get_acs(geography = "tract",
state="TX",
county = "Harris County",
year = 2019,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac19 <- vac19%>%
mutate(pvac19 = DP04_0003PE,
pvac19_er = DP04_0003PM/1.645,
pvac19_cv =100* (pvac19_er/pvac19)) %>%
filter(complete.cases(pvac19), is.finite(pvac19_cv)==T)%>%
select(GEOID, pvac19, pvac19_er,pvac19_cv)
head(vac11)
head(vac19)
#merge the two years worth of data
st_geometry(vac19)<-NULL #strip the geometry from the 2019 data
mdat<-left_join(vac11, vac19, by=c("GEOID"="GEOID"))
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS: NAD83
## GEOID pvac pvac_er pvac_cv pvac19 pvac19_er pvac19_cv
## 1 48201222100 12.5 3.647416 29.17933 9.3 3.221884 34.64392
## 2 48201221400 16.3 3.768997 23.12268 13.4 3.465046 25.85855
## 3 48201221300 11.0 4.133739 37.57944 3.2 2.188450 68.38906
## 4 48201530500 24.0 3.586626 14.94428 8.3 2.492401 30.02893
## 5 48201530400 20.5 4.680851 22.83342 10.6 2.978723 28.10116
## 6 48201530300 22.4 4.984802 22.25358 21.9 5.167173 23.59440
## geometry
## 1 POLYGON ((-95.34437 29.8805...
## 2 POLYGON ((-95.3931 29.84572...
## 3 POLYGON ((-95.3618 29.84628...
## 4 POLYGON ((-95.38838 29.8306...
## 5 POLYGON ((-95.38838 29.8306...
## 6 POLYGON ((-95.37563 29.8140...
Here I create a function that implements the testing procedure used by the Census for comparing estimates across year. The test for two estimates is:
\[\text{z = }\left| \frac{est_1 - est_2}{(1-C)*\sqrt{se_1^2 + se_2^2}}\right|\]
Where \(C\) is the number of years overlap in the estimates.
Here is my function:
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
crit<-qnorm(1-alpha/2)
pval<-1-pnorm(abs(test))
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
diff1119<-acstest(names = mdat$GEOID,
geoid = mdat$GEOID,
est1 = mdat$pvac,
est2 = mdat$pvac19,
err1 = mdat$pvac_er,
err2=mdat$pvac19_er,
alpha = .1,
yr1 = 2011, yr2=2019,
span = 5)
head(diff1119)
## name geoid est1 est2 se1 se2 difference test
## 1 48201222100 48201222100 12.5 9.3 2.846094 2.514050 3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789 2.9 0.7259137
## 3 48201221300 48201221300 11.0 3.2 3.225573 1.707656 7.8 2.1371536
## 4 48201530500 48201530500 24.0 8.3 2.798659 1.944831 15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310 9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967 0.5 0.0892484
## result pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01
table(diff1119$result)
##
## insignificant change significant decrease significant increase
## 393 257 101
acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid"))
tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
tm_polygons(c("pvac"),
title=c("% Vacant Housing Units 2011"),
palette="Blues",
style="quantile",
n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Houston Vacant Housing Units Estimates 2011",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
p2<-tm_shape(acs_merge)+
tm_polygons(c("pvac19"),
title=c("% Vacant Housing Units 2019"),
palette="Blues",
style="quantile",
n=5)+
#tm_format("World", title="Houston Poverty Rate CV", legend.outside=T)+
tm_layout(title="Houston Vacant Housing Units Estimate 2019",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
p3 <- tm_shape(acs_merge)+
tm_polygons(c("result"),
title=c("Changes"),
palette = "Set2")+
#tm_format("World", title="Houston Poverty Rate CV", legend.outside=T)+
tm_layout(title="Houston Vacant Housing Units Estimate Changes",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()
tmap_arrange(p1, p2, p3, nrow=2)