library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(tmap)
v11_Profile <- load_variables(2011, "acs5/profile", cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019, "acs5/profile", cache = TRUE) #demographic
#Search for variables by using grep()
v11_Profile[grep(x = v11_Profile$label, "Vacant", ignore.case = TRUE), c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label, "Vacant", ignore.case = TRUE), c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~
The data profile tables are very useful because they contain lots of pre-calculated variables.
vac11<-get_acs(geography = "tract",
state="TX",
county = "Dallas",
year = 2011,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
vac19<-get_acs(geography = "tract",
state="TX",
county = "Dallas",
year = 2019,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac11 <- vac11%>%
mutate(pvac = DP04_0003PE,
pvac_er = DP04_0003PM/1.645,
pvac_cv =100* (pvac_er/pvac)) %>%
filter(complete.cases(pvac), is.finite(pvac_cv)==T)
vac19 <- vac19%>%
mutate(pvac = DP04_0003PE,
pvac_er = DP04_0003PM/1.645,
pvac_cv =100* (pvac_er/pvac)) %>%
filter(complete.cases(pvac), is.finite(pvac_cv)==T)
vac11M <- tm_shape(vac11)+
tm_polygons(c("pvac"), title=c("% Housing Vacant 2011"), palette="Blues", style="quantile", n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Dallas Vacant Percent Estimates 2011 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass()
vac19M <- tm_shape(vac19)+
tm_polygons(c("pvac"), title=c("% Housing Vacant 2019"), palette="Blues", style="quantile", n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Dallas Vacant Percent Estimates 2019 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass()
tmap_arrange(vac11M, vac19M)
#merge the two years worth of data
mdat<-tigris::geo_join(vac11, as.data.frame(vac19), by_sp="GEOID", by_df="GEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(mdat)
## Simple feature collection with 6 features and 14 fields
## Active geometry column: geometry.x
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -96.80067 ymin: 32.83427 xmax: -96.57525 ymax: 32.94708
## geographic CRS: NAD83
## GEOID NAME.x DP04_0003PE.x
## 1 48113018120 Census Tract 181.20, Dallas County, Texas 10.2
## 2 48113012702 Census Tract 127.02, Dallas County, Texas 5.2
## 3 48113012800 Census Tract 128, Dallas County, Texas 2.9
## 4 48113012900 Census Tract 129, Dallas County, Texas 7.4
## 5 48113013004 Census Tract 130.04, Dallas County, Texas 5.7
## 6 48113019400 Census Tract 194, Dallas County, Texas 6.5
## DP04_0003PM.x pvac.x pvac_er.x pvac_cv.x
## 1 6.4 10.2 3.890578 38.14292
## 2 5.7 5.2 3.465046 66.63549
## 3 3.0 2.9 1.823708 62.88649
## 4 4.9 7.4 2.978723 40.25302
## 5 4.6 5.7 2.796353 49.05882
## 6 4.8 6.5 2.917933 44.89128
## NAME.y DP04_0003PE.y DP04_0003PM.y pvac.y
## 1 Census Tract 181.20, Dallas County, Texas 4.6 4.2 4.6
## 2 Census Tract 127.02, Dallas County, Texas 3.8 3.6 3.8
## 3 Census Tract 128, Dallas County, Texas 1.1 1.4 1.1
## 4 Census Tract 129, Dallas County, Texas 6.4 5.5 6.4
## 5 Census Tract 130.04, Dallas County, Texas 3.2 3.6 3.2
## 6 Census Tract 194, Dallas County, Texas 7.8 5.4 7.8
## pvac_er.y pvac_cv.y rank geometry.x
## 1 2.5531915 55.50416 1 POLYGON ((-96.57525 32.9365...
## 2 2.1884498 57.59079 1 POLYGON ((-96.68648 32.8400...
## 3 0.8510638 77.36944 1 POLYGON ((-96.68664 32.8402...
## 4 3.3434650 52.24164 1 POLYGON ((-96.71915 32.8612...
## 5 2.1884498 68.38906 1 POLYGON ((-96.73348 32.8677...
## 6 3.2826748 42.08557 1 POLYGON ((-96.78691 32.8507...
## geometry.y
## 1 MULTIPOLYGON (((-96.61445 3...
## 2 MULTIPOLYGON (((-96.68511 3...
## 3 MULTIPOLYGON (((-96.70107 3...
## 4 MULTIPOLYGON (((-96.7251 32...
## 5 MULTIPOLYGON (((-96.73332 3...
## 6 MULTIPOLYGON (((-96.80067 3...
Here I create a function that implements the testing procedure used by the Census for comparing estimates across year
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
crit<-qnorm(1-alpha/2)
pval<-2*pnorm(abs(test),lower.tail=F)
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
Here I use the function I just made to do the comparisons
diff1119<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pvac.x, est2 = mdat$pvac.y, err1 = mdat$pvac_er.x, err2=mdat$pvac_er.y,alpha = .1, yr1 = 2011, yr2=2019, span = 5)
head(diff1119)
## name geoid est1 est2 se1
## 1 Census Tract 181.20, Dallas County, Texas 48113018120 10.2 4.6 3.035834
## 2 Census Tract 127.02, Dallas County, Texas 48113012702 5.2 3.8 2.703789
## 3 Census Tract 128, Dallas County, Texas 48113012800 2.9 1.1 1.423047
## 4 Census Tract 129, Dallas County, Texas 48113012900 7.4 6.4 2.324310
## 5 Census Tract 130.04, Dallas County, Texas 48113013004 5.7 3.2 2.182006
## 6 Census Tract 194, Dallas County, Texas 48113019400 6.5 7.8 2.276875
## se2 difference test result pval
## 1 1.9922659 5.6 1.5422018 insignificant change 0.1230246
## 2 1.7076565 1.4 0.4377872 insignificant change 0.6615405
## 3 0.6640886 1.8 1.1462233 insignificant change 0.2517028
## 4 2.6089196 1.0 0.2861950 insignificant change 0.7747287
## 5 1.7076565 2.5 0.9022720 insignificant change 0.3669124
## 6 2.5614847 -1.3 -0.3793238 insignificant change 0.7044474
table(diff1119$result)
##
## insignificant change significant decrease significant increase
## 342 117 43
acs_merge<-left_join(mdat, diff1119, by=c("GEOID"="geoid")) #adding difference info to merged data
diffmap <- tm_shape(acs_merge) +
tm_polygons(col = "difference", style="quantile", n=5, title=c("% Housing Vacant Change"))+
tm_scale_bar()+
tm_layout(title="Dallas Vacant Percent Change from 2011 to 2019 - Quantile Breaks", title.size =1.5, legend.frame = TRUE, title.position = c('right', 'top'))+
tm_compass()
diffmap
## Variable(s) "difference" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.