This example will use R to download American Community Survey summary file tables using the tidycensus package. The goal of this example is to illustrate how to download data from the Census API using R, how to create basic descriptive maps of attributes and how to construct a change map between two time periods.
The example will use data from Harris County, Texas from the American Community Survey summary file.
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
census_api_key(key = "d42eebdfb8a5be15b37eb8cef2a3abc37a71f12b", install = T, overwrite = T)
## Your original .Renviron will be backed up and stored in your R HOME directory if needed.
## Your API key has been stored in your .Renviron and can be accessed by Sys.getenv("CENSUS_API_KEY").
## To use now, restart R or run `readRenviron("~/.Renviron")`
## [1] "d42eebdfb8a5be15b37eb8cef2a3abc37a71f12b"
You should always check the variable name of your ACS variable, you cannot assume that the variable name stays the same across years.
v11_Profile <- load_variables(2011,
"acs5/profile",
cache = TRUE) #demographic profile tables
v19_Profile <- load_variables(2019,
"acs5/profile",
cache = TRUE) #demographic
#Search for variables by using grep()
v11_Profile[grep(x = v11_Profile$label,
"vACANT HOUSING",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Vacant housing units
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Vacant housing units
v19_Profile[grep(x = v19_Profile$label,
"vACANT HOUSING",
ignore.case = TRUE),
c("name", "label")]
## # A tibble: 2 x 2
## name label
## <chr> <chr>
## 1 DP04_0003 Estimate!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing u~
## 2 DP04_0003P Percent!!HOUSING OCCUPANCY!!Total housing units!!Vacant housing un~
vac_hous11 = get_acs(geography = "tract",
state="TX",
county = "Harris County",
year = 2011,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac_hous11 = vac_hous11 %>%
mutate(pvac_hous = DP04_0003PE,
pvac_hous_er = DP04_0003PM/1.645,
ppvac_hous_cv =100* (pvac_hous_er/pvac_hous)) %>%
filter(complete.cases(pvac_hous), is.finite(ppvac_hous_cv)==T)%>%
select(GEOID, pvac_hous, pvac_hous_er,ppvac_hous_cv)
head(vac_hous11)
tm_shape(vac_hous11)+
tm_polygons(c("pvac_hous"),
title=c("% Vacant Housing Units"),
palette="Blues",
style="quantile",
n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Vacant Housing Units Rate Estimates, Harris County, Texas - Quantile Breaks",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
I compared vacant housing rates in Harris County tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2015-2019 ACS.
# get the 2019 estimates
vac_hous19 = get_acs(geography = "tract",
state="TX",
county = "Harris County",
year = 2019,
variables="DP04_0003P" ,
geometry = T,
output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
#rename variables and filter missing cases
vac_hous19 = vac_hous19 %>%
mutate(pvac_hous19 = DP04_0003PE,
pvac_hous19_er = DP04_0003PM/1.645,
ppvac_hous19_cv =100* (pvac_hous19_er/pvac_hous19)) %>%
filter(complete.cases(pvac_hous19), is.finite(ppvac_hous19_cv)==T)%>%
select(GEOID, pvac_hous19, pvac_hous19_er,ppvac_hous19_cv)
head(vac_hous19)
#merge the two years worth of data
st_geometry(vac_hous19) = NULL #strip the geometry from the 2019 data
mdat = left_join(vac_hous11, vac_hous19, by=c("GEOID"="GEOID"))
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -95.40302 ymin: 29.8129 xmax: -95.3233 ymax: 29.8869
## Geodetic CRS: NAD83
## GEOID pvac_hous pvac_hous_er ppvac_hous_cv pvac_hous19 pvac_hous19_er
## 1 48201222100 12.5 3.647416 29.17933 9.3 3.221884
## 2 48201221400 16.3 3.768997 23.12268 13.4 3.465046
## 3 48201221300 11.0 4.133739 37.57944 3.2 2.188450
## 4 48201530500 24.0 3.586626 14.94428 8.3 2.492401
## 5 48201530400 20.5 4.680851 22.83342 10.6 2.978723
## 6 48201530300 22.4 4.984802 22.25358 21.9 5.167173
## ppvac_hous19_cv geometry
## 1 34.64392 POLYGON ((-95.34437 29.8805...
## 2 25.85855 POLYGON ((-95.3931 29.84572...
## 3 68.38906 POLYGON ((-95.3618 29.84628...
## 4 30.02893 POLYGON ((-95.38838 29.8306...
## 5 28.10116 POLYGON ((-95.38838 29.8306...
## 6 23.59440 POLYGON ((-95.37563 29.8140...
I use Dr. Sparks’ brilliant function for testing the significance of the differences between vacant housing rates in Harris county between 2011 and 2019:
acstest<- function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt(se1^2+se2^2))
crit<-qnorm(1-alpha/2)
pval<-1-pnorm(abs(test))
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
mdat$signif<- significance(est1=mdat$pvac_hous,
est2=mdat$pvac_hous19,
moe1=mdat$pvac_hous_er,
moe2 = mdat$pvac_hous19_er,
clevel = .9)
diff11_19 = acstest(names = mdat$GEOID,
geoid = mdat$GEOID,
est1 = mdat$pvac_hous,
est2 = mdat$pvac_hous19,
err1 = mdat$pvac_hous_er,
err2=mdat$pvac_hous19_er,
alpha = .1,
yr1 = 2011, yr2=2019,
span = 5)
head(diff11_19)
## name geoid est1 est2 se1 se2 difference test
## 1 48201222100 48201222100 12.5 9.3 2.846094 2.514050 3.2 0.8426686
## 2 48201221400 48201221400 16.3 13.4 2.940964 2.703789 2.9 0.7259137
## 3 48201221300 48201221300 11.0 3.2 3.225573 1.707656 7.8 2.1371536
## 4 48201530500 48201530500 24.0 8.3 2.798659 1.944831 15.7 4.6067287
## 5 48201530400 48201530400 20.5 10.6 3.652487 2.324310 9.9 2.2867289
## 6 48201530300 48201530300 22.4 21.9 3.889662 4.031967 0.5 0.0892484
## result pval
## 1 insignificant change 1.997069e-01
## 2 insignificant change 2.339458e-01
## 3 significant decrease 1.629275e-02
## 4 significant decrease 2.045265e-06
## 5 significant decrease 1.110583e-02
## 6 insignificant change 4.644422e-01
table(diff11_19$result)
##
## insignificant change significant decrease significant increase
## 393 257 101
acs_merge<-left_join(mdat, diff11_19, by=c("GEOID"="geoid"))
tmap_mode("plot")
## tmap mode set to plotting
p1<-tm_shape(acs_merge)+
tm_polygons(c("pvac_hous"),
title=c("% of Vacant Housing Units 2011"),
palette="Blues",
style="quantile",
n=5)+
tm_scale_bar()+
tm_layout(title="Vacant Housing Units Rate Estimates, Harris County 2011",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
p2<-tm_shape(acs_merge)+
tm_polygons(c("pvac_hous19"),
title=c("% of Vacant Housing Units 2019"),
palette="Blues",
style="quantile",
n=5)+
tm_layout(title="Vacant Housing Units Rate Estimates, Harris County 2019",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
p3 <- tm_shape(acs_merge)+
tm_polygons(c("result"),
title=c("Changes"),
palette = "Set2")+
tm_layout(title="Vacant Housing Units Rate Estimates Change",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_scale_bar()+
tm_compass()
tmap_arrange(p1, p2, p3, nrow=2)