This example will use R to downloard American Coummunity Survey summary file tables using the tidycensus
package. The goal of this example is to illustrate how to download data from the Census API using R and to illustrate how to create basic descriptive maps of attributes.
The example will use data from San Antonio, Texas from the 2015 American Community Survey summary file.
Obtain one at http://api.census.gov/data/key_signup.html
use census_api_key(key = "yourkeyhere", install = T)
one time to install your key for use in tidycensus
library(tidycensus); library(tidyverse); library(sf)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1.9000 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.5
## v tidyr 0.8.0 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
v15_Profile <- load_variables(2015, "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#
View(v15_Profile)
#Search for variables by
v15_Profile[grep(x = v15_Profile$label, "HighSchool"), c("name", "label")]
## # A tibble: 0 x 2
## # ... with 2 variables: name <chr>, label <chr>
v15_Profile[grep(x = v15_Profile$label, "Built 2000 to 2009"), c("name", "label")]
## # A tibble: 4 x 2
## name label
## <chr> <chr>
## 1 DP04_0019E YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 2 DP04_0019M YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 3 DP04_0019PE YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
## 4 DP04_0019PM YEAR STRUCTURE BUILT!!Total housing units!!Built 2000 to 20~
The data profile tables are very useful because they contain lots of pre-calculated variables.
sa_acs<-get_acs(geography = "tract", state="TX", county = c("Bexar"), year = 2015,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE", "DP05_0072PE", "DP02_0113PE") ,
summary_var = "B01001_001",
geometry = T, output = "wide")
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
## Using the ACS Data Profile
#create a county FIPS code - 5 digit
sa_acs$county<-substr(sa_acs$GEOID, 1, 5)
#rename variables and filter missing cases
sa_acs2<-sa_acs%>%
mutate(totpop= DP05_0001E,tpop_cv=(DP05_0001M/1.645)/DP05_0001E ,
phsormore=DP02_0066PE,phsormore_cv=(DP02_0066PM/1.645)/DP02_0066PE, medhhinc=DP03_0062E,ppov=DP03_0119PE ) %>%
# st_transform(crs = 102740)%>%
filter(complete.cases(totpop, phsormore, medhhinc))
#(acs.sub$medhhinc_ME/1.645) / acs.sub$medhhinc) * 100
class(sa_acs2)
#change the directory
sf::st_write(sa_acs2,dsn="C:/Users/ozd504/Google Drive/classes/dem7093/GIS_class_2018/data",layer="sa_tract_dp", driver="ESRI Shapefile", delete_layer=T, update=T)
Here I generate a quantile break for the coefficient of variation in census tract population estimates
library(classInt)
library(dplyr)
cv_map<-sa_acs2 %>%
mutate(cv_cut=cut(tpop_cv,breaks = quantile(tpop_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T),
ed_cut=cut(phsormore_cv,breaks = quantile(phsormore_cv, na.rm=T, p=seq(0,1,length.out = 6)),include.lowest = T) )
library(ggsn)
p1<-ggplot(cv_map, aes(fill = cv_cut, color = cv_cut)) +
geom_sf() +
ggtitle("Coefficent of Variation in Population Estimate",
subtitle = "Bexar County Texas, 2015 ACS")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(cv_map)+
scalebar(cv_map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
p1
p2<-ggplot(cv_map, aes(fill = ed_cut, color = ed_cut)) +
geom_sf() +
ggtitle("Coefficent of Variation in Estimate of % Greater than High School",
subtitle = "Bexar County Texas, 2015 ACS")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(cv_map)+
scalebar(cv_map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
p2
When we have data that are collected over time on the same geographies, we may be interested in whether the variable we’re mapping has changed much over time.
In the ACS, we can compare two estimates if the years used to produce the esimates do not overlap. For instance, we could compare the 2005-2009 estimates to the 2011-2015 estimates, but we could not compare the 2010-2014 to the 2011-2015, becuase they share years of data. See this for the official position on the subject.
There is a fix that will let us do this. see below
Here we take the poverty rate in tracts derived from the 2007-2011 ACS and compare it to the estimate from the 2011-2015 ACS
#v11_Profile <- load_variables(2011 ,dataset = "acs5") #demographic profile
options(tigris_use_cache = TRUE)
#2009 ACS
acs_11<-get_acs(geography = "tract", state="TX", county = "029", year = 2011, variables="B17001_002",summary_var = "B17001_001", geometry = TRUE, output = "wide")
## Getting data from the 2007-2011 5-year ACS
#2015 ACS
acs_15<-get_acs(geography = "tract", state="TX", county = "Bexar", year = 2015, variables="B17001_002",summary_var = "B17001_001",geometry = TRUE, output = "wide")
## Getting data from the 2011-2015 5-year ACS
#create variables with nice names
acs_11<-acs_11%>%
mutate(pov11=B17001_002E/summary_est, poverr11=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
select(GEOID, NAME, pov11, poverr11)
#create variables with nice names
acs_15<-acs_15%>%
mutate(pov15=B17001_002E/summary_est, poverr15=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
select(GEOID, NAME, pov15, poverr15)
#merge the two years worth of data
mdat<-left_join(acs_11, as.data.frame(acs_15), by="GEOID", st_join=FALSE)
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Active geometry column: geometry.x
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -98.56778 ymin: 29.32196 xmax: -98.41912 ymax: 29.44661
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## GEOID NAME.x pov11
## 1 48029170401 Census Tract 1704.01, Bexar County, Texas 0.4730151
## 2 48029170402 Census Tract 1704.02, Bexar County, Texas 0.2710789
## 3 48029160702 Census Tract 1607.02, Bexar County, Texas 0.3448651
## 4 48029160200 Census Tract 1602, Bexar County, Texas 0.1369106
## 5 48029141600 Census Tract 1416, Bexar County, Texas 0.1108108
## 6 48029141200 Census Tract 1412, Bexar County, Texas 0.1447313
## poverr11 NAME.y pov15
## 1 0.09238926 Census Tract 1704.01, Bexar County, Texas 0.5043601
## 2 0.06925421 Census Tract 1704.02, Bexar County, Texas 0.2909091
## 3 0.11298489 Census Tract 1607.02, Bexar County, Texas 0.3492222
## 4 0.08311340 Census Tract 1602, Bexar County, Texas 0.1575614
## 5 0.06370955 Census Tract 1416, Bexar County, Texas 0.1166735
## 6 0.07850766 Census Tract 1412, Bexar County, Texas 0.3406908
## poverr15 geometry.x geometry.y
## 1 0.09935877 POLYGON ((-98.5251 29.44358... MULTIPOLYGON (((-98.54106 2...
## 2 0.08782205 POLYGON ((-98.53594 29.4405... MULTIPOLYGON (((-98.54196 2...
## 3 0.07324105 POLYGON ((-98.55955 29.3849... MULTIPOLYGON (((-98.56773 2...
## 4 0.06412144 POLYGON ((-98.51153 29.3992... MULTIPOLYGON (((-98.52883 2...
## 5 0.05433554 POLYGON ((-98.43569 29.3351... MULTIPOLYGON (((-98.45349 2...
## 6 0.14070293 POLYGON ((-98.44832 29.3826... MULTIPOLYGON (((-98.44833 2...
## geometry
## 1 GEOMETRYCOLLECTION EMPTY
## 2 GEOMETRYCOLLECTION EMPTY
## 3 GEOMETRYCOLLECTION EMPTY
## 4 GEOMETRYCOLLECTION EMPTY
## 5 GEOMETRYCOLLECTION EMPTY
## 6 GEOMETRYCOLLECTION EMPTY
Here I create a function that implements the testing procedure used by the Census for comparing estimates across year
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
crit<-qnorm(1-alpha/2)
pval<-2*pnorm(abs(test),lower.tail=F)
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test < 0]<- "significant increase"
result[pval < alpha & test > 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
Here I use the function I just made to do the comparisons
diff0915<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pov11, est2 = mdat$pov15, err1 = mdat$poverr11, err2=mdat$poverr15, alpha = .1, yr1 = 2011, yr2=2015, span = 5)
head(diff0915)
## name geoid est1
## 1 Census Tract 1704.01, Bexar County, Texas 48029170401 0.4730151
## 2 Census Tract 1704.02, Bexar County, Texas 48029170402 0.2710789
## 3 Census Tract 1607.02, Bexar County, Texas 48029160702 0.3448651
## 4 Census Tract 1602, Bexar County, Texas 48029160200 0.1369106
## 5 Census Tract 1416, Bexar County, Texas 48029141600 0.1108108
## 6 Census Tract 1412, Bexar County, Texas 48029141200 0.1447313
## est2 se1 se2 difference test
## 1 0.5043601 0.07209172 0.07753006 -0.031345011 -0.36261649
## 2 0.2909091 0.05403935 0.06852791 -0.019830215 -0.27829175
## 3 0.3492222 0.08816258 0.05715029 -0.004357117 -0.05079068
## 4 0.1575614 0.06485373 0.05003422 -0.020650793 -0.30877316
## 5 0.1166735 0.04971283 0.04239825 -0.005862667 -0.10989527
## 6 0.3406908 0.06125985 0.10979108 -0.195959568 -1.90892748
## result pval
## 1 insignificant change 0.71689139
## 2 insignificant change 0.78078841
## 3 insignificant change 0.95949232
## 4 insignificant change 0.75749409
## 5 insignificant change 0.91249244
## 6 significant increase 0.05627145
options(scipen=999)
acs_merge<-left_join(sa_acs, diff0915, by=c("GEOID"="geoid"))
## Warning: Column `GEOID`/`geoid` joining character vector and factor,
## coercing into character vector
brks<-quantile(c(acs_11$pov11, acs_15$pov15), probs = seq(0,1,.2), na.rm=T)
acs_11map<-acs_11%>%
mutate(pov_q = cut(pov11,breaks = brks, include.lowest = T))
p1<-ggplot(acs_11map, aes( fill=pov_q, color=pov_q))+
geom_sf() +
ggtitle("Poverty Rate 2011 ",
subtitle = "Bexar County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(acs_11map)+
scalebar(acs_11map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
acs_15map<-acs_15%>%
mutate(pov_q = cut(pov15,breaks = brks, include.lowest = T))
p2<-ggplot(acs_15map, aes( fill=pov_q, color=pov_q))+
geom_sf() +
ggtitle("Poverty Rate 2015 ",
subtitle = "Bexar County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(acs_15map)+
scalebar(acs_15map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
myfil<-c("grey", "red", "blue")
p3<-ggplot(acs_merge, aes( fill=result))+
geom_sf() +
ggtitle("Comparison of Poverty Rate 2011 to 2015",
subtitle = "Bexar County Texas")+
#scale_fill_brewer(palette = "Accent") +
#scale_color_brewer(palette = "Accent")+
scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(acs_merge)+
scalebar(acs_merge, dist = 5, dd2km =T, model="GRS80", st.size = 2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
out<-grid.arrange(p1, p2, p3, nrow = 2)
out
## TableGrob (2 x 2) "arrange": 3 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]