library(tidycensus); library(tidyverse); library(sf)
## ── Attaching packages ────────────────────── tidyverse 1.2.1.9000 ──
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.5
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
v15_Profile <- load_variables(2015, "acs5/profile", cache = TRUE) #demographic profile tables
v15_tables <- load_variables(2015 , "acs5", cache = TRUE) #all tables
#v10_sf1_tables <- load_variables(2010 , "sf1", cache = TRUE) #all tables for 2010 SF1
#v10_sf1_tables <- load_variables(2000 , "sf3", cache = TRUE) #all tables for 2000 SF#
#View(v15_Profile)
#Search for variables by
#v15_Profile[grep(x = v15_Profile$label, "B17001_002"), c("name", "label")]
#v15_Profile[grep(x = v15_Profile$label, "Built 2000 to 2009"), c("name", "label")]
Here we take the poverty rate in tracts derived from the 2006-2010 ACS and compare it to the estimate from the 2011-2015 ACS
library(classInt)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source'))`
library(dplyr)
library(ggplot2)
#v11_Profile <- load_variables(2011 ,dataset = "acs5") #demographic profile
options(tigris_use_cache = TRUE)
#2015 ACS
webb_acs_15<-get_acs(geography = "tract", state="48", county = "479", year = 2015, variables="B17001_002",summary_var = "B17001_001", geometry = TRUE, output = "wide")
## Getting data from the 2011-2015 5-year ACS
#2010 ACS
webb_acs_10<-get_acs(geography = "tract", state="TX", county = "479", year = 2010, variables="B17001_002",summary_var = "B17001_001", geometry = TRUE, output = "wide")
## Getting data from the 2006-2010 5-year ACS
#create variables with nice names
acs_15<-webb_acs_15%>%
mutate(pov15=B17001_002E/summary_est, poverr15=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
select(GEOID, NAME, pov15, poverr15)
acs_10<-webb_acs_10%>%
mutate(pov10=B17001_002E/summary_est, poverr10=moe_prop(B17001_002E, denom = summary_est, moe_num = B17001_002M, moe_denom = summary_moe))%>%
select(GEOID, NAME, pov10, poverr10)
#merge the two years worth of data
mdat<-left_join(acs_10, as.data.frame(acs_15), by="GEOID", st_join=FALSE)
head(mdat)
## Simple feature collection with 6 features and 7 fields
## Active geometry column: geometry.x
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -99.50279 ymin: 27.47402 xmax: -99.44769 ymax: 27.53058
## epsg (SRID): 4269
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## GEOID NAME.x pov10 poverr10
## 1 48479000106 Census Tract 1.06, Webb County, Texas 0.3328264 0.1250222
## 2 48479000107 Census Tract 1.07, Webb County, Texas 0.6368375 0.1591927
## 3 48479000300 Census Tract 3, Webb County, Texas 0.3618396 0.1179660
## 4 48479000903 Census Tract 9.03, Webb County, Texas 0.5636975 0.1155601
## 5 48479001003 Census Tract 10.03, Webb County, Texas 0.3406822 0.1925951
## 6 48479001103 Census Tract 11.03, Webb County, Texas 0.3965812 0.1662029
## NAME.y pov15 poverr15
## 1 Census Tract 1.06, Webb County, Texas 0.5017866 0.12427429
## 2 Census Tract 1.07, Webb County, Texas 0.4048738 0.08696312
## 3 Census Tract 3, Webb County, Texas 0.7001698 0.13228184
## 4 Census Tract 9.03, Webb County, Texas 0.5237918 0.14829979
## 5 Census Tract 10.03, Webb County, Texas 0.3663872 0.15673194
## 6 Census Tract 11.03, Webb County, Texas 0.3020908 0.14394579
## geometry.x geometry.y
## 1 MULTIPOLYGON (((-99.47155 2... MULTIPOLYGON (((-99.4717 27...
## 2 MULTIPOLYGON (((-99.46845 2... MULTIPOLYGON (((-99.47458 2...
## 3 MULTIPOLYGON (((-99.49455 2... MULTIPOLYGON (((-99.50269 2...
## 4 MULTIPOLYGON (((-99.48655 2... MULTIPOLYGON (((-99.49271 2...
## 5 MULTIPOLYGON (((-99.45719 2... MULTIPOLYGON (((-99.46422 2...
## 6 MULTIPOLYGON (((-99.46285 2... MULTIPOLYGON (((-99.46799 2...
## geometry
## 1 GEOMETRYCOLLECTION EMPTY
## 2 GEOMETRYCOLLECTION EMPTY
## 3 GEOMETRYCOLLECTION EMPTY
## 4 GEOMETRYCOLLECTION EMPTY
## 5 GEOMETRYCOLLECTION EMPTY
## 6 GEOMETRYCOLLECTION EMPTY
Here I create a function that implements the testing procedure used by the Census for comparing estimates across year
acstest<-function(names,geoid, est1, err1, est2, err2, alpha, yr1, yr2, span){
se1<-err1/qnorm(.90)
se2<-err2/qnorm(.90)
yrs1<-seq(yr1, to=yr1-span)
yrs2<-seq(yr2, to=yr2-span)
C<-mean(yrs2%in%yrs1)
diff<- (est1-est2)
test<-(est1-est2) / (sqrt(1-C)*sqrt((se1^2+se2^2)))
crit<-qnorm(1-alpha/2)
pval<-2*pnorm(abs(test),lower.tail=F)
result<-NULL
result[pval > alpha]<-"insignificant change"
result[pval < alpha & test > 0]<- "significant increase"
result[pval < alpha & test < 0]<-"significant decrease"
data.frame(name=names,geoid=geoid, est1=est1, est2=est2, se1=se1, se2=se2,difference=diff, test=test, result=result, pval=pval)
}
Here I use the function I just made to do the comparisons
diff1015<-acstest(names = mdat$NAME.x, geoid = mdat$GEOID, est1 = mdat$pov10, est2 = mdat$pov15, err1 = mdat$poverr10, err2=mdat$poverr15, alpha = .1, yr1 = 2011, yr2=2015, span = 5)
head(diff1015)
## name geoid est1 est2
## 1 Census Tract 1.06, Webb County, Texas 48479000106 0.3328264 0.5017866
## 2 Census Tract 1.07, Webb County, Texas 48479000107 0.6368375 0.4048738
## 3 Census Tract 3, Webb County, Texas 48479000300 0.3618396 0.7001698
## 4 Census Tract 9.03, Webb County, Texas 48479000903 0.5636975 0.5237918
## 5 Census Tract 10.03, Webb County, Texas 48479001003 0.3406822 0.3663872
## 6 Census Tract 11.03, Webb County, Texas 48479001103 0.3965812 0.3020908
## se1 se2 difference test result
## 1 0.09755536 0.09697174 -0.16896026 -1.5043994 insignificant change
## 2 0.12421875 0.06785768 0.23196378 2.0071116 significant increase
## 3 0.09204934 0.10322007 -0.33833017 -2.9961095 significant decrease
## 4 0.09017200 0.11571894 0.03990566 0.3331503 insignificant change
## 5 0.15028275 0.12229858 -0.02570497 -0.1624819 insignificant change
## 6 0.12968880 0.11232150 0.09449035 0.6745262 insignificant change
## pval
## 1 0.132478571
## 2 0.044737779
## 3 0.002734483
## 4 0.739020859
## 5 0.870926402
## 6 0.499976825
options(scipen=999)
acs_merge<-left_join(webb_acs_15, diff1015, by=c("GEOID"="geoid"))
## Warning: Column `GEOID`/`geoid` joining character vector and factor,
## coercing into character vector
brks<-quantile(c(acs_10$pov10, acs_15$pov15), probs = seq(0,1,.2), na.rm=T)
acs_10map<-acs_10%>%mutate(pov_q = cut(pov10,breaks = brks, include.lowest = T))
library(ggsn)
## Loading required package: grid
p1<-ggplot(acs_10map, aes( fill=pov_q, color=pov_q))+
geom_sf() +
ggtitle("Poverty Rate 2010 ",
subtitle = "Bexar County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+north(acs_10map)+
scalebar(acs_10map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
acs_15map<-acs_15%>%
mutate(pov_q = cut(pov15,breaks = brks, include.lowest = T))
p2<-ggplot(acs_15map, aes( fill=pov_q, color=pov_q))+
geom_sf() +
ggtitle("Poverty Rate 2015 ",
subtitle = "Webb County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(acs_15map)+
scalebar(acs_15map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
cv_10map<-acs_10%>%
mutate(pov_q = cut(poverr10,breaks = brks, include.lowest = T))
p3<-ggplot(cv_10map, aes( fill=pov_q, color=pov_q))+
geom_sf() +
ggtitle("Coefficient of variation for Poverty Rate 2010",
subtitle = "Webb County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(cv_10map)+
scalebar(cv_10map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
cv_15map<-acs_15%>%mutate(pov_q = cut(poverr15,breaks = brks, include.lowest = T))
p4<-ggplot(cv_15map, aes( fill=pov_q, color=pov_q))+geom_sf() +
ggtitle("Coefficient of variation for Poverty Rate 2015",
subtitle = "Webb County Texas")+
scale_fill_brewer(palette = "Blues") +
scale_color_brewer(palette = "Blues")+
#scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(cv_15map)+
scalebar(cv_15map, dist = 5, dd2km =T, model="GRS80", st.size = 2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
out<-grid.arrange(p1, p2, p3, p4, nrow = 2)
out
## TableGrob (2 x 2) "arrange": 4 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
Based on the coeffient of variation seen from the map, majority of the webb county poverty estimates are less reliable (cv=(0.172,0.323)). Only small part of the webb county located south west has shown more reliability (cv=(0.0295,0.172))
myfil<-c("grey", "red", "blue")
p5<-ggplot(acs_merge, aes( fill=result))+
geom_sf() +
ggtitle("Comparison of Poverty Rate 2011 to 2015",
subtitle = "Bexar County Texas")+
#scale_fill_brewer(palette = "Accent") +
#scale_color_brewer(palette = "Accent")+
scale_fill_manual(values=myfil)+
theme(axis.text.x = element_blank(), axis.text.y = element_blank())+
north(acs_merge)+
scalebar(acs_merge, dist = 5, dd2km =T, model="GRS80", st.size = 2)
p5