Calculate segregation indices for a geography of your choosing, calulate two indices of segregation.
What are the two groups you used for your index?
Create a map and a descriptive summary of your indices
For this homework, I’m calculating isolation and dissimilarity indices for Hispanic Households and Black Households by school districts in Texas (small geography) by Metro area (large geography). The reference group for the dissimilarity indices are White HHs.
#Use this to search for variables
#acs2019 <- load_variables(2019 , "acs5", cache = TRUE)
#ec2019 <- load_variables(2019 , "census of governments", cache = TRUE)
#demographic profile tables
#Use this to search for variables
#acs2019 <- load_variables(2019 , "acs5", cache = TRUE) #demographic profile tables
tx_sd<-get_acs(geography = "school district (unified)",
state="TX",
year = 2019,
variables=c("DP05_0001E",
"B11001_001E",
"B11001H_001E",
"B11001B_001E",
"B11001I_001E"),
geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Fetching data by table type ("B/C", "S", "DP") and combining the result.
#rename variables and filter missing cases
tx_sd<-tx_sd%>%
mutate(totpop=DP05_0001E,
totHH = B11001_001E,
whiteHH = B11001H_001E,
pwhiteHH = whiteHH/(totHH+0.000001),
blackHH =B11001B_001E,
pblackHH = blackHH/(totHH+0.000001),
hispHH =B11001I_001E,
phispHH =hispHH/(totHH+0.000001))
tx_sd<-na.omit(tx_sd)
#pull texas shapefile for context
state<-get_acs(geography = "state", variables=c("DP05_0001E"), state="TX", year=2019, geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
# Grab Metro Areas
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:tidycensus':
##
## fips_codes
MSAs<-core_based_statistical_areas(cb=T,year=2019)
SANB<-MSAs[MSAs$GEOID=="41700",]
DFW<-MSAs[MSAs$GEOID=="19100",]
HOU<-MSAs[MSAs$GEOID=="26420",]
AUS<-MSAs[MSAs$GEOID=="12420",]
#Merge MSAs into new shapefile (for some reason couldn't query them all together)
TX_MSAs <- rbind(SANB, DFW, HOU, AUS)
#Spatial Join to show School Districts by MSA
# not completely happy with how the districts were assigned to the MSAs (i tried all the options from intersects to covered_by etc) but that can be dealt with later.
tx_sd <- st_join(tx_sd, left = TRUE, join=st_intersects, largest=TRUE, TX_MSAs["NAME"])
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#Remove NAs (only looking at metro area school districts)
tx_sd<-na.omit(tx_sd)
tx_sd<-tx_sd%>%
mutate(MSA=NAME.y)
tx_sd<- tx_sd%>%
dplyr::select(GEOID, MSA, totpop, totHH, whiteHH, pwhiteHH, blackHH, pblackHH, hispHH, phispHH)%>%
arrange(MSA, GEOID)
# Calculate totals by Metro Area and merge back
MSA_Sum<-tx_sd%>%
group_by(MSA)%>%
summarise(MSA_totHH=sum(totHH),
MSA_whiteHH=sum(whiteHH),
MSA_blackHH=sum(blackHH),
MSA_hispHH=sum(blackHH))
st_geometry(MSA_Sum)<-NULL
tx_sd_merge<-left_join(tx_sd,
MSA_Sum,
by="MSA")
#Examining MSA data
library(tmap)
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
tm_shape(state)+
tm_polygons(alpha=1)+
tm_shape(tx_sd_merge)+
tm_polygons("MSA", title="School Districts by MSA", border.alpha = 1)+
tm_shape(TX_MSAs)+
tm_polygons(alpha=0, border.col = "black")+
tm_format("World", title="Texas School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT", breaks=c(0,2.5,5))+
tm_compass()
tm_shape(state)+
tm_polygons(alpha=1)+
tm_shape(tx_sd_merge)+
tm_polygons("pwhiteHH", title="Percent of White HHs", border.alpha = 0)+
tm_shape(TX_MSAs)+
tm_polygons(alpha=0, border.col = "black")+
tm_format("World", title="Texas School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT", breaks=c(0,2.5,5))+
tm_compass()
tm_shape(state)+
tm_polygons(alpha=1)+
tm_shape(tx_sd_merge)+
tm_polygons("pblackHH", title="Percent of Black HHs", border.alpha = 0)+
tm_shape(TX_MSAs)+
tm_polygons(alpha=0, border.col = "black")+
tm_format("World", title="Texas School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT", breaks=c(0,2.5,5))+
tm_compass()
tm_shape(state)+
tm_polygons(alpha=1)+
tm_shape(tx_sd_merge)+
tm_polygons("phispHH", title="Percent of Hispanic HHs", border.alpha = 0)+
tm_shape(TX_MSAs)+
tm_polygons(alpha=0, border.col = "black")+
tm_format("World", title="Texas School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT", breaks=c(0,2.5,5))+
tm_compass()
#Isolation
MSA.isob<-tx_sd_merge%>%
mutate(isob=(blackHH/MSA_blackHH * blackHH/totHH))%>%
group_by(MSA)%>%
summarise(iso_b= sum(isob, na.rm=T))
MSA.isoh<-tx_sd_merge%>%
mutate(isoh=(hispHH/MSA_hispHH * hispHH/totHH))%>%
group_by(MSA)%>%
summarise(iso_h= sum(isoh, na.rm=T))
#Dissimilarity
#White Black
dis_wb<-tx_sd_merge%>%
mutate(d.wb=abs(whiteHH/MSA_whiteHH - blackHH/MSA_blackHH))%>%
group_by(MSA)%>%
summarise(diss_wb= .5*sum(d.wb, na.rm=T))
#White Hispanic
dis_wh<-tx_sd_merge%>%
mutate(d.wh=abs(whiteHH/MSA_whiteHH - hispHH/MSA_hispHH))%>%
group_by(MSA)%>%
summarise(diss_wh= .5*sum(d.wh, na.rm=T))
#Join Together and Merge
library(tidyverse)
st_geometry(dis_wb)<-NULL
st_geometry(dis_wh)<-NULL
st_geometry(MSA.isob)<-NULL
st_geometry(MSA.isoh)<-NULL
tx_seg<-list(dis_wb, dis_wh, MSA.isob, MSA.isoh)%>% reduce (left_join, by="MSA")
library(tigris)
options(tigris_class = "sf")
tx_counties<- counties(state="TX", cb=T, year=2010)
tx_counties$cofips<-substr(tx_counties$GEO_ID, 10,15)
tx_seg_dat<- geo_join(TX_MSAs, tx_seg, by_sp="NAME", by_df="MSA")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
library(ggplot2)
tx_seg_dat%>%
ggplot()+geom_sf(aes(fill=iso_b))+
scale_fill_viridis_c()+
scale_color_viridis_c()+
ggtitle("Black isolation index by Major Texas MSA ", subtitle = "School District as Small Geography - 2019 ACS")
tx_seg_dat%>%
ggplot()+geom_sf(aes(fill=iso_h))+
scale_fill_viridis_c()+
scale_color_viridis_c()+
ggtitle("Hispanic isolation index by Major Texas MSA", subtitle = "School District as Small Geography - 2019 ACS")
tx_seg_dat%>%
ggplot()+geom_sf(aes(fill=diss_wb))+
scale_fill_viridis_c()+
scale_color_viridis_c()+
ggtitle("White-Black Dissimilarity index by Major Texas MSA", subtitle = "School District as Small Geography - 2019 ACS")
tx_seg_dat%>%
ggplot()+geom_sf(aes(fill=diss_wh))+
scale_fill_viridis_c()+
scale_color_viridis_c()+
ggtitle("White-Hispanic Dissimilarity index by Major Texas MSA", subtitle = "School District as Small Geography - 2019 ACS")