ca_tract <- get_acs(geography = "tract",
year=2019,
output="wide",
table = "B03002",
cache_table = T,
state = "CA",
survey = "acs5",
geometry = F)
## Getting data from the 2015-2019 5-year ACS
## Loading ACS5 variables for 2019 from table B03002 and caching the dataset for faster future access.
trt_data<- ca_tract%>%
mutate(nhwhite=B03002_003E,
nhblack=B03002_004E,
nhother= B03002_005E+B03002_006E+B03002_007E+B03002_008E+B03002_009E+B03002_010E,
hisp=B03002_012E,
total=B03002_001E,
year=2019,
cofips=substr(GEOID, 1,5))%>%
select(GEOID,nhwhite, nhblack , nhother, hisp, total, year, cofips )%>%
arrange(cofips, GEOID)
head(trt_data)
## # A tibble: 6 x 8
## GEOID nhwhite nhblack nhother hisp total year cofips
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 06001400100 2317 107 578 118 3120 2019 06001
## 2 06001400200 1475 52 306 174 2007 2019 06001
## 3 06001400300 3434 459 807 351 5051 2019 06001
## 4 06001400400 2554 266 702 485 4007 2019 06001
## 5 06001400500 1874 882 998 390 4124 2019 06001
## 6 06001400600 860 358 396 131 1745 2019 06001
co_data<-trt_data%>%
group_by(cofips)%>%
summarise(co_total=sum(total),
co_wht=sum(nhwhite),
co_blk=sum(nhblack),
co_oth=sum(nhother),
cohisp=sum(hisp))
merged_data<-left_join(x=trt_data,
y=co_data,
by="cofips")
head(merged_data)
## # A tibble: 6 x 13
## GEOID nhwhite nhblack nhother hisp total year cofips co_total co_wht co_blk
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 06001~ 2317 107 578 118 3120 2019 06001 1656754 520447 171168
## 2 06001~ 1475 52 306 174 2007 2019 06001 1656754 520447 171168
## 3 06001~ 3434 459 807 351 5051 2019 06001 1656754 520447 171168
## 4 06001~ 2554 266 702 485 4007 2019 06001 1656754 520447 171168
## 5 06001~ 1874 882 998 390 4124 2019 06001 1656754 520447 171168
## 6 06001~ 860 358 396 131 1745 2019 06001 1656754 520447 171168
## # ... with 2 more variables: co_oth <dbl>, cohisp <dbl>
\[D = .5* \sum_i \left | \frac{b_i}{B} - \frac{w_i}{W} \right |\] where \(b_i\) is the number of blacks in each tract, B is the number of blacks in the county, \(w_i\) is the number of whites in the tract, and W is the number of whites in the county.
co.dis<-merged_data%>%
mutate(d.wb=abs(nhwhite/co_wht - nhblack/co_blk))%>%
group_by(cofips)%>%
summarise(dissim= .5*sum(d.wb, na.rm=T))
co.dis
## # A tibble: 58 x 2
## cofips dissim
## <chr> <dbl>
## 1 06001 0.535
## 2 06003 0
## 3 06005 0.823
## 4 06007 0.509
## 5 06009 0.377
## 6 06011 0.360
## 7 06013 0.602
## 8 06015 0.386
## 9 06017 0.413
## 10 06019 0.530
## # ... with 48 more rows
\[\text{Interaction} = \sum_i \frac{b_i}{B} * \frac{w_i}{t_i} \]
co.int<-merged_data%>%
mutate(int.bw=(nhblack/co_blk * nhwhite/total))%>%
group_by(cofips)%>%
summarise(inter_bw= sum(int.bw, na.rm=T))
co.int
## # A tibble: 58 x 2
## cofips inter_bw
## <chr> <dbl>
## 1 06001 0.241
## 2 06003 0.530
## 3 06005 0.500
## 4 06007 0.658
## 5 06009 0.812
## 6 06011 0.467
## 7 06013 0.277
## 8 06015 0.576
## 9 06017 0.759
## 10 06019 0.246
## # ... with 48 more rows
Next is is the isolation index for blacks. The formula is:
\[\text{Isolation} = \sum_i \frac{b_i}{B} * \frac{b_i}{t_i} \]
co.isob<-merged_data%>%
mutate(isob=(nhblack/co_blk * nhblack/total))%>%
group_by(cofips)%>%
summarise(iso_b= sum(isob, na.rm=T))
co.isob
## # A tibble: 58 x 2
## cofips iso_b
## <chr> <dbl>
## 1 06001 0.228
## 2 06003 0.00866
## 3 06005 0.136
## 4 06007 0.0364
## 5 06009 0.0123
## 6 06011 0.0281
## 7 06013 0.176
## 8 06015 0.0534
## 9 06017 0.0142
## 10 06019 0.0948
## # ... with 48 more rows
For this assignment the two groups used for the Dissimilarity Index are Non Hispanic Blacks and non Hispanic Whites and i calculate segregation from non Hispanic White residents. For the interaction Index same group was used and i calculate the exposure of non Hispanic Black residents to non Hispanic White residents.
ca_seg<-list(co.dis, co.int)%>% reduce (left_join, by="cofips")
options(tigris_class = "sf")
ca_counties<- counties(state="CA", cb=T, year=2010)
ca_counties$cofips<-substr(ca_counties$GEO_ID, 10,15)
ca_seg_dat<- geo_join(ca_counties, ca_seg, by_sp="cofips", by_df="cofips")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
usa = map_data("county") # get basic map data for all USA counties
ca = subset(usa, region == "california") # subset to counties in Ohio
names(ca)
## [1] "long" "lat" "group" "order" "region" "subregion"
library(stringr)
ca$county = str_to_title(ca$subregion)
getLabelPoint <- # Returns a county-named list of label points
function(county) {Polygon(county[c('long', 'lat')])@labpt}
centroids = by(ca, ca$county, getLabelPoint) # Returns list
centroids2 <- do.call("rbind.data.frame", centroids) # Convert to Data Frame
centroids2$county = rownames(centroids)
names(centroids2) <- c('clong', 'clat', "county")
ca_seg_dat%>%
ggplot()+geom_sf(aes(fill=inter_bw))+
scale_fill_viridis_c()+
scale_color_viridis_c()+ geom_text(data = centroids2, aes(x = clong, y = clat, label = county),
color = "darkblue", size = 2.25) + theme_map() + theme(legend.position="left")+
ggtitle("Non-Hispanic Black interaction index", subtitle = "ACS 2019")
The result of the interaction index shows that the probability of a black resident “interacting” with a white person in their neighborhood is 80% in Sierra and Nevada. Unfortunately, this also means that 80 of every 100 people a black person meets in their neighborhood will be white. Similarly, the probability of a black person interacting with a white person in their neighborhood is about 20% in Los Angeles and Imperial.
ca_seg_dat%>%
ggplot()+geom_sf(aes(fill=dissim))+
scale_fill_viridis_c()+ scale_color_viridis_c()+
geom_text(data = centroids2, aes(x = clong, y = clat, label = county), color = "darkblue", size = 2.25) + theme_map() + ggtitle("Non-Hispanic Black Dissimilarity index", subtitle = "ACS 2019")
The results or map shows that the Dissimilarity Index for Black/White in Toulumne and Amador is 0.8. The interpretation of this value is that 80% of black residents would need to move neighborhoods to achieve a uniform distribution of black residents across neighborhoods. The map also shows the Dissimilarity index for black/white in Sierra and Alpine with a value of 0.0. This means that there is a “perfect integration” and no residential segregation of blacks and whites in these two counties in California.