library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(sp)
loki <- get_acs(geography = "tract",
year = 2015,
state = "CA",
county = 037,
variables = c("DP05_0071PE", "DP05_0078PE"),
output ="wide",
geometry = TRUE)%>%
rename(hispanic =DP05_0071PE,
nh_black =DP05_0078PE) %>%
filter(complete.cases(nh_black, hispanic))
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
loki%>%
mutate(hispanicq=cut(hispanic,
breaks = quantile(loki$hispanic,
p=c(0, .25, .5, .75, 1), na.rm=T),
include.lowest = T))%>%
ggplot()+geom_sf(aes(fill=hispanicq, color=hispanicq))+
scale_fill_brewer(palette = "Greens")+
scale_color_brewer(palette = "Greens")+
ggtitle("Hispanic Population Proportion, 2015")
loki%>%
mutate(nh_blackq=cut(nh_black,
breaks = quantile(loki$nh_black,
p=c(0, .25, .5, .75, 1), na.rm=T),
include.lowest = T))%>%
ggplot()+geom_sf(aes(fill=nh_blackq, color=nh_blackq))+
scale_fill_brewer(palette = "Oranges")+
scale_color_brewer(palette = "Oranges")+
ggtitle("Non-Hispanic Black Population Proportion, 2015")
## Constructing a queen and k=4 nearest neighbor list
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
# Queen
conch<-poly2nb(loki, queen=T)
summary(conch)
## Neighbour list object:
## Number of regions: 2326
## Number of nonzero links: 14794
## Percentage nonzero weights: 0.2734426
## Average number of links: 6.360275
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 19 20 25
## 4 9 49 200 464 609 494 273 134 51 23 5 2 2 2 2 1 1 1
## 4 least connected regions:
## 145 264 1330 1331 with 1 link
## 1 most connected region:
## 1015 with 25 links
# K=4 nn
deer<-nb2listw(conch, style="W")
knn<-knearneigh(x=coordinates(as(loki, "Spatial")), k=4)
knn4<-knn2nb(knn=knn)
plot(as(loki, "Spatial"),
main="Queen Neighbors")
plot(deer,
coords=coordinates(as(loki, "Spatial")),
add=T,
col=2)
plot(as(loki, "Spatial"),
main="k=4 Neighbors")
plot(knn4,
coords=coordinates(as(loki, "Spatial")),
add=T,
col=2)
moran.test(loki$hispanic,
listw=deer)
##
## Moran I test under randomisation
##
## data: loki$hispanic
## weights: deer
##
## Moran I statistic standard deviate = 68.81, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.8165160858 -0.0004301075 0.0001409572
moran.test(loki$nh_black,
listw=deer)
##
## Moran I test under randomisation
##
## data: loki$nh_black
## weights: deer
##
## Moran I statistic standard deviate = 33.351, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.3947038302 -0.0004301075 0.0001403648
# The global Moran I statistics, using the queen rule, for hispanic and non-hispanic black are 0.82 and 0.39 respectively. This suggests that tracts with high proportions of hispanics tend to cluster around each other. While tracts with greater proportions of non-hispanic blacks tend to be next to those with lower proportions, or vice versa.
knn4lw<-nb2listw(knn4)
moran.test(loki$hispanic,
listw=knn4lw)
##
## Moran I test under randomisation
##
## data: loki$hispanic
## weights: knn4lw
##
## Moran I statistic standard deviate = 60.208, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.8315918392 -0.0004301075 0.0001909709
knn4lw<-nb2listw(knn4)
moran.test(loki$nh_black,
listw=knn4lw)
##
## Moran I test under randomisation
##
## data: loki$nh_black
## weights: knn4lw
##
## Moran I statistic standard deviate = 30.166, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.4155597307 -0.0004301075 0.0001901684
# Performing the same test using the k=4 nearest neighbor rule instead did little to change the Moran I value for each variable. Both rules resulted in the same pattern of autocorrelation.
# For Proportion of Hispanic Pop
bird<-nb2listw(neighbours = knn4, style = "W")
locali<-localmoran(loki$hispanic, listw = bird, alternative = "two.sided" )
loki$locali<-locali[,1]
loki$localp<-locali[,5]
loki$shisp<-scale(loki$hispanic)
loki$lag_hisp<-lag.listw(var=loki$shisp, x = bird)
loki$quad_sig <- NA
loki$quad_sig[(loki$shisp >= 0 & loki$lag_hisp >= 0) & (loki$localp <= 0.05)] <- "H-H" #high high
loki$quad_sig[(loki$shisp <= 0 & loki$lag_hisp <= 0) & (loki$localp <= 0.05)] <- "L-L" #low low
loki$quad_sig[(loki$shisp >= 0 & loki$lag_hisp <= 0) & (loki$localp <= 0.05)] <- "H-L" #high low
loki$quad_sig[(loki$shisp <= 0 & loki$lag_hisp >= 0) & (loki$localp <= 0.05)] <- "L-H" #low high
#WE ASSIGN A # Set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")
# see ?findInterval - This is necessary for making a map
np <- findInterval(loki$quad_sig, breaks)
## Warning in findInterval(loki$quad_sig, breaks): NAs introduced by coercion
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
loki%>%
ggplot()+
geom_sf(aes(fill = quad_sig))+
ggtitle("Moran LISA Cluster Map -Proportion Hispanic",
sub=" Los Angeles County, LA")
# The map of the clustered local Moran I analysis for "hispanic" aligns with the results suggested by the global Moran I. It shows that high hispanic proportion tracts tend to be next to each other, likewise, low hispanic proportion tracts are often seen close to one another.
bird<-nb2listw(neighbours = knn4, style = "W")
locali<-localmoran(loki$nh_black, listw = bird, alternative = "two.sided" )
loki$locali<-locali[,1]
loki$localp<-locali[,5]
loki$snhb<-scale(loki$nh_black)
loki$lag_nhb<-lag.listw(var=loki$snhb, x = bird)
loki$quad_sig <- NA
loki$quad_sig[(loki$snhb >= 0 & loki$lag_nhb >= 0) & (loki$localp <= 0.05)] <- "H-H" #high high
loki$quad_sig[(loki$snhb <= 0 & loki$lag_nhb <= 0) & (loki$localp <= 0.05)] <- "L-L" #low low
loki$quad_sig[(loki$snhb >= 0 & loki$lag_nhb <= 0) & (loki$localp <= 0.05)] <- "H-L" #high low
loki$quad_sig[(loki$snhb <= 0 & loki$lag_nhb >= 0) & (loki$localp <= 0.05)] <- "L-H" #low high
#WE ASSIGN A # Set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")
# see ?findInterval - This is necessary for making a map
np <- findInterval(loki$quad_sig, breaks)
## Warning in findInterval(loki$quad_sig, breaks): NAs introduced by coercion
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
loki%>%
ggplot()+
geom_sf(aes(fill = quad_sig))+
ggtitle("Moran LISA Cluster Map -Proportion Non-Hispanic Black",
sub=" Los Angeles County, LA")
# The Local Moran I analysis for "nh_black" shows that some tracts of high proportions of non-hispanic blacks are next to one another (H-H). But the analysis points out tracts where low proporiton non-hispanic blacks are next to high proportion tracts (L-H). While these results may not be as straightforward as those for hispanics, Moran's I tests for autocorrelation were useful in identifying both patterns and leads to further questions about the nature of this pattern and what it's potential policy implications.