library(tidycensus)
library(dplyr)
library(sf)
library(spdep)
library(ggplot2)
# Download 2015 ACS tract-level data for Travis County, TX, with the tract
# geometries attached. `output = "wide"` gives one row per tract; the
# percentage estimate columns used below are `DP05_0073PE` and `DP05_0066PE`.
df <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Travis",
  year = 2015,
  variables = c(
    "DP05_0073P", "DP05_0073", # Not Hispanic Black
    "DP05_0066P", "DP05_0066"  # Hispanic population
  ),
  # Spelled out: `T` is an ordinary variable that can be reassigned.
  geometry = TRUE,
  output = "wide"
)

# Quick visual check of the downloaded tract boundaries.
plot(st_geometry(df))
### Queen neighborhood
# Keep only tracts that have a geometry and non-missing values for both
# percentage variables analysed below. `df` here is the same object that is
# being piped in, so the emptiness test lines up row for row.
df.ne <- df %>%
  filter(!st_is_empty(df), !is.na(DP05_0073PE), !is.na(DP05_0066PE))

# Queen-contiguity neighbour list. The region ids must come from the filtered
# data (`df.ne`), not the unfiltered `df`: poly2nb() needs one id per polygon
# it is given, so ids taken from `df` are the wrong length (or misaligned)
# whenever the filter above drops a tract.
nl.queen <- poly2nb(df.ne, row.names = df.ne$GEOID, queen = TRUE)
# Convert to an sp object once; its coordinates() are used both as node
# positions when drawing the neighbour graphs and as the input points for the
# k-nearest-neighbour search.
tracts.sp <- as(df.ne, "Spatial")
tract.xy <- coordinates(tracts.sp)

# Queen-contiguity links drawn in colour 2 on top of the tract outlines.
plot(tracts.sp, main = "Queen Neighborhoods: Travis County")
plot.nb(nl.queen, tract.xy, add = TRUE, col = 2)

# Neighbour list that links every tract to its 4 nearest tracts.
nl.k4 <- knn2nb(knearneigh(tract.xy, k = 4))

# Same map for the k = 4 neighbour links.
plot(tracts.sp, main = "K-4 Neighborhoods: Travis County")
plot.nb(nl.k4, tract.xy, add = TRUE, col = 2)
# Spatial weights (style "W") built from the queen neighbour list, followed by
# the global Moran's I test on the DP05_0073PE column.
nl.q.w <- nb2listw(nl.queen, style = "W")
moran.test(x = df.ne$DP05_0073PE, listw = nl.q.w)
##
## Moran I test under randomisation
##
## data: df.ne$DP05_0073PE
## weights: nl.q.w
##
## Moran I statistic standard deviate = 16.38, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.63194007 -0.00462963 0.00151024
# Moran scatterplot of DP05_0073PE under the queen weights.
moran.plot(x = df.ne$DP05_0073PE, listw = nl.q.w)
We can see that Moran’s I is significant and that there is positive global spatial autocorrelation in the Black population share across Travis County.
# Spatial weights (style "W") built from the k = 4 neighbour list, followed by
# the global Moran's I test on the DP05_0073PE column.
nl.k.w <- nb2listw(nl.k4, style = "W")
moran.test(x = df.ne$DP05_0073PE, listw = nl.k.w)
##
## Moran I test under randomisation
##
## data: df.ne$DP05_0073PE
## weights: nl.k.w
##
## Moran I statistic standard deviate = 13.986, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.612405478 -0.004629630 0.001946398
# Moran scatterplot of DP05_0073PE under the k = 4 weights.
moran.plot(x = df.ne$DP05_0073PE, listw = nl.k.w)
Both the queen and k-4 neighborhood definitions give similar results, with positive autocorrelation reported in each case.
# Global Moran's I test on the DP05_0066PE column under the queen weights.
moran.test(x = df.ne$DP05_0066PE, listw = nl.q.w)
##
## Moran I test under randomisation
##
## data: df.ne$DP05_0066PE
## weights: nl.q.w
##
## Moran I statistic standard deviate = 19.488, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.759556432 -0.004629630 0.001537646
# Moran scatterplot of DP05_0066PE under the queen weights.
moran.plot(x = df.ne$DP05_0066PE, listw = nl.q.w)
We can see that Moran’s I is significant and that there is positive global spatial autocorrelation in the Hispanic population share across Travis County.
# Global Moran's I test on the DP05_0066PE column under the k = 4 weights.
moran.test(x = df.ne$DP05_0066PE, listw = nl.k.w)
##
## Moran I test under randomisation
##
## data: df.ne$DP05_0066PE
## weights: nl.k.w
##
## Moran I statistic standard deviate = 17.429, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.771271981 -0.004629630 0.001981725
# Moran scatterplot of DP05_0066PE under the k = 4 weights.
moran.plot(x = df.ne$DP05_0066PE, listw = nl.k.w)
Both the queen and k-4 neighborhood definitions give similar results, with positive autocorrelation reported in each case.
# Local Moran's I (LISA) for the DP05_0073PE column, using the k = 4 weights.
lm.b <- localmoran(df.ne$DP05_0073PE, nl.k.w)
df.ne$locali <- lm.b[, 1] # column 1 of the localmoran() output (local statistic)
# Column 5 is used as the p-value, selected by position.
# NOTE(review): confirm column 5 is still the p-value in the installed spdep.
df.ne$localp <- lm.b[, 5]

# scale() returns a one-column matrix; flatten it so `sinc` and `lag_inc` are
# plain numeric columns instead of matrix columns inside the sf data frame.
df.ne$sinc <- as.numeric(scale(df.ne$DP05_0073PE))
df.ne$lag_inc <- lag.listw(x = nl.k.w, var = df.ne$sinc)

# Quadrant of each tract: sign of its own standardised value paired with the
# sign of its spatial lag. Only tracts with localp <= 0.1 are labelled; the
# rest stay NA. The "low" side uses a strict `<` so the four quadrants are
# mutually exclusive and the result does not depend on assignment order.
quad.codes <- c("H-H", "L-L", "H-L", "L-H")
lisa.sig <- df.ne$localp <= 0.1
own.high <- df.ne$sinc >= 0
lag.high <- df.ne$lag_inc >= 0
df.ne$quad_sig <- NA_character_
df.ne$quad_sig[lisa.sig & own.high & lag.high] <- "H-H"   # high high
df.ne$quad_sig[lisa.sig & !own.high & !lag.high] <- "L-L" # low low
df.ne$quad_sig[lisa.sig & own.high & !lag.high] <- "H-L"  # high low
df.ne$quad_sig[lisa.sig & !own.high & lag.high] <- "L-H"  # low high

# Set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")
# Map class (1-5) of every tract. findInterval() needs numeric input, so the
# character quadrant codes are first turned into class numbers; tracts without
# a significant quadrant (NA) fall into class 5, "Not Clustered".
quad.class <- match(df.ne$quad_sig, quad.codes, nomatch = 5L)
np <- findInterval(quad.class, breaks)
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")

# Labelled map class per tract, with a fixed level order so that the legend
# and the palette line up with `labels` / `colors`.
lisa.class <- factor(labels[np], levels = labels)

df.ne %>%
  mutate(lisa_class = lisa.class) %>%
  ggplot() +
  geom_sf(aes(fill = lisa_class)) +
  scale_fill_manual(
    values = setNames(colors, labels),
    drop = FALSE, # keep all five classes in the legend
    name = "LISA cluster"
  ) +
  ggtitle("Moran LISA Cluster Map -\nBlack residence",
          subtitle = " Travis County, TX")
### Hispanic
# Local Moran's I (LISA) for the DP05_0066PE column, using the k = 4 weights.
lm.b <- localmoran(df.ne$DP05_0066PE, nl.k.w)
df.ne$locali <- lm.b[, 1] # column 1 of the localmoran() output (local statistic)
# Column 5 is used as the p-value, selected by position.
# NOTE(review): confirm column 5 is still the p-value in the installed spdep.
df.ne$localp <- lm.b[, 5]

# scale() returns a one-column matrix; flatten it so `sinc` and `lag_inc` are
# plain numeric columns instead of matrix columns inside the sf data frame.
df.ne$sinc <- as.numeric(scale(df.ne$DP05_0066PE))
df.ne$lag_inc <- lag.listw(x = nl.k.w, var = df.ne$sinc)

# Quadrant of each tract: sign of its own standardised value paired with the
# sign of its spatial lag. Only tracts with localp <= 0.1 are labelled; the
# rest stay NA. The "low" side uses a strict `<` so the four quadrants are
# mutually exclusive and the result does not depend on assignment order.
quad.codes <- c("H-H", "L-L", "H-L", "L-H")
lisa.sig <- df.ne$localp <= 0.1
own.high <- df.ne$sinc >= 0
lag.high <- df.ne$lag_inc >= 0
df.ne$quad_sig <- NA_character_
df.ne$quad_sig[lisa.sig & own.high & lag.high] <- "H-H"   # high high
df.ne$quad_sig[lisa.sig & !own.high & !lag.high] <- "L-L" # low low
df.ne$quad_sig[lisa.sig & own.high & !lag.high] <- "H-L"  # high low
df.ne$quad_sig[lisa.sig & !own.high & lag.high] <- "L-H"  # low high

# Labelled map class per tract. `labels` and `colors` are the five class
# labels and colours defined earlier in this script, in the same order as
# `quad.codes` plus a final "Not Clustered" class for NA quadrants.
lisa.class <- factor(
  labels[match(df.ne$quad_sig, quad.codes, nomatch = 5L)],
  levels = labels
)

df.ne %>%
  mutate(lisa_class = lisa.class) %>%
  ggplot() +
  geom_sf(aes(fill = lisa_class)) +
  scale_fill_manual(
    values = setNames(colors, labels),
    drop = FALSE, # keep all five classes in the legend
    name = "LISA cluster"
  ) +
  ggtitle("Moran LISA Cluster Map -\nHispanic residence",
          subtitle = " Travis County, TX")
Using the local Moran’s I, we see that the Black and Hispanic populations are present in low concentrations in the northwest of Travis County and in high concentrations on the east side. However, the Hispanic population is concentrated more in the southeast, whereas the Black population resides more in the northeast.