library(haven)
library(foreign) 
library(readr)
library(dplyr)
library(ggplot2)
library(broom)
library(car)
library(MASS) 
library(lmtest)
library(zoo)
library(nortest)
library(plotrix)
library(scales)
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
library(VGAM)
library(stargazer)
library(sandwich)
library(pastecs)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
library(reshape2)
library(data.table)
library(magrittr)
library(tidyverse)
library(sjmisc)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(weights)
library(GGally)
library(tigris)
library(RColorBrewer)
library(patchwork)
library(tidycensus)
library(censusapi)
library(spdep)

A

  1. Download ACS data for the year 2015 for Los Angeles County, California census tracts in R using the tidycensus library. In this extract, request both the proportion of the population that is Non-Hispanic Black and the proportion of the population that is Hispanic.
# Request two ACS Data Profile percentage columns for every census tract in
# Los Angeles County, CA (2015 end-year), in wide format with tract geometry
# attached, give them readable names, and drop rows with missing values.
# NOTE(review): confirm against the 2015 DP05 variable list that DP05_0078PE
# and DP05_0070PE really are the Non-Hispanic Black and Hispanic percentages;
# Data Profile variable codes are not guaranteed to be stable across years.
la <- get_acs(
  geography = "tract",
  variables = c("DP05_0078PE", "DP05_0070PE"),
  year = 2015,
  state = "CA",
  county = "Los Angeles",
  output = "wide",
  geometry = TRUE
) %>%
  rename(p.nhblack = DP05_0078PE, p.hisp = DP05_0070PE) %>%
  na.omit()
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile

B

  1. Construct a Queen-based contiguity neighbor list and a k=4 nearest-neighbor list.
# Queen contiguity neighbour list for the tracts in `la`: with queen = TRUE a
# single shared boundary point is enough for two polygons to be neighbours.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
queen.cont <- poly2nb(la, queen = TRUE)
# Print the number of regions, the link-count distribution and the least /
# most connected regions.
summary(queen.cont)
## Neighbour list object:
## Number of regions: 2326 
## Number of nonzero links: 14794 
## Percentage nonzero weights: 0.2734426 
## Average number of links: 6.360275 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  19  20  25 
##   4   9  49 200 464 609 494 273 134  51  23   5   2   2   2   2   1   1   1 
## 4 least connected regions:
## 145 266 1341 1342 with 1 link
## 1 most connected region:
## 1025 with 25 links
# Row-standardised (style "W") spatial weights built from the Queen list.
lalw <- nb2listw(queen.cont, style = "W")
# k = 4 nearest neighbours, found from the coordinate matrix of the tracts
# after converting `la` to an sp object.
# NOTE(review): if `la` is in geographic (lon/lat) coordinates, these nearest
# neighbours are found with planar distances measured in degrees; consider
# `longlat = TRUE` or projecting first -- confirm the CRS of `la`.
knn <- knearneigh(coordinates(as(la, "Spatial")), k = 4)
# Convert the knn object to a neighbour list, then to spatial weights using
# the default weighting style of nb2listw().
knn4 <- knn2nb(knn)
k4lw <- nb2listw(knn4)

C

  1. Produce a map for each of the two outcomes in R.
# Draw the tract outlines, then overlay the Queen contiguity links (palette
# colour 2) on the same device. `add = TRUE` is spelled out because `T` can be
# reassigned.
plot(as(la, "Spatial"), main = "Queen Neighbors")
plot(
  lalw,
  coords = coordinates(as(la, "Spatial")),
  add = TRUE,
  col = 2
)

# Draw the tract outlines, then overlay the k = 4 nearest-neighbour links
# (palette colour 3) on the same device. `add = TRUE` is spelled out because
# `T` can be reassigned.
plot(as(la, "Spatial"), main = "k=4 Neighbors")
plot(
  knn4,
  coords = coordinates(as(la, "Spatial")),
  add = TRUE,
  col = 3
)

D

  1. Test the global null hypothesis of independence of these two outcomes, for each neighbor rule (Queen and k=4).
    1. Be sure to report the global Moran I statistic for each outcome, and the associated test statistic.
    2. Describe if the two neighbor types give the same results.
#1
# Global Moran's I test for p.nhblack using the Queen contiguity weights.
moran.test(x = la$p.nhblack, listw = lalw)
## 
##  Moran I test under randomisation
## 
## data:  la$p.nhblack  
## weights: lalw    
## 
## Moran I statistic standard deviate = 33.351, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.3947038302     -0.0004301075      0.0001403648
# Global Moran's I test for p.nhblack using the k = 4 nearest-neighbour weights.
moran.test(x = la$p.nhblack, listw = k4lw)
## 
##  Moran I test under randomisation
## 
## data:  la$p.nhblack  
## weights: k4lw    
## 
## Moran I statistic standard deviate = 30.166, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.4155597307     -0.0004301075      0.0001901684
# Permutation (Monte Carlo) test of global Moran's I for p.nhblack under the
# Queen contiguity weights, using 999 simulations.
# Seed the RNG first so the simulated reference distribution, and therefore
# the reported rank and p-value, are reproducible from run to run.
set.seed(1234)
moran.mc(x = la$p.nhblack, listw = lalw, nsim = 999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  la$p.nhblack 
## weights: lalw  
## number of simulations + 1: 1000 
## 
## statistic = 0.3947, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# Permutation (Monte Carlo) test of global Moran's I for p.nhblack under the
# k = 4 nearest-neighbour weights, using 999 simulations.
# Seed the RNG first so the simulated reference distribution, and therefore
# the reported rank and p-value, are reproducible from run to run.
set.seed(1234)
moran.mc(x = la$p.nhblack, listw = k4lw, nsim = 999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  la$p.nhblack 
## weights: k4lw  
## number of simulations + 1: 1000 
## 
## statistic = 0.41556, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# Moran scatterplot of p.nhblack using the Queen contiguity weights.
moran.plot(x = la$p.nhblack, listw = lalw)

# Moran scatterplot of p.nhblack using the k = 4 nearest-neighbour weights.
moran.plot(x = la$p.nhblack, listw = k4lw)

  1. The global Moran’s I for the proportion of non-Hispanic Blacks is 0.3947038302 using Queen contiguity (standard deviate = 33.351, p<.05) and 0.4155597307 using k=4 nearest neighbors (standard deviate = 30.166, p<.05). Both neighbor rules yield positive, statistically significant Moran’s I statistics, indicating weak positive spatial autocorrelation: tracts tend to be surrounded by tracts with similar proportions of non-Hispanic Blacks. The two rules therefore lead to the same conclusion, although the k=4 nearest-neighbor rule yields a slightly stronger positive autocorrelation.
#2
# Global Moran's I test for p.hisp using the Queen contiguity weights.
moran.test(x = la$p.hisp, listw = lalw)
## 
##  Moran I test under randomisation
## 
## data:  la$p.hisp  
## weights: lalw    
## 
## Moran I statistic standard deviate = 60.722, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7196681961     -0.0004301075      0.0001406360
# Global Moran's I test for p.hisp using the k = 4 nearest-neighbour weights.
moran.test(x = la$p.hisp, listw = k4lw)
## 
##  Moran I test under randomisation
## 
## data:  la$p.hisp  
## weights: k4lw    
## 
## Moran I statistic standard deviate = 53.605, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7395077523     -0.0004301075      0.0001905358
# Permutation (Monte Carlo) test of global Moran's I for p.hisp under the
# Queen contiguity weights, using 999 simulations.
# Seed the RNG first so the simulated reference distribution, and therefore
# the reported rank and p-value, are reproducible from run to run.
set.seed(1234)
moran.mc(x = la$p.hisp, listw = lalw, nsim = 999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  la$p.hisp 
## weights: lalw  
## number of simulations + 1: 1000 
## 
## statistic = 0.71967, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# Permutation (Monte Carlo) test of global Moran's I for p.hisp under the
# k = 4 nearest-neighbour weights, using 999 simulations.
# Seed the RNG first so the simulated reference distribution, and therefore
# the reported rank and p-value, are reproducible from run to run.
set.seed(1234)
moran.mc(x = la$p.hisp, listw = k4lw, nsim = 999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  la$p.hisp 
## weights: k4lw  
## number of simulations + 1: 1000 
## 
## statistic = 0.73951, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# Moran scatterplot of p.hisp using the Queen contiguity weights.
moran.plot(x = la$p.hisp, listw = lalw)

# Moran scatterplot of p.hisp using the k = 4 nearest-neighbour weights.
moran.plot(x = la$p.hisp, listw = k4lw)

  1. The global Moran’s I for the proportion of Hispanics is 0.7196681961 using Queen contiguity (standard deviate = 60.722, p<.05) and 0.7395077523 using k=4 nearest neighbors (standard deviate = 53.605, p<.05). Both neighbor rules yield positive, statistically significant Moran’s I statistics, indicating strong positive spatial autocorrelation: tracts tend to be surrounded by tracts with similar proportions of Hispanics. The two rules therefore lead to the same conclusion, although the k=4 nearest-neighbor rule yields a slightly stronger positive autocorrelation.

E

  1. Perform a local Moran I analysis
  1. Produce a local Moran I cluster map for each outcome, using the k=4 neighbor list only
# Percent Non-Hispanic Black

# Local Moran's I for p.nhblack with the k = 4 weights, requesting "fdr"
# p-value adjustment.
locali <- localmoran(x = la$p.nhblack, listw = k4lw, p.adjust.method = "fdr")
# Store column 1 of the result as the local statistic and column 5 as its
# p-value on the tract data.
la$locali <- locali[, 1]
la$localp <- locali[, 5]
# Label each tract by whether its p-value is at or below 0.05.
la$cl <- factor(ifelse(la$localp <= .05, "Clustered", "NotClustered"))

# Locali

# Choropleth of the local Moran's I value stored in la$locali (p.nhblack).
ggplot(data = la) +
  geom_sf(mapping = aes(fill = locali)) +
  scale_fill_viridis_c() +
  ggtitle("Non-Hispanic Black (%) LA County, CA\nCluster Map (Local Moran's I Value)")

# Localp
# Choropleth of the local Moran's I p-value stored in la$localp (p.nhblack).
ggplot(data = la) +
  geom_sf(mapping = aes(fill = localp)) +
  scale_fill_viridis_c() +
  ggtitle("Non-Hispanic Black (%) LA County, CA\nCluster Map (Local Moran's I - (P below 0.05)")

# Percent Hispanic

# Locali

# Local Moran's I for p.hisp with the k = 4 weights, requesting "fdr"
# p-value adjustment. This overwrites the locali / localp / cl values
# previously stored for p.nhblack.
locali <- localmoran(x = la$p.hisp, listw = k4lw, p.adjust.method = "fdr")
# Store column 1 of the result as the local statistic and column 5 as its
# p-value on the tract data.
la$locali <- locali[, 1]
la$localp <- locali[, 5]
# Label each tract by whether its p-value is at or below 0.05.
la$cl <- factor(ifelse(la$localp <= .05, "Clustered", "NotClustered"))

# Choropleth of the local Moran's I value stored in la$locali (p.hisp).
ggplot(data = la) +
  geom_sf(mapping = aes(fill = locali)) +
  scale_fill_viridis_c() +
  ggtitle("Hispanic (%) LA County, CA \nCluster Map (Local Moran's I Value)")

# Localp

# Choropleth of the local Moran's I p-value stored in la$localp (p.hisp).
# `locali`, `la$locali`, `la$localp` and `la$cl` for p.hisp were already
# computed for the preceding map and nothing has modified them since, so the
# identical localmoran() call and column assignments are not repeated here.
ggplot(data = la) +
  geom_sf(mapping = aes(fill = localp)) +
  scale_fill_viridis_c() +
  ggtitle("Hispanic (%) LA County, CA \nCluster Map (Local Moran's I - (P below 0.05)")

  1. Interpret your findings from the local Moran I analysis.

The analysis shows that many of the clusters of non-Hispanic Blacks and Hispanics within LA County are either near or adjacent to each other. The clustering is more visible on the p-value scale for the local Moran's I. The map for non-Hispanic Blacks shows significant local clusters located north of the 34.25° N parallel in the western and eastern regions of the county. The map for Hispanics shows significant local clusters located mostly north of the 34.25° N parallel in the central, eastern, and, to a lesser degree, western regions of the county. Additionally, there are significant local clusters in the southwestern region of the county for both non-Hispanic Blacks and Hispanics.