Homework 1

#Load ACS Data & Extract Variables
library(tidycensus)
library (tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(dplyr)
library(spdep)

## Loading required package: sp

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

## Loading required package: sf

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

library(sf)
library(ggplot2)

# Download two ACS Data Profile percentage variables for every census tract
# in Los Angeles County, CA, in wide format with tract geometry attached.
# NOTE(review): the map titles later in this script label DP05_0033PE as
# "Non-Hispanic Black" and DP05_0065PE as "Hispanic". DP05 codes can differ
# between ACS years -- TODO confirm both codes with
# load_variables(2015, "acs5/profile").
LArace <- get_acs(
  geography = "tract",
  variables = c("DP05_0033PE", "DP05_0065PE"),
  state = "CA",
  county = "Los Angeles",
  year = 2015,
  output = "wide",
  geometry = TRUE
)

## Getting data from the 2011-2015 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## Using the ACS Data Profile

#Subset by complete cases
library(dplyr)
# Drop tracts where either ACS variable is missing.
LArace <- LArace %>%
  filter(!is.na(DP05_0033PE), !is.na(DP05_0065PE))

library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

library(sf)
# Coerce to an sf object so the sf/ggplot2 spatial methods apply.
LArace <- LArace %>%
  st_as_sf()

#Examine variable using thematic map
library(dplyr)
# Choropleth of DP05_0033PE: each tract's fill and outline are both mapped
# to the variable.
ggplot(data = LArace) +
  geom_sf(mapping = aes(fill = DP05_0033PE, color = DP05_0033PE)) +
  ggtitle("Los Angeles County, California Non-Hispanic Black Proportion")

#Examine variable using thematic map
library(dplyr)
# Choropleth of DP05_0065PE: each tract's fill and outline are both mapped
# to the variable.
ggplot(data = LArace) +
  geom_sf(mapping = aes(fill = DP05_0065PE, color = DP05_0065PE)) +
  ggtitle("Los Angeles County, California Hispanic Proportion")

#Construct a Queen based contiguity
# Build the queen-contiguity neighbour list for the tract polygons and print
# its link summary. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
Queen <- poly2nb(LArace, queen = TRUE)
summary(Queen)

## Neighbour list object:
## Number of regions: 2326 
## Number of nonzero links: 14794 
## Percentage nonzero weights: 0.2734426 
## Average number of links: 6.360275 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  19  20  25 
##   4   9  49 200 464 609 494 273 134  51  23   5   2   2   2   2   1   1   1 
## 4 least connected regions:
## 145 264 1330 1331 with 1 link
## 1 most connected region:
## 1015 with 25 links

# Row-standardised (style "W") spatial weights from the queen neighbour list.
salw <- nb2listw(Queen, style = "W")

# k = 4 nearest-neighbour list built from the tract coordinates returned by
# coordinates(), then converted to a neighbour list and to spatial weights.
# NOTE(review): the coordinates are passed as a plain matrix without
# `longlat = TRUE`; if the geometry is in lon/lat degrees the distances are
# presumably treated as planar -- TODO confirm, or project the data first.
knn <- knearneigh(
  x = coordinates(as(LArace, "Spatial")),
  k = 4
)
knn4 <- knn2nb(knn = knn)
knn4lw <- nb2listw(knn4)

# Draw the tract outlines, then overlay the queen-contiguity links between
# tract coordinates in palette colour 2. `add = TRUE` replaces the
# reassignable shorthand `T`.
plot(as(LArace, "Spatial"), main = "Queen Neighbors")
plot(salw,
     coords = coordinates(as(LArace, "Spatial")),
     add = TRUE,
     col = 2)

# Draw the tract outlines, then overlay the k = 4 nearest-neighbour links
# between tract coordinates in palette colour 2. `add = TRUE` replaces the
# reassignable shorthand `T`.
plot(as(LArace, "Spatial"), main = "k=4 Neighbors")
plot(knn4,
     coords = coordinates(as(LArace, "Spatial")),
     add = TRUE,
     col = 2)

# Global Moran's I for DP05_0033PE using the queen-contiguity weights (salw)
moran.test(x = LArace$DP05_0033PE, listw = salw)

## 
##  Moran I test under randomisation
## 
## data:  LArace$DP05_0033PE  
## weights: salw    
## 
## Moran I statistic standard deviate = 66.499, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7869649712     -0.0004301075      0.0001402028

# Global Moran's I for DP05_0065PE using the queen-contiguity weights (salw)
moran.test(x = LArace$DP05_0065PE, listw = salw)

## 
##  Moran I test under randomisation
## 
## data:  LArace$DP05_0065PE  
## weights: salw    
## 
## Moran I statistic standard deviate = 10.12, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.1196602306     -0.0004301075      0.0001408151

# Global Moran's I for DP05_0033PE using the k = 4 nearest-neighbour weights
# (knn4lw)
moran.test(x = LArace$DP05_0033PE, listw = knn4lw)

## 
##  Moran I test under randomisation
## 
## data:  LArace$DP05_0033PE  
## weights: knn4lw    
## 
## Moran I statistic standard deviate = 56.372, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.7765037114     -0.0004301075      0.0001899490

# Global Moran's I for DP05_0065PE using the k = 4 nearest-neighbour weights
# (knn4lw)
moran.test(x = LArace$DP05_0065PE, listw = knn4lw)

## 
##  Moran I test under randomisation
## 
## data:  LArace$DP05_0065PE  
## weights: knn4lw    
## 
## Moran I statistic standard deviate = 8.5042, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.1170321007     -0.0004301075      0.0001907785

# Local Moran's I for DP05_0033PE on the k = 4 weights, requesting
# FDR-adjusted p-values.
locali <- localmoran(
  LArace$DP05_0033PE,
  knn4lw,
  p.adjust.method = "fdr"
)

# Column 1 is stored as the local statistic and column 5 as its p-value.
LArace$locali <- locali[, 1]
LArace$localp <- locali[, 5]

# Flag tracts whose p-value is at or below 0.05.
LArace$cl <- factor(
  ifelse(LArace$localp <= 0.05, "Clustered", "NotClustered")
)

# Map the Clustered / NotClustered flag derived from the local Moran's I
# p-values for DP05_0033PE.
ggplot(data = LArace) +
  geom_sf(mapping = aes(fill = cl, color = cl)) +
  ggtitle(label = "Local Moran's I Value-Los Angeles County, California Non-Hispanic Black Proportion")

# Local Moran's I for DP05_0065PE on the k = 4 weights, requesting
# FDR-adjusted p-values.
locali2 <- localmoran(
  LArace$DP05_0065PE,
  knn4lw,
  p.adjust.method = "fdr"
)

# Column 1 is stored as the local statistic and column 5 as its p-value.
LArace$locali2 <- locali2[, 1]
LArace$localp2 <- locali2[, 5]

# Flag tracts whose p-value is at or below 0.05.
LArace$cl2 <- factor(
  ifelse(LArace$localp2 <= 0.05, "Clustered", "NotClustered")
)

# Map the Clustered / NotClustered flag derived from the local Moran's I
# p-values for DP05_0065PE.
ggplot(data = LArace) +
  geom_sf(mapping = aes(fill = cl2, color = cl2)) +
  ggtitle(label = "Local Moran's I Value-Los Angeles County, California Hispanic Proportion")

Global Moran's I statistics:

- Non-Hispanic Black proportion of the population, using Queen-based contiguity: Moran's I = 0.79, p < 0.05. We reject the null hypothesis of spatial independence and conclude that there is positive spatial autocorrelation.
- Hispanic proportion of the population, using Queen-based contiguity: Moran's I = 0.12, p < 0.05. We reject the null hypothesis of spatial independence and conclude that there is positive spatial autocorrelation.
- Non-Hispanic Black proportion of the population, using k = 4 nearest neighbors: Moran's I = 0.78, p < 0.05. We reject the null hypothesis of spatial independence and conclude that there is positive spatial autocorrelation.
- Hispanic proportion of the population, using k = 4 nearest neighbors: Moran's I = 0.12, p < 0.05. We reject the null hypothesis of spatial independence and conclude that there is positive spatial autocorrelation.

Although the two neighbor definitions give slightly different test statistics, both lead to the same conclusion: the non-Hispanic Black proportion of the population in Los Angeles County is spatially autocorrelated. Likewise, both neighbor definitions show spatial autocorrelation in the Hispanic proportion of the population in Los Angeles County.

The local Moran's I cluster maps, which use the k = 4 neighbor list only, show localized clustering for both the non-Hispanic Black proportion and the Hispanic proportion of the population in Los Angeles County. In particular, clustering of the non-Hispanic Black proportion is found primarily in the south of the county, while clustering of the Hispanic proportion is found in the northeast, center, and west of the county.

In the thematic maps, higher proportions of the Black population are found in the south of Los Angeles County, while higher proportions of the Hispanic population are found in the northeast, center, and south of the county.

The map of queen neighbors shows many more connections between tracts than the map of k = 4 nearest neighbors.