tidycensus::census_api_key(key = "8c44183ca40ce5fa3fcaf95cc87927d974513b9f", install=T)
## Error: A CENSUS_API_KEY already exists. You can overwrite it with the argument overwrite=TRUE
library(tidycensus)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(sf)
library(ggplot2)
library(dplyr)
library(rgdal)
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/chris/OneDrive/Documents/R/win-library/4.1/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/chris/OneDrive/Documents/R/win-library/4.1/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/chris/OneDrive/Documents/R/win-library/4.1/rgdal/proj
library(maptools)
## Checking rgeos availability: TRUE
library(RColorBrewer)
library(sp)
ca_acs <- get_acs(geography = "tract",
year = 2015,
state = "cA",
county = 037,
variables = c("DP05_0001E","DP05_0073PE",
"DP05_0066PE"),
output ="wide",
geometry = TRUE)
## Getting data from the 2011-2015 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Using the ACS Data Profile
dat<-ca_acs%>%
mutate(totpop=DP05_0001E, nhblack=DP05_0073PE,
hispanic=DP05_0066PE)%>%
dplyr::select(GEOID,NAME,totpop, nhblack,hispanic)%>%
na.omit()
head(dat)
dat%>%
mutate(hispanicq=cut(hispanic,
breaks = quantile(dat$hispanic,
p=c(0, .25, .5, .75, 1), na.rm=T),
include.lowest = T))%>%
ggplot()+geom_sf(aes(fill=hispanicq, color=hispanicq))+
scale_fill_brewer(palette = "Reds")+
scale_color_brewer(palette = "Reds")+
ggtitle("Proportion of Hispanic Population in Los Angeles, 2015")
## Proportion of the Population that is Non-Hispanic Black
dat%>%
mutate(nhblackq=cut(nhblack,
breaks = quantile(dat$nhblack,
p=c(0, .25, .5, .75, 1), na.rm=T),
include.lowest = T))%>%
ggplot()+geom_sf(aes(fill=nhblackq, color=nhblackq))+
scale_fill_brewer(palette = "Blues")+
scale_color_brewer(palette = "Blueses")+
ggtitle("Proportion Non-Hispanic Black Population in Los Angeles, 2015")
## Warning in pal_name(palette, type): Unknown palette Blueses
# B Construct a Queen-based continguity and a K=4 nearest neighbor list
wadt<-poly2nb(dat, queen=T) # Queen-based contiguity neighbor list
summary(wadt)
## Neighbour list object:
## Number of regions: 2326
## Number of nonzero links: 14794
## Percentage nonzero weights: 0.2734426
## Average number of links: 6.360275
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 19 20 25
## 4 9 49 200 464 609 494 273 134 51 23 5 2 2 2 2 1 1 1
## 4 least connected regions:
## 145 266 1341 1342 with 1 link
## 1 most connected region:
## 1025 with 25 links
OA<-nb2listw(wadt, style="W")
knn<-knearneigh(x=coordinates(as(dat, "Spatial")), k=4) # K= 4 nearest neighbor
knn4<-knn2nb(knn=knn)
plot(as(dat, "Spatial"),
main="Queen-based Contiguity")
plot(OA,
coords=coordinates(as(dat, "Spatial")),
add=T,
col=2)
plot(as(dat, "Spatial"),
main="k=4 Neighbors")
plot(knn4,
coords=coordinates(as(dat, "Spatial")),
add=T,
col=2)
moran.test(dat$hispanic,
listw=OA) # Queen's case
##
## Moran I test under randomisation
##
## data: dat$hispanic
## weights: OA
##
## Moran I statistic standard deviate = 68.81, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.8165201722 -0.0004301075 0.0001409572
moran.test(dat$nhblack,
listw=OA) # Queen's case
##
## Moran I test under randomisation
##
## data: dat$nhblack
## weights: OA
##
## Moran I statistic standard deviate = 66.482, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.7867342028 -0.0004301075 0.0001401897
The Moran I statistics for both Queen-based contiguity for Hispanics and non-Hispanic black show that there’s not much of a visible difference between the two outcomes. The moran I statistical value of 0.8165 for hispanics and 0.7867 for non-hispanic black indicates positive autocorrelation meaning there is spatial correlation between the two races. This also means hispanics in tract cluster to each other geographically compared to non-hispanic blacks
knn4lw<-nb2listw(knn4)
moran.test(dat$hispanic, listw=knn4lw)
##
## Moran I test under randomisation
##
## data: dat$hispanic
## weights: knn4lw
##
## Moran I statistic standard deviate = 60.208, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.8315955900 -0.0004301075 0.0001909709
knn4lw<-nb2listw(knn4)
moran.test(dat$nhblack, listw=knn4lw)
##
## Moran I test under randomisation
##
## data: dat$nhblack
## weights: knn4lw
##
## Moran I statistic standard deviate = 56.32, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.7757472131 -0.0004301075 0.0001899312
Similarly, the Moran I statistics for K=4 neighbors for Hispanics and non-Hispanic black show that there’s not much of a visible difference between the two outcomes. The moran I statistical value of 0.8316 for hispanics and 0.7757 for non-hispanic black indicates positive autocorrelation meaning there is spatial correlation between the two races. ## b. Describe if the two neighbor types give the same results
The two neighbor types both have positive autocorrelation with values not much of a visible difference and same pattern of autocorrelation.
# Local Moran I for Proportion of Hispanic Population
sot<-nb2listw(neighbours = knn4, style = "W")
locali<-localmoran(dat$hispanic, listw = sot, alternative = "two.sided" )
dat$locali<-locali[,1]
dat$localp<-locali[,5]
dat$shispnc<-scale(dat$hispanic)
dat$lag_hispnc<-lag.listw(var=dat$shispnc, x = sot)
dat$quad_sig <- NA
dat$quad_sig[(dat$shispnc >= 0 & dat$lag_hispnc >= 0) & (dat$localp <= 0.05)] <- "H-H" #high high
dat$quad_sig[(dat$shispnc <= 0 & dat$lag_hispnc <= 0) & (dat$localp <= 0.05)] <- "L-L" #low low
dat$quad_sig[(dat$shispnc >= 0 & dat$lag_hispnc <= 0) & (dat$localp <= 0.05)] <- "H-L" #high low
dat$quad_sig[(dat$shispnc <= 0 & dat$lag_hispnc >= 0) & (dat$localp <= 0.05)] <- "L-H" #low high
#WE ASSIGN A # Set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")
# see ?findInterval - This is necessary for making a map
np <- findInterval(dat$quad_sig, breaks)
## Warning in findInterval(dat$quad_sig, breaks): NAs introduced by coercion
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
dat%>%
ggplot()+
geom_sf(aes(fill = quad_sig))+
ggtitle("Moran LISA Cluster Map -Proportion of Hispanic Population",
sub=" Los Angeles County, CA")
# Local Moran I for Proportion of Non-Hispanic Black Population
sot<-nb2listw(neighbours = knn4, style = "W")
locali<-localmoran(dat$nhblack, listw = sot, alternative = "two.sided" )
dat$locali<-locali[,1]
dat$localp<-locali[,5]
dat$snhblk<-scale(dat$nhblack)
dat$lag_nhblk<-lag.listw(var=dat$snhblk, x = sot)
dat$quad_sig <- NA
dat$quad_sig[(dat$snhblk >= 0 & dat$lag_nhblk >= 0) & (dat$localp <= 0.05)] <- "H-H" #high high
dat$quad_sig[(dat$snhblk <= 0 & dat$lag_nhblk <= 0) & (dat$localp <= 0.05)] <- "L-L" #low low
dat$quad_sig[(dat$snhblk >= 0 & dat$lag_nhblk <= 0) & (dat$localp <= 0.05)] <- "H-L" #high low
dat$quad_sig[(dat$snhblk <= 0 & dat$lag_nhblk >= 0) & (dat$localp <= 0.05)] <- "L-H" #low high
#WE ASSIGN A # Set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")
# see ?findInterval - This is necessary for making a map
np <- findInterval(dat$quad_sig, breaks)
## Warning in findInterval(dat$quad_sig, breaks): NAs introduced by coercion
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
dat%>%
ggplot()+
geom_sf(aes(fill = quad_sig))+
ggtitle("Moran LISA Cluster Map -Proportion of Non-Hispanic Black",
sub=" Los Angeles County, CA")
# F. Interpret your findings from the local Moran I analysis.
The Local Moran I analysis for Hispanics indicates varying autocorrelation of high and high, high and low, and low and low patterns. This means Hispanics of higher social status are more likely to live in same neighborhood with Hispanics of the same class. People of lower economic status are also likely to live in a poor area with others of similar background. This means you have wealthy hispanics living next to other affluent hispanics in affluent neighborhoods and poor neighborhoods next to other poor neighborhoods. This indicates both positive and negetaive spatial autocorrelation.
The Local Moran I for non-Hispanic blacks just like the hispanics indicates high and high, and low and high autocorrelation patterns. Meaning affluent blacks living in neighborhoods close to other affluent neighborhoods, and low-high, which is a negative autocorrelation. This situation or scenarios of low and high does occur due to gentrification where new home construction or new business construction are happening in mostly deprived communities. In a nutshell, both analysis depcits both positive and negetaive autocorrelation patterns due to the features of the tracts.