GLY 4751C/5757C Lab 04 Introduction to Spatial Statistics

Installed libraries

install.packages("dplyr")
install.packages("ggplot2")
install.packages("knitr")
install.packages("leaflet")
install.packages("maptools")
install.packages("raster")
install.packages("sf")
install.packages("spatstat")
install.packages("spdep")
install.packages("tmap")
install.packages("tmaptools")
library(dplyr)
library(ggplot2)
library(knitr)
library(leaflet)
library(maptools)
library(raster)
library(sf)
library(spatstat)
library(spdep)
library(tmap)
library(tmaptools)

Imported sighting data and necessary shapefiles

turtleData <- read.csv("Sea Turtle Data/Turtle Data.csv", header = TRUE) # Brings in sea turtle sighting CSV file
turtleData$Date <- as.POSIXct(turtleData$Date, format = "%m/%d/%Y") # converts the date column into an R-readable format

turtleData_Spatial <- st_as_sf(turtleData, coords = c("GPS_X", "GPS_Y"), crs = CRS("+init=epsg:4326")) # creates a new Simple Features spatial object with a WGS84 geographic coordinate system assigned. "EPSG" is a code shorthand representing many geographic and projected coordinate systems.
## Warning in CPL_crs_from_input(x): GDAL Message 1: +init=epsg:XXXX syntax is
## deprecated. It might return a CRS with a non-EPSG compliant axis order.
loggerheads <- turtleData_Spatial[turtleData_Spatial$Species == "Loggerhead",] # Isolates all Loggerhead Sea Turtle sightings
greens <- turtleData_Spatial[turtleData_Spatial$Species == "Green",] # Isolates all Green Sea Turtle sightings
kemps <- turtleData_Spatial[turtleData_Spatial$Species == "Kemps",] # Isolates all Kemp's Ridley Sea Turtle sightings
Study_Area <- st_read(dsn = "Sea Turtle Data/Shapefiles/Study_Area.shp", quiet = TRUE) # a general Study Area polygon

FL_Shoreline <- st_read(dsn = "Sea Turtle Data/Shapefiles/FL_Shoreline.shp", quiet = TRUE) # 1:12,000 scale shoreline polygon of Florida

Survey_Area_Simple <- st_read(dsn = "Sea Turtle Data/Shapefiles/Survey_Area_Simple.shp", quiet = TRUE) # simplified Survey Area covering the regions within the Study Area actually patrolled during bimonthly surveys, including small mangrove islands

Survey_Area_Hex <- st_read(dsn = "Sea Turtle Data/Shapefiles/SurveyArea_HexGrid.shp", quiet = TRUE) # division of the Survey Area into a hexagonal grid

Summarized sighting data

turtleData %>%               # start with the full data set
  group_by(Species) %>%      # group the observations by species
  summarize(nT = n())        # count up the number of observations per species
## # A tibble: 3 x 2
##   Species       nT
##   <chr>      <int>
## 1 Green         74
## 2 Kemps          9
## 3 Loggerhead    17

Interactive map showing sighting locations

leafletMap <- leaflet() %>% 
  fitBounds(lat1 = 28.98629, lng1 = -82.92938, lat2 = 28.63606, lng2 = -82.55936) %>%
  addTiles() %>%
  addCircles(data = loggerheads, color = "red", radius = 0.2, fillOpacity = 0.8) %>% 
  addCircles(data = greens, color = "green", radius = 0.2, fillOpacity = 0.8) %>% 
  addCircles(data = kemps, color = "purple", radius = 0.2, fillOpacity = 0.8)

leafletMap

Fixed map showing sighting locations

tm_shape(Study_Area) +
  tm_polygons(alpha = 0) +
tm_shape(FL_Shoreline) +
  tm_polygons(col = "grey", border.alpha = 0) +
tm_shape(loggerheads) +
  tm_dots(col = "red", size = 0.1) +
tm_shape(greens) +
  tm_dots(col = "green", size = 0.1) +
tm_shape(kemps) +
  tm_dots(col = "purple", size = 0.1) +
tm_add_legend(type = "symbol", labels = c("Loggerhead", "Green", "Kemps"), col = c("red", "green", "purple"), size = 0.5, shape = 16, 
              border.col = NA, is.portrait = TRUE, title = "Species Sightings") + 
tm_compass(type = "rose", size = 3, text.size = 0.7, position = c(0.05, 0.10)) + 
tm_scale_bar(position = c(0.05, 0.05), text.size = 0.5) + 
tm_layout(title = "Crystal River", title.size = 1.2, title.position = c("center", 0.95), title.bg.color = "white",
          legend.position = c(0.05, 0.5), fontfamily = "serif")

Hot spot map

surveyAreaSimple_win <- as.owin(as_Spatial(Survey_Area_Simple)) # converts the Survey Area into a "window" (i.e., a boundary to spatial constrain an analysis)


Sightings_UTM <- st_transform(turtleData_Spatial, crs = CRS("+init=epsg:26917")) # Project the sighting data from the WGS84 geographic coordinate system into the NAD83 UTM 17N projected coordinate system. 

Sightings_spdf <- as_Spatial(Sightings_UTM) # Converts the sightings data from a Simple Features object to a SpatialPointsDataFrame
Sightings_ppp <- as.ppp(Sightings_spdf) # converts the sightings SpatialPointDataFrame into a marked Planar Point Pattern
Sightings_ppp <- Sightings_ppp[surveyAreaSimple_win] # Assigns the Survey Area "window" as the new boundary around the sightings data

Sightings_ppp_km <- rescale(Sightings_ppp, s = 1000, unitname = "km") # the UTM projected coordinate system uses a unit of meters. This rescales the units to kilometers.

Kernel Density Estimate (KDE)

sightingKDEImage <- as.im(density(Sightings_ppp)*1000000) # the 'density' function creates the KDE whereas 'as.im' saves the output as an image object. In order for the KDE to show up properly aligned with the Florida Shoreline, we must use the non-rescaled version of our sightings data. However, we can multiple the KDE by 1,000,000 (1,000 meters x 1,000 meters) to adjust the estimates from sightings per square meter to sightings per square kilometer.
sightingKDEGrid <- as.SpatialGridDataFrame.im(sightingKDEImage) # converts the KDE image object into a regular gridded object
sightingKDE <- raster(sightingKDEGrid) # converts the KDE gridded object into a raster object
crs(sightingKDE) <- CRS("+init=epsg:26917") # assigns the NAD83 UTM 17N projected coordinate system to the KDE raster object

tm_shape(Study_Area, is.master = TRUE) +
  tm_polygons(alpha = 0) +
tm_shape(FL_Shoreline) +
  tm_polygons(col = "grey", border.alpha = 0) +
tm_shape(sightingKDE) +
  tm_raster(palette = "-RdYlBu", title = "Sightings - All Species") +
tm_compass(type = "rose", size = 3, text.size = 0.7, position = c(0.05, 0.10)) + 
tm_scale_bar(position = c(0.05, 0.05), text.size = 0.5) + 
tm_layout(title = "Crystal River KDE", title.size = 1.2, title.position = c("center", 0.95), title.bg.color = "white",
          legend.position = c(0.05, 0.5), fontfamily = "serif")

Summarized spatial aspects

sightingsSimple_ppp <- Sightings_ppp[surveyAreaSimple_win] # assigns the basic Survey Area as the analysis "window"
sightingsSimple_ppp_km <- rescale(sightingsSimple_ppp, s = 1000, unitname = "km") # rescales the units from meters to kilometers

sightIntensity <- round(summary(sightingsSimple_ppp_km)$intensity, 2) # extracts the intensity (i.e., average number of sightings per square kilometer)
sightNearNeighbor <- round(mean(nndist(sightingsSimple_ppp)), 2) # calculates the average nearest neighbor distance

print(paste0("The average spatial intensity for all species sightings is ", sightIntensity, " sightings per sq. km.")) # prints a sentence helping to interpret the extracted intensity value
## [1] "The average spatial intensity for all species sightings is 0.5 sightings per sq. km."
print(paste0("The mean nearest neighbor distance for all species sightings is ", sightNearNeighbor, " m.")) # prints a sentence helping to interpret the calculated nearest neighbor distance
## [1] "The mean nearest neighbor distance for all species sightings is 482.73 m."

Are these sightings clustered together? If there is clustering, is it greater than expected by random chance?

sightMADTest <- mad.test(sightingsSimple_ppp, fun = Kest, nsim = 99, verbose = FALSE) # executes the Median Absolute Deviation test with 99 simulations of Complete Spatial Randomness


if(sightMADTest$p.value < 0.05){ # if the MAD test has a p value less than 0.05, this prints a statement indicating statistical significance
  print(paste0("These sightings are significantly more clustered than expected by Complete Spatial Randomness (CSR) - Median absolute deviation p value = ", round(sightMADTest$p.value, 3), "."))
} else { # if the p value is greater than or equal to 0.05, this prints a statement indicating non-significance
  print(paste0("These sightings are not significantly more clustered than expected by Complete Spatial Randomness (CSR) - Median absolute deviation p value = ", round(sightMADTest$p.value, 3), "."))
}
## [1] "These sightings are significantly more clustered than expected by Complete Spatial Randomness (CSR) - Median absolute deviation p value = 0.01."

Panel A shows where the black curve is the observed distribution and the blue dotted curve is that expected from CSR.

In Panel B, we compare our observed Ripley’s K statistic (the black curve) to that expected from CSR (the blue dotted curve). In Panel C, we display the confidence interval around our Ripley’s K statistic estimations of CSR compared to our observed Ripley’s K curve, whereas in Panel D we display the confidence interval around our observed Ripley’s K curve compared to the CSR Ripley’s K curve.

Since all of our observed curves fall above and/or outside the confidence interval of our CSR curves, we can safely say that our turtle sightings are statistically significantly clustered.

nearestNeighborSight <- Gest(sightingsSimple_ppp) # calculate the G-Function

ripleysKSight <- Kest(sightingsSimple_ppp_km, verbose = FALSE) # Compare observed K statistic to CSR K statistic

ripleysKenvelope <- envelope(sightingsSimple_ppp_km, fun = Kest, nsim = 99, verbose = FALSE) # calculate the confidence interval around the CSR K statistic

ripleysKlohboot <- lohboot(sightingsSimple_ppp_km, fun = Kest, verbose = FALSE) # calculate a confidence interval around our observed K statistic through bootstrapping


par(family = "serif", mfrow = c(2,2))
plot(nearestNeighborSight, main = "Nearest Neighbor Distances - All Species", xlab = "Nearest Neighbor Distance (m)", ylim = c(0, 1),
     ylab = "Cumulative Proportion", cex.lab = 1.2, cex.axis = 1.2)
mtext("A.", side = 3, line = 1, outer = FALSE, at = 0.05, cex = 1.2)
plot(ripleysKSight, main = "Ripley's K - All Species", xlab = "Distance from Observation (km)", ylim = c(0, 80), ylab = "Number of Additional Sightings", 
     cex.lab = 1.2, cex.axis = 1.2)
mtext("B.", side = 3, line = 1, outer = FALSE, at = 0.05, cex = 1.2)
plot(ripleysKenvelope, main = "Variation in CSR Simulations", xlab = "Distance from Observation (km)", ylim = c(0, 80), 
     ylab = "Number of Additional Sightings", cex.lab = 1.2, cex.axis = 1.2)
mtext("C.", side = 3, line = 1, outer = FALSE, at = 0.05, cex = 1.2)
plot(ripleysKlohboot, main = "Confidence Interval for K Function", xlab = "Distance from Observation (km)", ylim = c(0, 80), 
     ylab = "Number of Additional Sightings", cex.lab = 1.2, cex.axis = 1.2)
mtext("D.", side = 3, line = 1, outer = FALSE, at = 0.05, cex = 1.2)

Moran’s I

surveyAreaHex_win <- as.owin(as_Spatial(Survey_Area_Hex)) # converts the Survey Area hexagonal grid into a "window" (i.e., a boundary to spatial constrain an analysis)

intersectSight <- st_intersection(x = Survey_Area_Hex, y = Sightings_UTM$geometry) # identifies which hex grid cell each sighting falls in
intersectSightHex <- intersectSight %>% # counts the number of sightings per hex grid cell. If a grid cell has no sightings, it is excluded
  group_by(GRID_ID) %>% 
  count()
intersectSightHex <- as.data.frame(intersectSightHex)[,-3] # converts the hex grid cell counts to a data frame
sightingsHex <- merge(Survey_Area_Hex, intersectSightHex, by = "GRID_ID", all.x = TRUE) # merges the hex grid count data frame with the hex grid itself. Any hex grid cells without a sighting receive an "NA" value
sightingsHex[is.na(sightingsHex)] <- 0 # converts any "NA" values to 0 to indicate no sightings


tm_shape(Study_Area, is.master = TRUE) +
  tm_polygons(alpha = 0) +
tm_shape(FL_Shoreline) +
  tm_polygons(col = "grey", border.alpha = 0) +
tm_shape(sightingsHex) +
  tm_polygons("n", breaks = c(0, 1, 2, 3, 4, 5, 7, 10), palette = "-RdYlBu", border.alpha = 0, title = "Sightings - All Species") +
tm_compass(type = "rose", size = 3, text.size = 0.7, position = c(0.05, 0.10)) + 
tm_scale_bar(position = c(0.05, 0.05), text.size = 0.5) + 
tm_layout(title = "Crystal River", title.size = 1.2, title.position = c("center", 0.95), title.bg.color = "white",
          legend.position = c(0.05, 0.5), fontfamily = "serif")

Our sighting data shows statistically significant spatial autocorrelation, further supporting our clustered conclusion.

sightingsHex_spdf <- as(sightingsHex, 'Spatial') # converts the hex counts from a Simple Features object to a SpatialPointsDataFrame

neighborsTurtles <- poly2nb(sightingsHex_spdf) # identifies neighboring hex grid cells
neighborsTurtles_weightedList <- nb2listw(neighborsTurtles, style = "B") # creates a weighting system to prioritize hex grid cells nearest each other

moran.test(sightingsHex_spdf$n, listw = neighborsTurtles_weightedList)
## 
##  Moran I test under randomisation
## 
## data:  sightingsHex_spdf$n  
## weights: neighborsTurtles_weightedList    
## 
## Moran I statistic standard deviate = 8.2059, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.299537918      -0.004237288       0.001370423