#knitr::purl("wing_spatial.Rmd", output = "wing_spatial.R") # convert Rmd to R script

Barrier Model

Sourcing Scripts and Cleaning the Data

source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"

script_names = c("compare_models.R",
                 "regression_output.R",
                 "remove_torn_wings.R",
                 "clean_morph_data.R", # two functions: read_morph_data and remove_torn_wings
                 "get_Akaike_weights.R",
                 "AICprobabilities.R",
                 "HighstatLibV13.R")

for (script in script_names) { 
  path = paste0(source_path, script)
  source(path) 
}

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:rgeos':
## 
##     intersect, setdiff, union

## The following object is masked from 'package:reshape':
## 
##     stamp

## The following objects are masked from 'package:raster':
## 
##     intersect, union

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RTsrc/ts_auxiliary_funs.R") # time
source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RSsrc/spatial_dependencies.R") # space

data_list <- read_morph_data("data/allmorphology04.30.21-coors.csv")

## morph types: L S  NA LS L/S SL 
##    recoding missing morph types...
##    S if wing2thorax <=2.2, L if wing2thorax >=2.5
## 
## ambiguous wing morph bug count:  32

raw_data = data_list[[1]]
all_bugs = nrow(raw_data)
data_long = data_list[[2]] 

# Remove individuals with torn wings first
raw_data = remove_torn_wings(raw_data) # **don't need to remove torn wings for wing morph analysis

## 
## number of bugs with torn wings: 193

data_long = remove_torn_wings(data_long) # **don't need to remove torn wings for wing morph analysis

## 
## number of bugs with torn wings: 190

clean_bugs = nrow(raw_data)

cat("number of bugs with torn wings:", all_bugs - clean_bugs, "\n\n")

## number of bugs with torn wings: 193

should be 147

Checking coordinates

coors = unique(data_long[,c("population", "lat","long")])
coors[order(-coors$lat),]

##           population      lat      long
## 2148     Gainesville 29.66679 -82.35761
## 1545     Gainesville 29.66668 -82.35574
## 1975     Gainesville 29.66648 -82.35720
## 1125     Gainesville 29.66374 -82.36091
## 2914     Gainesville 29.66202 -82.34731
## 2897     Gainesville 29.66200 -82.34731
## 2157     Gainesville 29.66197 -82.34728
## 2637     Gainesville 29.66182 -82.34721
## 2147        Leesburg 28.81527 -81.88217
## 2772        Leesburg 28.81298 -81.87789
## 1917        Leesburg 28.80530 -81.88184
## 1276        Leesburg 28.80523 -81.88178
## 3646        Leesburg 28.79642 -81.87792
## 3147        Leesburg 28.79634 -81.87788
## 1077        Leesburg 28.79602 -81.87777
## 2107      Lake Wales 27.93575 -81.57412
## 1268      Lake Wales 27.93574 -81.57414
## 1826      Lake Wales 27.93573 -81.57409
## 1858      Lake Wales 27.93573 -81.57409
## 202       Lake Wales 27.90335 -81.58946
## 2747      Lake Wales 27.90335 -81.58946
## 3097      Lake Wales 27.89674 -81.58047
## 3592      Lake Wales 27.89664 -81.58064
## 2291     Lake Placid 27.29866 -81.36612
## 2704     Lake Placid 27.29866 -81.36612
## 3054     Lake Placid 27.29866 -81.36612
## 3078     Lake Placid 27.29866 -81.36612
## 3541     Lake Placid 27.29863 -81.36615
## 1          Ft. Myers 26.63401 -81.87987
## 1605       Homestead 25.94157 -80.48569
## 1665       Homestead 25.94157 -80.48569
## 3566       Homestead 25.55090 -80.42110
## 2653       Homestead 25.49197 -80.48562
## 105        Homestead 25.49172 -80.48586
## 2933       Homestead 25.49136 -80.48582
## 2198       Homestead 25.48862 -80.48976
## 3529 North Key Largo 25.28675 -80.29059
## 3196 North Key Largo 25.28661 -80.29081
## 3348 North Key Largo 25.28661 -80.29081
## 2596 North Key Largo 25.28637 -80.29077
## 2566 North Key Largo 25.27352 -80.30428
## 3177 North Key Largo 25.27349 -80.30410
## 1806 North Key Largo 25.25688 -80.30938
## 1203 North Key Largo 25.25687 -80.30942
## 3187 North Key Largo 25.25656 -80.31080
## 2831 North Key Largo 25.25648 -80.31069
## 2842 North Key Largo 25.25284 -80.31183
## 2354 North Key Largo 25.22809 -80.32888
## 3199 North Key Largo 25.22752 -80.32838
## 2878 North Key Largo 25.19515 -80.34592
## 2039 North Key Largo 25.19492 -80.34596
## 2398 North Key Largo 25.19484 -80.34590
## 2456 North Key Largo 25.18229 -80.36370
## 2812 North Key Largo 25.18229 -80.36370
## 3205 North Key Largo 25.18209 -80.36319
## 3586 North Key Largo 25.18088 -80.36466
## 3192 North Key Largo 25.17557 -80.36780
## 3379 North Key Largo 25.17557 -80.36781
## 2466 North Key Largo 25.17550 -80.36782
## 2806 North Key Largo 25.17369 -80.37153
## 1769       Key Largo 25.12858 -80.40809
## 3045       Key Largo 25.12858 -80.40809
## 2218       Key Largo 25.12848 -80.40754
## 2979       Key Largo 25.12846 -80.40809
## 1734       Key Largo 25.12811 -80.40805
## 1751       Key Largo 25.12800 -80.40808
## 141        Key Largo 25.12308 -80.41528
## 2975       Key Largo 25.10002 -80.43752
## 2884  Plantation Key 24.98519 -80.54710
## 1340  Plantation Key 24.97998 -80.54862
## 3404  Plantation Key 24.97357 -80.55392
## 905   Plantation Key 24.96448 -80.56739
## 3408  Plantation Key 24.96424 -80.56733

Off marks are written at the top of the Python script

Plotting

d2 = data_long %>%
  filter(!is.na(lat), !is.na(wing2body))

spatial_data <- st_as_sf(d2,
                         coords = c("long", "lat"),
                         crs = 4326,
                         agr = "constant")

spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)

## Warning: plotting the first 10 out of 27 attributes; use max.plot = 27 to plot
## all

Modeling

Make a model and then create a variogram for it to check for spatial dependencies.

What effects wing2body?

# And this is the Poisson GLMM

M1 <- inla(wing2body ~ sex_b + pophost_b + month_of_year + months_since_start, 
                    control.compute = list(dic = TRUE), 
                    family = "gaussian", 
                    data = d2)

M2 <- inla(wing2body ~ sex + pophost + month_of_year + months_since_start +
                            f(population, model = "iid"), 
                    control.compute = list(dic = TRUE),          
                    family = "gaussian", 
                    data = d2)
# Compare them
dic  <- c(M1$dic$dic, M2$dic$dic) # M1 better
dic

## [1] -9591.720 -9591.614

Model Validation, Variograms, and Spatial Plots

A variogram is used to display the variability between data points as a function of distance. Here, from distances 0 to 4000 m, the variability between points starts high and then decreases (the points become more similar) until it levels out because at some point the distance between data points will be great enough that the points are no longer considered to be related to each other. The variability will flatten out into a “sill”.

temp = d2 
d2 = check_spatial_dependencies(M2, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition

## Warning in proj4string(res): CRS object has comment, which is lost in output

## [1] "m"

Model Ony the Keys and Homestead

unique(d2$population)

## [1] Gainesville     Leesburg        Lake Wales      Lake Placid    
## [5] Ft. Myers       Homestead       North Key Largo Key Largo      
## [9] Plantation Key 
## 9 Levels: Gainesville Leesburg Lake Wales Lake Placid Ft. Myers ... Plantation Key

keys = c("North Key Largo", "Key Largo", "Plantation Key", "Homestead")

d3 = d2 %>%
  filter(population == keys[1] | population == keys[2] | population == keys[3] | population == keys[4])

unique(d3$population)

## [1] Homestead       North Key Largo Key Largo       Plantation Key 
## 9 Levels: Gainesville Leesburg Lake Wales Lake Placid Ft. Myers ... Plantation Key

spatial_data <- st_as_sf(d3,
                         coords = c("long", "lat"),
                         crs = 4326,
                         agr = "constant")

spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)

## Warning: plotting the first 9 out of 29 attributes; use max.plot = 29 to plot
## all

M1 <- inla(wing2body ~ sex_b + pophost + month_of_year + months_since_start +
                    f(population, model = "iid"),  
                    control.compute = list(dic = TRUE), 
                    family = "gaussian", 
                    data = d3)

temp = d3 
d3 = check_spatial_dependencies(M1, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition

## Warning in proj4string(res): CRS object has comment, which is lost in output

## [1] "m"

## Model Only Keys

keys = c("North Key Largo", "Key Largo", "Plantation Key")

d4 = d2 %>%
  filter(population == keys[1] | population == keys[2] | population == keys[3])

spatial_data <- st_as_sf(d4,
                         coords = c("long", "lat"),
                         crs = 4326,
                         agr = "constant")

spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)

## Warning: plotting the first 10 out of 29 attributes; use max.plot = 29 to plot
## all

M1 <- inla(wing2body ~ sex_b + month_of_year + months_since_start +
                    f(population, model = "iid"),  
                    control.compute = list(dic = TRUE), 
                    family = "gaussian", 
                    data = d4)

temp = d4

source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RSsrc/spatial_dependencies.R") # space
d4 = check_spatial_dependencies(M1, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition

## Warning in proj4string(res): CRS object has comment, which is lost in output

## [1] "m"

Probably best to take some means because the layering is obscuring what is happening per site, but can see clustering regardless.

Mesh + Stack Models

We will run all models via the stack, and we therefore implement the following 8 steps. 1. Make a mesh. 2. Define the weighting factors (also called the projector matrix A). 3. Define the SPDE. 4. Define the spatial field. 5. Make a stack. In this process we tell INLA at which points on the mesh we sampled the response variable and the covariates. 6. Specify the model formula in terms of the response variable, covariates and the spatial correlated term. 7. Run the spatial model in INLA. 8. Inspect the results.

# 1. Make a mesh.


# Step 1 of making a mesh:  Get a sense for the distribution of distances \
# between sampling locations. 
Loc <- cbind(d4$X.utm, d4$Y.utm)
D   <- dist(Loc)
par(mfrow = c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
hist(D / 1000, 
     freq = TRUE,
     main = "", 
     xlab = "Distance between sites (km)",
     ylab = "Frequency")

# Small scale distances are about 5 km. (max a bug can fly is 14 km)

# Step 2 of making a mesh: The primary tool to control the shape of the 
# triangles is max.edge. Other useful arguments are the cutoff and offset. 
# Use an outer area to avoid a boundary effect. The outer area can be less fine 
# than the inner area to reduce computing time.

# Come up with an initial guess for the range:
#  At what distance is the correlation smaller than 0.1?
#  10 km? See also the histogram if the distances again.
#  If we choose 5 km then the correlation will affect only a small %
#  of the distance combinations. 

# So..let's use 4 km for the range?
RangeGuess <- 4 * 1000   


# Recommended settings 
# See Section 3.1 at: https://haakonbakka.bitbucket.io/btopic104.html
Hull <- inla.nonconvex.hull(Loc, convex = -0.1)
MaxEdge  <- RangeGuess / 5
mesh     <- inla.mesh.2d(boundary = Hull,
                         max.edge = c(1, 5) * MaxEdge,
                         cutoff =  MaxEdge / 5)
mesh$n

## [1] 1614

# This is the mesh
par(mfrow = c(1,1), mar=c(0,0,0,0))
plot(mesh, asp=1, main = "")
points(Loc, col = 2, pch = 1, cex = 0.5)

#########################################
# Step 2. Define the weighting factors a_ik (also called 
#         the projector matrix).
A <- inla.spde.make.A(mesh, loc = Loc)
############################################

############################################
# Step 3. Define the SPDE.

# We need to specify weakly informative priors for the two Matern covariance 
# parameters Range and sigma.

# P(Range < range0) = alpha  and 
# P(sigma > sigma0) = alpha

# and substitute these into: 
#   prior.range = c(range0, alpha) 
#   prior.sigma = c(sigma0, alpha)

# These are Penalised Complexity (PC) priors
# See:  Constructing Priors that Penalize the Complexity of Gaussian Random Fields
#       Fuglstad et al. 
#       arXiv:1503.00256v4 [stat.ME] 27 Nov 2017


# This seems to be a sensible choice, based on the sample-variogram of the
# Pearson residuals obtained by the GLM without spatial correlation:
#           P(Range < 4 km) = 0.05


# Specifying a value for sigma is more difficult. We started with this:
#           P(sigma_u > 0.5) = 0.05
# This states that it is unlikely that sigma_u is larger than 0.5.
spde <- inla.spde2.pcmatern(mesh, 
                            prior.range = c(4 * 1000, 0.05), 
                            prior.sigma = c(1, 0.05)) # can make this 1

##########################################

##########################################
# Step 4. Define the spatial field.
# Next we set up a list for the spatial random  intercept u. As explained in Section 13.6, 
# u is rewritten internally as A * w. We need to specify the w in INLA. This is done with 
# the inla.spde.make.index function

# The size of the mesh is: 
mesh$n

## [1] 1614

# This number is also in
spde$n.spde

## [1] 1614

# It is also the number of w_k values that we will get.
w.index <- inla.spde.make.index('w', n.spde = spde$n.spde)
#####################################

#####################################
# Step 5.   Make a stack. 

# Make the X matrix
Xm <- model.matrix(~ sex_b + pophost_b + month_of_year, data = d4)
Xm <- as.data.frame(Xm)
colnames(Xm)

## [1] "(Intercept)"   "sex_b"         "pophost_b"     "month_of_year"

# And here is the stack. 
N <- nrow(d4)
StackFit <- inla.stack(
  tag = "Fit",
  data = list(wing2body = d4$wing2body), 
  A = list(1, 1, A),                  
  effects = list(   
    Intercept = rep(1, N),
    Xm        = Xm[,-1],    #Covariates without the intercept
    w         = w.index))

#####################################################
# First we run the model without spatial dependency.
I1 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year, 
           family = "gaussian", 
           data = inla.stack.data(StackFit),
           control.compute = list(dic = TRUE, waic = TRUE),
           control.predictor = list(A = inla.stack.A(StackFit)))

# And this is the model with the spatial random field:
I2 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year + 
                         f(w, model = spde), 
           family = "gaussian", 
           data = inla.stack.data(StackFit),
           control.compute = list(dic = TRUE, waic = TRUE),
           control.predictor = list(A = inla.stack.A(StackFit)))


# And compare I1 and I2 with DICs and WAICs
dic   <- c(I1$dic$dic, I2$dic$dic)   
waic  <- c(I1$waic$waic, I2$waic$waic)
Z.out <- cbind(dic, waic)
rownames(Z.out) <- c("I1: Gaussian GLM",  
                     "I2: Gaussian GLM + SRF")
Z.out

##                              dic      waic
## I1: Gaussian GLM       -4030.459 -4029.007
## I2: Gaussian GLM + SRF -4031.466 -4030.490

# Adding spatial correlation doesn't improve the model...

# Model validation of I2 (spatial gaussian GLM):
temp = d4
d4 = check_spatial_dependencies(I2, temp, temp$long, temp$lat, zone = 16, cutoff=5000, is_inla=TRUE)

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition

## Warning in proj4string(res): CRS object has comment, which is lost in output

## [1] "m"

##########################################################
# Model interpretation

# Posterior mean values of the regression parameters
Beta2 <- I2$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
print(Beta2, digits = 3)

##                    mean 0.025quant 0.975quant
## sex_b         -0.003490  -0.005288   -0.00169
## pophost_b     -0.740622  -0.955762   -0.33190
## month_of_year  0.000791   0.000246    0.00134

data_source_name = "~/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Detailed_Florida_State_Boundary.shp"
Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Detailed_Florida_State_Boundary")

## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/anastasiabernat/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Detailed_Florida_State_Boundary.shp", layer: "Detailed_Florida_State_Boundary"
## with 1 features
## It has 4 fields
## Integer64 fields read as strings:  Shape_STAr

# Convert the shapefile to UTM
Florida_ShapeFile.UTM <- spTransform(Florida_ShapeFile, 
                           CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))
Keys.UTM = crop(Florida_ShapeFile.UTM, extent(439300.8+500000, 1194383, 12721244-1200000, 13431336-600000))
Keys.UTM = crop(Florida_ShapeFile.UTM, extent(439300.8+700000, 1194383, 12721244-1200000, 13431336-600000)) # need to figure out why I can't overlap anything on these utm objects

plot(Florida_ShapeFile.UTM)

plot(Florida_ShapeFile.UTM)
points(Loc[,1], Loc[,2] + 10000000, col=2, cex=1)

tm_shape(Florida_ShapeFile.UTM) + tm_polygons(col="white")

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

tm_shape(Florida_ShapeFile.UTM) +
  tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons(col="#b5ed80")

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

tm_shape(Keys.UTM) + 
  tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons(col="#a5c2f0")

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

tm_shape(Keys.UTM) + tm_polygons(col="white")

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

coors = as.data.frame(cbind(d4$lat, d4$long))
colnames(coors) = c("lat", "lon")
collection_sites <- st_as_sf(coors,
                             coords = c("lon", "lat"),
                             crs = 4326,
                             agr = "constant")

tm_shape(Keys.UTM) + 
  tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons() +
tm_shape(collection_sites) +
  tm_dots(alpha = 0.9, size= 0.2, shape=2, title = 'Collection Sites', labels = "geometry", legend.show=TRUE)

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

cat("Keys\n\n")

## Keys

extent(Keys.UTM)

## class      : Extent 
## xmin       : 1139301 
## xmax       : 1185209 
## ymin       : 12766169 
## ymax       : 12831336

cat("\nPoints\n\n")

## 
## Points

extent(Loc)

## class      : Extent 
## xmin       : 1150184 
## xmax       : 1176453 
## ymin       : 2776436 
## ymax       : 2813669

# do they fit?
eKx = extent(Keys.UTM)[2] - extent(Keys.UTM)[1]
eKy = extent(Keys.UTM)[4] - extent(Keys.UTM)[3]
eLx = extent(Loc)[2] - extent(Loc)[1]
eLy = extent(Loc)[4] - extent(Loc)[3]
eKx - eLx # if positive then yes.

## [1] 19640.16

eKy - eLy

## [1] 27933.88

coors = as.data.frame(Loc)
colnames(coors) = c("lon", "lat")
coors$lat = coors$lat + 10000000 # 10,000,000 off for some reason.........
collection_sites <- st_as_sf(coors,
                             coords = c("lon", "lat"),
                             crs = 4326,
                             agr = "constant")

tm_shape(Keys.UTM) + 
  tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons() +
tm_shape(collection_sites) +
  tm_dots(alpha = 0.9, size= 0.2, shape=2, title = 'Collection Sites', labels = "geometry", legend.show=TRUE) # still doesn't show up...must be somewhere else in the world...

## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output

plot(Keys.UTM)
points(Loc[,1], Loc[,2] + 10000000, col="red")

mesh <- inla.mesh.2d(boundary = Hull,
                     max.edge = c(1, 5) * MaxEdge,
                     cutoff =  MaxEdge / 5)
mesh$loc[,2] = mesh$loc[,2] + 10000000 # 10,000,000 off for some reason.........

# Plot spatial random fields obtained with the ordinary mesh 
wpm.I2 <- I2$summary.random$w$mean
wsd.I2 <- I2$summary.random$w$sd

rx = range(mesh$loc[,1])
ry = c(range(mesh$loc[,2])[1] - 2000, range(mesh$loc[,2])[2] + 7000)

colnames(Loc) = c("lon", "lat")

PlotField2(field = wpm.I2, 
           mesh = mesh, 
           xlim = rx, 
           ylim = ry,
           MyTitle = "Posterior mean SRF")

points(Loc[,1], Loc[,2] + 10000000, pch=17)
plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE) # border = "white")

# Problems with the spatial correlation

# Extract the spatial hyper-parameters.
SpatPar <- MySpatialParams(I2, spde)
SpatPar

##        Kappa      Sigma_u        Range 
## 3.232952e-05 8.690030e-03 1.806280e+06

Kappa   <- SpatPar[1]
Sigma.u <- SpatPar[2]
Range   <- SpatPar[3]

# Show correlation structure. First we obtain the locations of each point of the mesh.
LocMesh <- mesh$loc[,1:2]

# And then we calculate the distance between each vertex.
D <- as.matrix(dist(LocMesh))

# Using the estimated parameters from the model (see above), we can calculate 
# the imposed Matern correlation values.
d.vec <- seq(0, max(D), length = 100)      
Cor.M <- (Kappa * d.vec) * besselK(Kappa * d.vec, 1) 
Cor.M[1] <- 1

# Which we plot here:
par(mfrow=c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
plot(x = d.vec / 1000, 
     y = Cor.M, 
     pch = 16, 
     type = "l", 
     cex.lab = 1.5,
     xlab = "Distance (km)", 
     ylab = "Correlation",
     xlim = c(0, 70))

# This indicates how far the correlation goes...can see that the correlation becomes weak around 8 km

###############################################
# Let us pick a specific point and visualize how far the correlation goes 
# from this point.
plot(Keys.UTM, axes=TRUE)
# These are all sampled sites
points(x = Loc[,1],
       y = Loc[,2] + 10000000, 
       cex = 0.5, 
       col = "blue", 
       pch = 17)
# Random point that we picked:
points(x = Loc[158,1], 
       y = Loc[158,2] + 10000000, 
       col = 2, pch = 16, cex = 1)

# How far does the correlation from the red point reach? How many of the 
# sampling locations are affected? We will visualize the correlation on the mesh
# and plot this up to correlation = 0.1

# loc = c(485000, 5971248) is the target point
# Get the inverse of the covariance matrix of the w
Q <- inla.spde2.precision(spde, 
                          theta = c(log(Range ),log(Sigma.u)))

# Get the correlation values
Corr <- local.find.correlation(Q, 
                               loc = c(Loc[158,1], Loc[158,2] + 10000000), 
                               mesh)

## [1] "The location used was c( 1174631.144 ,  12810321.7113 )"

#It will find the closest point on the mesh:
plot(Keys.UTM, axes=TRUE)
# These are all sampled sites
points(x = Loc[,1],
       y = Loc[,2] + 10000000, 
       cex = 0.5, 
       col = "blue", 
       pch = 17)
# Random point that we picked:
points(x = Loc[158,1], 
       y = Loc[158,2] + 10000000, 
       col = 2, pch = 16, cex = 1)
points(x = 1174631.144 , y = 12810321.7113 , col = 2, pch = 16, cex = 2)

# And the rest is a matter of plotting the correlation values, contour of the 
# coast and the sampling locations.
PlotField2(Corr, 
           mesh, 
           ContourMap = Keys.UTM,  
           zlim = c(0.2, 1),
           Add = FALSE)
plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE)
points(x = Loc[,1],
       y = Loc[,2] + 10000000,
       cex = 0.5, 
       col = "black", 
       pch = 17)

# Yikes...not sure why this is happening

Mesh + Stack + Barrier Models

Need to add the barrier on the right of the keys. Could make that shapefile in QGIS. Need Straits_of_Florida.UTM

data_source_name = "~/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Strait.shp"
#Strait_of_Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Water")
Strait_of_Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Strait")

## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/anastasiabernat/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Strait.shp", layer: "Strait"
## with 1 features
## It has 3 fields
## Integer64 fields read as strings:  id Area

# Convert the shapefile to UTM
Water.UTM <- spTransform(Strait_of_Florida_ShapeFile, 
                           CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))

Loc[,2] = Loc[,2] + 10000000 # 10,000,000 off for some reason.........

plot(Keys.UTM)
points(Loc[,1], Loc[,2], col="red")
plot(Water.UTM,
     add=TRUE,
     col="blue")

###################################################
# Barrier model applied on benthic coverage data
# I4: A beta model in which the mesh contains two barriers.

# Create a mesh for the barrier model
NonConvHull <- inla.nonconvex.hull(Loc)
mesh.barrier <- inla.mesh.2d(boundary = NonConvHull,
                             interior = Water.UTM, 
                             max.edge = c(1) * MaxEdge, 
                             cutoff = MaxEdge / 5)

mesh.barrier$loc[,2] = mesh.barrier$loc[,2]

# Plot the mesh
par(mfrow = c(1,1), mar = c(0,0,2,0))
plot(mesh.barrier)
points(Loc, col = 1, pch = 16, cex = 1)

mesh.barrier$n

## [1] 2306

# Projector matrix for model I4
A4 <- inla.spde.make.A(mesh.barrier, loc = Loc)

##########################
# Define the SPDE with the same PC priors as above. Recall from the theory 
# presentation that the barrier model uses two SPDE equations; one for the 
# triangles outside the barrier and one for the triangles inside the barrier. 
# We therefore first need to figure out which triangles are inside the 
# barrier(s).

# Get the spatial location of the center of each triangle.
NumTri <- length(mesh.barrier$graph$tv[,1])
PosTri <- matrix(0, NumTri, 2)
for (t in 1:NumTri){
  Temp       <- mesh.barrier$loc[mesh.barrier$graph$tv[t, ], ]
  PosTri[t,] <- colMeans(Temp)[c(1,2)] 
}

# Add the CRS to this object so that we can use the 'over' function
PosTri <- SpatialPoints(PosTri, 
                        CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))
plot(PosTri, col="darkgreen")
plot(Water.UTM, 
     add=T)
points(Loc[,1], Loc[,2], col="white")

# Geometry reports
temp <- st_as_sf(Water.UTM, 4326)
#temp <- st_as_sf(PosTri, 4326)
poly_valid_report <- st_is_valid(temp)
poly_valid_report

## [1] TRUE

which(poly_valid_report == FALSE)

## integer(0)

# make sure CRS identical
identicalCRS(Water.UTM, PosTri)

## [1] TRUE

# Determine which triangles are inside the barrier(s).
TriangleInBarrier <- over(Water.UTM, PosTri, returnList=T) # none are inside the barrier...
TriangleInBarrier <- unlist(TriangleInBarrier)
mesh.barrier$n - length(TriangleInBarrier)

## [1] 1136

rgeos::set_RGEOS_CheckValidity(2L)

BarrierPoly <- inla.barrier.polygon(mesh.barrier, TriangleInBarrier)

## Warning in RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid"): Self-intersection
## at or near point 1144610.73448928 12775958.6543086

## mesh.polys is invalid

## Attempting to make mesh.polys valid by zero-width buffering

## Warning in RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid"): Self-intersection
## at or near point 1146742.951568 12772475.38900359

## mesh.polys is invalid
## Attempting to make mesh.polys valid by zero-width buffering

# Now INLA will know which triangles are inside the barrier(s).
# Not sure where the warning messages all of a sudden come from. But they do not
# seem to affect the results.

Range <- 4 * 1000
Sigma <- 0.5
spde.barrier <- inla.barrier.pcmatern(mesh.barrier, 
                                       barrier.triangles = TriangleInBarrier, 
                                       prior.range = c(Range, 0.05), 
                                       prior.sigma = c(Sigma, 0.05))

##########################
# Spatial random field for model I4
w4.index <- inla.spde.make.index(
  name    = 'w', 
  n.spde  = spde.barrier$f$n)

##########################
# And here is the stack. 
N <- nrow(d4)
StackFit.Barrier <- inla.stack(
  tag = "Fit",
  data = list(wing2body = d4$wing2body), 
  A = list(1, 1, A),                  
  effects = list(   
    Intercept = rep(1, N),
    Xm        = Xm[,-1],    #Covariates without the intercept
    w         = w.index))

# And this is the model with the spatial random field:
I4 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year + 
                         f(w, model = spde.barrier), 
           family = "gaussian", 
           data = inla.stack.data(StackFit.Barrier),
           control.compute = list(dic = TRUE, waic = TRUE),
           control.predictor = list(A = inla.stack.A(StackFit.Barrier)))

## Warning in inla.model.properties.generic(inla.trim.family(model), mm[names(mm) == : Model 'rgeneric' in section 'latent' is marked as 'experimental'; changes may appear at any time.
##   Use this model with extra care!!! Further warnings are disabled.

# Covariate effects and spatial random field
Beta4 <- I4$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
print(Beta4, digits = 3)

##                    mean 0.025quant 0.975quant
## sex_b         -0.003496  -0.005297   -0.00170
## pophost_b     -0.719740  -0.726856   -0.71242
## month_of_year  0.000805   0.000258    0.00135

theta <- I4$summary.hyper[1, "mean"]
theta # 4344.969? What does that mean?

## [1] 4623.327

# Plot the SRF obtained by I4
wpm.4 <- I4$summary.random$w$mean
wsd.4 <- I4$summary.random$w$sd

PlotField2(field = wpm.4, 
           mesh = mesh.barrier, 
           xlim = range(mesh.barrier$loc[,1]), 
           ylim = range(mesh.barrier$loc[,2]),
           MyTitle = "Posterior mean SRF")
plot(Water.UTM, 
     axes = TRUE, 
     add = TRUE,
     border = "white")
points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE) # border = "white")

# Note that the correlation does not cross into the strait.

# This is the SD:
PlotField2(field = wsd.4, 
           mesh = mesh.barrier, 
           xlim = range(mesh.barrier$loc[,1]), 
           ylim = range(mesh.barrier$loc[,2]),
           MyTitle = "Posterior mean SD")
plot(Water.UTM, 
     axes = TRUE, 
     add = TRUE,
     border = "white")

plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE) # border = "white")
points(Loc[,1], Loc[,2], pch=17)

# This is how you get them for a barrier model:
# It is a little bit of fancy code.
HP <- I4$marginals.hyperpar

TakeExp <- function(x){exp(x)}
Sigma4.u <- inla.emarginal(TakeExp, HP$`Theta1 for w`) 
Range4   <- inla.emarginal(TakeExp, HP$`Theta2 for w`) 

paste('Sigma.u = ', round(Sigma4.u, digits = 3))

## [1] "Sigma.u =  0.002"

paste('Range = ', round(Range4 / 1000, digits = 3), "km")

## [1] "Range =  20.012 km"

# Show the extend of the spatial correlation
# Get the inverse of the covariance matrix of the w.
# Note that we are using a different function as before.
Q <- inla.rgeneric.q(spde.barrier, 
                     "Q", 
                     theta = c(log(Sigma4.u ), log(Range4)))

Corr <- local.find.correlation(Q, 
                               loc = c(Loc[157,1], Loc[157,2]), 
                               mesh.barrier)

## [1] "The location used was c( 1174723.0528 ,  12810472.1565 )"

# And the rest is a matter of plotting the
# correlation values, contour of the coast and the
# sampling locations.
PlotField2(Corr, 
           mesh.barrier, 
           ContourMap = Keys.UTM,  
           zlim = c(0.2, 1),
           Add = FALSE)
#It will find the closest point on the mesh:
points(x = 1174723.0528, y = 12810472.1565, col = 2, pch = 16, cex = 2)

plot(Keys.UTM, 
     axes = TRUE, 
     add = TRUE)
points(x = Loc[,1],
       y = Loc[,2],
       cex = 0.5, 
       col = "black", 
       pch = 16)

# This is not working...
##########################################

# And the rest is a matter of plotting the
# correlation values, contour of the coast and the
# sampling locations.
PlotField2(Corr, 
           mesh.barrier, 
           ContourMap = Keys.UTM,  
           zlim = c(0.2, 1),
           Add = FALSE)
#It will find the closest point on the mesh:
points(x = 1174723.0528, y = 12810472.1565, col = 2, pch = 16, cex = 2)

plot(Keys.UTM, 
     axes = TRUE, 
     add = TRUE)
points(x = Loc[,1],
       y = Loc[,2],
       cex = 0.5, 
       col = "black", 
       pch = 16)

# This is not working...
##########################################

Is a barrier really needed? Even without:

spde.barrier <- inla.spde2.pcmatern(mesh.barrier, 
                            prior.range = c(4 * 1000, 0.05), 
                            prior.sigma = c(1, 0.05)) # can make this 1

# The size of the mesh is: 
mesh.barrier$n

## [1] 2306

# This number is also in
spde.barrier$n.spde

## [1] 2306

# It is also the number of w_k values that we will get.
w.index <- inla.spde.make.index('w', n.spde = spde$n.spde)

#####################################
# Step 5.   Make a stack. 

# Make the X matrix
Xm <- model.matrix(~ sex_b + pophost_b + month_of_year, data = d4)
Xm <- as.data.frame(Xm)
colnames(Xm)

## [1] "(Intercept)"   "sex_b"         "pophost_b"     "month_of_year"

# And here is the stack. 
N <- nrow(d4)
StackFit.Barrier <- inla.stack(
  tag = "Fit",
  data = list(wing2body = d4$wing2body), 
  A = list(1, 1, A),                  
  effects = list(   
    Intercept = rep(1, N),
    Xm        = Xm[,-1],    #Covariates without the intercept
    w         = w.index))

#####################################################
# First we run the model without spatial dependency.
I3 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year, 
           family = "gaussian", 
           data = inla.stack.data(StackFit.Barrier),
           control.compute = list(dic = TRUE, waic = TRUE),
           control.predictor = list(A = inla.stack.A(StackFit.Barrier)))

# And this is the model with the spatial random field:
I4 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year + 
                         f(w, model = spde.barrier), 
           family = "gaussian", 
           data = inla.stack.data(StackFit.Barrier),
           control.compute = list(dic = TRUE, waic = TRUE),
           control.predictor = list(A = inla.stack.A(StackFit.Barrier)))


# And compare I1 and I2 with DICs and WAICs
dic   <- c(I1$dic$dic, I2$dic$dic)   
waic  <- c(I1$waic$waic, I2$waic$waic)
Z.out <- cbind(dic, waic)
rownames(Z.out) <- c("I1: Gaussian GLM",  
                     "I2: Gaussian GLM + SRF")
Z.out

##                              dic      waic
## I1: Gaussian GLM       -4030.459 -4029.007
## I2: Gaussian GLM + SRF -4031.466 -4030.490

# Adding spatial correlation doesn't improve the model...

# Plot spatial random fields obtained with the ordinary mesh 
wpm.I4 <- I4$summary.random$w$mean
wsd.I4 <- I4$summary.random$w$sd

rx = range(mesh.barrier$loc[,1])
ry = c(range(mesh.barrier$loc[,2])[1] - 2000, range(mesh.barrier$loc[,2])[2] + 7000)

colnames(Loc) = c("lon", "lat")

PlotField2(field = wpm.I4, 
           mesh = mesh.barrier, 
           xlim = rx, 
           ylim = range(mesh.barrier$loc[,2]),
           MyTitle = "Posterior mean SRF") # sample regression function (SRF)

points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE) # border = "white")

PlotField2(field = wsd.I4, 
           mesh = mesh.barrier, 
           xlim = rx, 
           ylim = range(mesh.barrier$loc[,2]),
           MyTitle = "Posterior mean SD")

points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM, 
     axes = TRUE,
     add = TRUE) # border = "white")

All Morphology: Testing for Spatial Dependencies

Anastasia Bernat

4/23/2021