#knitr::purl("wing_spatial.Rmd", output = "wing_spatial.R") # convert Rmd to R script
source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"
script_names = c("compare_models.R",
"regression_output.R",
"remove_torn_wings.R",
"clean_morph_data.R", # two functions: read_morph_data and remove_torn_wings
"get_Akaike_weights.R",
"AICprobabilities.R",
"HighstatLibV13.R")
for (script in script_names) {
path = paste0(source_path, script)
source(path)
}
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:rgeos':
##
## intersect, setdiff, union
## The following object is masked from 'package:reshape':
##
## stamp
## The following objects are masked from 'package:raster':
##
## intersect, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RTsrc/ts_auxiliary_funs.R") # time
source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RSsrc/spatial_dependencies.R") # space
data_list <- read_morph_data("data/allmorphology04.30.21-coors.csv")
## morph types: L S NA LS L/S SL
## recoding missing morph types...
## S if wing2thorax <=2.2, L if wing2thorax >=2.5
##
## ambiguous wing morph bug count: 32
raw_data = data_list[[1]]
all_bugs = nrow(raw_data)
data_long = data_list[[2]]
# Remove individuals with torn wings first
raw_data = remove_torn_wings(raw_data) # **don't need to remove torn wings for wing morph analysis
##
## number of bugs with torn wings: 193
data_long = remove_torn_wings(data_long) # **don't need to remove torn wings for wing morph analysis
##
## number of bugs with torn wings: 190
clean_bugs = nrow(raw_data)
cat("number of bugs with torn wings:", all_bugs - clean_bugs, "\n\n")
## number of bugs with torn wings: 193
coors = unique(data_long[,c("population", "lat","long")])
coors[order(-coors$lat),]
## population lat long
## 2148 Gainesville 29.66679 -82.35761
## 1545 Gainesville 29.66668 -82.35574
## 1975 Gainesville 29.66648 -82.35720
## 1125 Gainesville 29.66374 -82.36091
## 2914 Gainesville 29.66202 -82.34731
## 2897 Gainesville 29.66200 -82.34731
## 2157 Gainesville 29.66197 -82.34728
## 2637 Gainesville 29.66182 -82.34721
## 2147 Leesburg 28.81527 -81.88217
## 2772 Leesburg 28.81298 -81.87789
## 1917 Leesburg 28.80530 -81.88184
## 1276 Leesburg 28.80523 -81.88178
## 3646 Leesburg 28.79642 -81.87792
## 3147 Leesburg 28.79634 -81.87788
## 1077 Leesburg 28.79602 -81.87777
## 2107 Lake Wales 27.93575 -81.57412
## 1268 Lake Wales 27.93574 -81.57414
## 1826 Lake Wales 27.93573 -81.57409
## 1858 Lake Wales 27.93573 -81.57409
## 202 Lake Wales 27.90335 -81.58946
## 2747 Lake Wales 27.90335 -81.58946
## 3097 Lake Wales 27.89674 -81.58047
## 3592 Lake Wales 27.89664 -81.58064
## 2291 Lake Placid 27.29866 -81.36612
## 2704 Lake Placid 27.29866 -81.36612
## 3054 Lake Placid 27.29866 -81.36612
## 3078 Lake Placid 27.29866 -81.36612
## 3541 Lake Placid 27.29863 -81.36615
## 1 Ft. Myers 26.63401 -81.87987
## 1605 Homestead 25.94157 -80.48569
## 1665 Homestead 25.94157 -80.48569
## 3566 Homestead 25.55090 -80.42110
## 2653 Homestead 25.49197 -80.48562
## 105 Homestead 25.49172 -80.48586
## 2933 Homestead 25.49136 -80.48582
## 2198 Homestead 25.48862 -80.48976
## 3529 North Key Largo 25.28675 -80.29059
## 3196 North Key Largo 25.28661 -80.29081
## 3348 North Key Largo 25.28661 -80.29081
## 2596 North Key Largo 25.28637 -80.29077
## 2566 North Key Largo 25.27352 -80.30428
## 3177 North Key Largo 25.27349 -80.30410
## 1806 North Key Largo 25.25688 -80.30938
## 1203 North Key Largo 25.25687 -80.30942
## 3187 North Key Largo 25.25656 -80.31080
## 2831 North Key Largo 25.25648 -80.31069
## 2842 North Key Largo 25.25284 -80.31183
## 2354 North Key Largo 25.22809 -80.32888
## 3199 North Key Largo 25.22752 -80.32838
## 2878 North Key Largo 25.19515 -80.34592
## 2039 North Key Largo 25.19492 -80.34596
## 2398 North Key Largo 25.19484 -80.34590
## 2456 North Key Largo 25.18229 -80.36370
## 2812 North Key Largo 25.18229 -80.36370
## 3205 North Key Largo 25.18209 -80.36319
## 3586 North Key Largo 25.18088 -80.36466
## 3192 North Key Largo 25.17557 -80.36780
## 3379 North Key Largo 25.17557 -80.36781
## 2466 North Key Largo 25.17550 -80.36782
## 2806 North Key Largo 25.17369 -80.37153
## 1769 Key Largo 25.12858 -80.40809
## 3045 Key Largo 25.12858 -80.40809
## 2218 Key Largo 25.12848 -80.40754
## 2979 Key Largo 25.12846 -80.40809
## 1734 Key Largo 25.12811 -80.40805
## 1751 Key Largo 25.12800 -80.40808
## 141 Key Largo 25.12308 -80.41528
## 2975 Key Largo 25.10002 -80.43752
## 2884 Plantation Key 24.98519 -80.54710
## 1340 Plantation Key 24.97998 -80.54862
## 3404 Plantation Key 24.97357 -80.55392
## 905 Plantation Key 24.96448 -80.56739
## 3408 Plantation Key 24.96424 -80.56733
Off marks are written at the top of the Python script
d2 = data_long %>%
filter(!is.na(lat), !is.na(wing2body))
spatial_data <- st_as_sf(d2,
coords = c("long", "lat"),
crs = 4326,
agr = "constant")
spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)
## Warning: plotting the first 10 out of 27 attributes; use max.plot = 27 to plot
## all
Make a model and then create a variogram for it to check for spatial dependencies.
What effects wing2body?
# And this is the Poisson GLMM
M1 <- inla(wing2body ~ sex_b + pophost_b + month_of_year + months_since_start,
control.compute = list(dic = TRUE),
family = "gaussian",
data = d2)
M2 <- inla(wing2body ~ sex + pophost + month_of_year + months_since_start +
f(population, model = "iid"),
control.compute = list(dic = TRUE),
family = "gaussian",
data = d2)
# Compare them
dic <- c(M1$dic$dic, M2$dic$dic) # M1 better
dic
## [1] -9591.720 -9591.614
A variogram is used to display the variability between data points as a function of distance. Here, from distances 0 to 4000 m, the variability between points starts high and then decreases (the points become more similar) until it levels out because at some point the distance between data points will be great enough that the points are no longer considered to be related to each other. The variability will flatten out into a “sill”.
temp = d2
d2 = check_spatial_dependencies(M2, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
## [1] "m"
unique(d2$population)
## [1] Gainesville Leesburg Lake Wales Lake Placid
## [5] Ft. Myers Homestead North Key Largo Key Largo
## [9] Plantation Key
## 9 Levels: Gainesville Leesburg Lake Wales Lake Placid Ft. Myers ... Plantation Key
keys = c("North Key Largo", "Key Largo", "Plantation Key", "Homestead")
d3 = d2 %>%
filter(population == keys[1] | population == keys[2] | population == keys[3] | population == keys[4])
unique(d3$population)
## [1] Homestead North Key Largo Key Largo Plantation Key
## 9 Levels: Gainesville Leesburg Lake Wales Lake Placid Ft. Myers ... Plantation Key
spatial_data <- st_as_sf(d3,
coords = c("long", "lat"),
crs = 4326,
agr = "constant")
spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)
## Warning: plotting the first 9 out of 29 attributes; use max.plot = 29 to plot
## all
M1 <- inla(wing2body ~ sex_b + pophost + month_of_year + months_since_start +
f(population, model = "iid"),
control.compute = list(dic = TRUE),
family = "gaussian",
data = d3)
temp = d3
d3 = check_spatial_dependencies(M1, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
## [1] "m"
## Model Only Keys
keys = c("North Key Largo", "Key Largo", "Plantation Key")
d4 = d2 %>%
filter(population == keys[1] | population == keys[2] | population == keys[3])
spatial_data <- st_as_sf(d4,
coords = c("long", "lat"),
crs = 4326,
agr = "constant")
spatial_data <- st_transform(spatial_data, 2236)
plot(spatial_data)
## Warning: plotting the first 10 out of 29 attributes; use max.plot = 29 to plot
## all
M1 <- inla(wing2body ~ sex_b + month_of_year + months_since_start +
f(population, model = "iid"),
control.compute = list(dic = TRUE),
family = "gaussian",
data = d4)
temp = d4
source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RSsrc/spatial_dependencies.R") # space
d4 = check_spatial_dependencies(M1, temp, temp$long, temp$lat, zone = 16, cutoff=10000, is_inla=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
## [1] "m"
Probably best to take some means because the layering is obscuring what is happening per site, but can see clustering regardless.
We will run all models via the stack, and we therefore implement the following 8 steps. 1. Make a mesh. 2. Define the weighting factors (also called the projector matrix A). 3. Define the SPDE. 4. Define the spatial field. 5. Make a stack. In this process we tell INLA at which points on the mesh we sampled the response variable and the covariates. 6. Specify the model formula in terms of the response variable, covariates and the spatial correlated term. 7. Run the spatial model in INLA. 8. Inspect the results.
# 1. Make a mesh.
# Step 1 of making a mesh: Get a sense for the distribution of distances \
# between sampling locations.
Loc <- cbind(d4$X.utm, d4$Y.utm)
D <- dist(Loc)
par(mfrow = c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
hist(D / 1000,
freq = TRUE,
main = "",
xlab = "Distance between sites (km)",
ylab = "Frequency")
# Small scale distances are about 5 km. (max a bug can fly is 14 km)
# Step 2 of making a mesh: The primary tool to control the shape of the
# triangles is max.edge. Other useful arguments are the cutoff and offset.
# Use an outer area to avoid a boundary effect. The outer area can be less fine
# than the inner area to reduce computing time.
# Come up with an initial guess for the range:
# At what distance is the correlation smaller than 0.1?
# 10 km? See also the histogram if the distances again.
# If we choose 5 km then the correlation will affect only a small %
# of the distance combinations.
# So..let's use 4 km for the range?
RangeGuess <- 4 * 1000
# Recommended settings
# See Section 3.1 at: https://haakonbakka.bitbucket.io/btopic104.html
Hull <- inla.nonconvex.hull(Loc, convex = -0.1)
MaxEdge <- RangeGuess / 5
mesh <- inla.mesh.2d(boundary = Hull,
max.edge = c(1, 5) * MaxEdge,
cutoff = MaxEdge / 5)
mesh$n
## [1] 1614
# This is the mesh
par(mfrow = c(1,1), mar=c(0,0,0,0))
plot(mesh, asp=1, main = "")
points(Loc, col = 2, pch = 1, cex = 0.5)
#########################################
# Step 2. Define the weighting factors a_ik (also called
# the projector matrix).
A <- inla.spde.make.A(mesh, loc = Loc)
############################################
############################################
# Step 3. Define the SPDE.
# We need to specify weakly informative priors for the two Matern covariance
# parameters Range and sigma.
# P(Range < range0) = alpha and
# P(sigma > sigma0) = alpha
# and substitute these into:
# prior.range = c(range0, alpha)
# prior.sigma = c(sigma0, alpha)
# These are Penalised Complexity (PC) priors
# See: Constructing Priors that Penalize the Complexity of Gaussian Random Fields
# Fuglstad et al.
# arXiv:1503.00256v4 [stat.ME] 27 Nov 2017
# This seems to be a sensible choice, based on the sample-variogram of the
# Pearson residuals obtained by the GLM without spatial correlation:
# P(Range < 4 km) = 0.05
# Specifying a value for sigma is more difficult. We started with this:
# P(sigma_u > 0.5) = 0.05
# This states that it is unlikely that sigma_u is larger than 0.5.
spde <- inla.spde2.pcmatern(mesh,
prior.range = c(4 * 1000, 0.05),
prior.sigma = c(1, 0.05)) # can make this 1
##########################################
##########################################
# Step 4. Define the spatial field.
# Next we set up a list for the spatial random intercept u. As explained in Section 13.6,
# u is rewritten internally as A * w. We need to specify the w in INLA. This is done with
# the inla.spde.make.index function
# The size of the mesh is:
mesh$n
## [1] 1614
# This number is also in
spde$n.spde
## [1] 1614
# It is also the number of w_k values that we will get.
w.index <- inla.spde.make.index('w', n.spde = spde$n.spde)
#####################################
#####################################
# Step 5. Make a stack.
# Make the X matrix
Xm <- model.matrix(~ sex_b + pophost_b + month_of_year, data = d4)
Xm <- as.data.frame(Xm)
colnames(Xm)
## [1] "(Intercept)" "sex_b" "pophost_b" "month_of_year"
# And here is the stack.
N <- nrow(d4)
StackFit <- inla.stack(
tag = "Fit",
data = list(wing2body = d4$wing2body),
A = list(1, 1, A),
effects = list(
Intercept = rep(1, N),
Xm = Xm[,-1], #Covariates without the intercept
w = w.index))
#####################################################
# First we run the model without spatial dependency.
I1 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year,
family = "gaussian",
data = inla.stack.data(StackFit),
control.compute = list(dic = TRUE, waic = TRUE),
control.predictor = list(A = inla.stack.A(StackFit)))
# And this is the model with the spatial random field:
I2 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year +
f(w, model = spde),
family = "gaussian",
data = inla.stack.data(StackFit),
control.compute = list(dic = TRUE, waic = TRUE),
control.predictor = list(A = inla.stack.A(StackFit)))
# And compare I1 and I2 with DICs and WAICs
dic <- c(I1$dic$dic, I2$dic$dic)
waic <- c(I1$waic$waic, I2$waic$waic)
Z.out <- cbind(dic, waic)
rownames(Z.out) <- c("I1: Gaussian GLM",
"I2: Gaussian GLM + SRF")
Z.out
## dic waic
## I1: Gaussian GLM -4030.459 -4029.007
## I2: Gaussian GLM + SRF -4031.466 -4030.490
# Adding spatial correlation doesn't improve the model...
# Model validation of I2 (spatial gaussian GLM):
temp = d4
d4 = check_spatial_dependencies(I2, temp, temp$long, temp$lat, zone = 16, cutoff=5000, is_inla=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
## [1] "m"
##########################################################
# Model interpretation
# Posterior mean values of the regression parameters
Beta2 <- I2$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
print(Beta2, digits = 3)
## mean 0.025quant 0.975quant
## sex_b -0.003490 -0.005288 -0.00169
## pophost_b -0.740622 -0.955762 -0.33190
## month_of_year 0.000791 0.000246 0.00134
data_source_name = "~/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Detailed_Florida_State_Boundary.shp"
Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Detailed_Florida_State_Boundary")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/anastasiabernat/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Detailed_Florida_State_Boundary.shp", layer: "Detailed_Florida_State_Boundary"
## with 1 features
## It has 4 fields
## Integer64 fields read as strings: Shape_STAr
# Convert the shapefile to UTM
Florida_ShapeFile.UTM <- spTransform(Florida_ShapeFile,
CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))
Keys.UTM = crop(Florida_ShapeFile.UTM, extent(439300.8+500000, 1194383, 12721244-1200000, 13431336-600000))
Keys.UTM = crop(Florida_ShapeFile.UTM, extent(439300.8+700000, 1194383, 12721244-1200000, 13431336-600000)) # need to figure out why I can't overlap anything on these utm objects
plot(Florida_ShapeFile.UTM)
plot(Florida_ShapeFile.UTM)
points(Loc[,1], Loc[,2] + 10000000, col=2, cex=1)
tm_shape(Florida_ShapeFile.UTM) + tm_polygons(col="white")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(Florida_ShapeFile.UTM) +
tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons(col="#b5ed80")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(Keys.UTM) +
tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons(col="#a5c2f0")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(Keys.UTM) + tm_polygons(col="white")
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
coors = as.data.frame(cbind(d4$lat, d4$long))
colnames(coors) = c("lat", "lon")
collection_sites <- st_as_sf(coors,
coords = c("lon", "lat"),
crs = 4326,
agr = "constant")
tm_shape(Keys.UTM) +
tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons() +
tm_shape(collection_sites) +
tm_dots(alpha = 0.9, size= 0.2, shape=2, title = 'Collection Sites', labels = "geometry", legend.show=TRUE)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
cat("Keys\n\n")
## Keys
extent(Keys.UTM)
## class : Extent
## xmin : 1139301
## xmax : 1185209
## ymin : 12766169
## ymax : 12831336
cat("\nPoints\n\n")
##
## Points
extent(Loc)
## class : Extent
## xmin : 1150184
## xmax : 1176453
## ymin : 2776436
## ymax : 2813669
# do they fit?
eKx = extent(Keys.UTM)[2] - extent(Keys.UTM)[1]
eKy = extent(Keys.UTM)[4] - extent(Keys.UTM)[3]
eLx = extent(Loc)[2] - extent(Loc)[1]
eLy = extent(Loc)[4] - extent(Loc)[3]
eKx - eLx # if positive then yes.
## [1] 19640.16
eKy - eLy
## [1] 27933.88
coors = as.data.frame(Loc)
colnames(coors) = c("lon", "lat")
coors$lat = coors$lat + 10000000 # 10,000,000 off for some reason.........
collection_sites <- st_as_sf(coors,
coords = c("lon", "lat"),
crs = 4326,
agr = "constant")
tm_shape(Keys.UTM) +
tm_graticules(n.x = 3, n.y = 7, labels.format = list(digits=1), col= "grey79") + tm_polygons() +
tm_shape(collection_sites) +
tm_dots(alpha = 0.9, size= 0.2, shape=2, title = 'Collection Sites', labels = "geometry", legend.show=TRUE) # still doesn't show up...must be somewhere else in the world...
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
plot(Keys.UTM)
points(Loc[,1], Loc[,2] + 10000000, col="red")
mesh <- inla.mesh.2d(boundary = Hull,
max.edge = c(1, 5) * MaxEdge,
cutoff = MaxEdge / 5)
mesh$loc[,2] = mesh$loc[,2] + 10000000 # 10,000,000 off for some reason.........
# Plot spatial random fields obtained with the ordinary mesh
wpm.I2 <- I2$summary.random$w$mean
wsd.I2 <- I2$summary.random$w$sd
rx = range(mesh$loc[,1])
ry = c(range(mesh$loc[,2])[1] - 2000, range(mesh$loc[,2])[2] + 7000)
colnames(Loc) = c("lon", "lat")
PlotField2(field = wpm.I2,
mesh = mesh,
xlim = rx,
ylim = ry,
MyTitle = "Posterior mean SRF")
points(Loc[,1], Loc[,2] + 10000000, pch=17)
plot(Keys.UTM,
axes = TRUE,
add = TRUE) # border = "white")
# Problems with the spatial correlation
# Extract the spatial hyper-parameters.
SpatPar <- MySpatialParams(I2, spde)
SpatPar
## Kappa Sigma_u Range
## 3.232952e-05 8.690030e-03 1.806280e+06
Kappa <- SpatPar[1]
Sigma.u <- SpatPar[2]
Range <- SpatPar[3]
# Show correlation structure. First we obtain the locations of each point of the mesh.
LocMesh <- mesh$loc[,1:2]
# And then we calculate the distance between each vertex.
D <- as.matrix(dist(LocMesh))
# Using the estimated parameters from the model (see above), we can calculate
# the imposed Matern correlation values.
d.vec <- seq(0, max(D), length = 100)
Cor.M <- (Kappa * d.vec) * besselK(Kappa * d.vec, 1)
Cor.M[1] <- 1
# Which we plot here:
par(mfrow=c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
plot(x = d.vec / 1000,
y = Cor.M,
pch = 16,
type = "l",
cex.lab = 1.5,
xlab = "Distance (km)",
ylab = "Correlation",
xlim = c(0, 70))
# This indicates how far the correlation goes...can see that the correlation becomes weak around 8 km
###############################################
# Let us pick a specific point and visualize how far the correlation goes
# from this point.
plot(Keys.UTM, axes=TRUE)
# These are all sampled sites
points(x = Loc[,1],
y = Loc[,2] + 10000000,
cex = 0.5,
col = "blue",
pch = 17)
# Random point that we picked:
points(x = Loc[158,1],
y = Loc[158,2] + 10000000,
col = 2, pch = 16, cex = 1)
# How far does the correlation from the red point reach? How many of the
# sampling locations are affected? We will visualize the correlation on the mesh
# and plot this up to correlation = 0.1
# loc = c(485000, 5971248) is the target point
# Get the inverse of the covariance matrix of the w
Q <- inla.spde2.precision(spde,
theta = c(log(Range ),log(Sigma.u)))
# Get the correlation values
Corr <- local.find.correlation(Q,
loc = c(Loc[158,1], Loc[158,2] + 10000000),
mesh)
## [1] "The location used was c( 1174631.144 , 12810321.7113 )"
#It will find the closest point on the mesh:
plot(Keys.UTM, axes=TRUE)
# These are all sampled sites
points(x = Loc[,1],
y = Loc[,2] + 10000000,
cex = 0.5,
col = "blue",
pch = 17)
# Random point that we picked:
points(x = Loc[158,1],
y = Loc[158,2] + 10000000,
col = 2, pch = 16, cex = 1)
points(x = 1174631.144 , y = 12810321.7113 , col = 2, pch = 16, cex = 2)
# And the rest is a matter of plotting the correlation values, contour of the
# coast and the sampling locations.
PlotField2(Corr,
mesh,
ContourMap = Keys.UTM,
zlim = c(0.2, 1),
Add = FALSE)
plot(Keys.UTM,
axes = TRUE,
add = TRUE)
points(x = Loc[,1],
y = Loc[,2] + 10000000,
cex = 0.5,
col = "black",
pch = 17)
# Yikes...not sure why this is happening
Need to add the barrier on the right of the keys. Could make that shapefile in QGIS. Need Straits_of_Florida.UTM
data_source_name = "~/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Strait.shp"
#Strait_of_Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Water")
Strait_of_Florida_ShapeFile <- readOGR(dsn = data_source_name, layer = "Strait")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/anastasiabernat/Desktop/git_repositories/SBB-dispersal/avbernat/All_Morphology/stats/data/shapefiles/Strait.shp", layer: "Strait"
## with 1 features
## It has 3 fields
## Integer64 fields read as strings: id Area
# Convert the shapefile to UTM
Water.UTM <- spTransform(Strait_of_Florida_ShapeFile,
CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))
Loc[,2] = Loc[,2] + 10000000 # 10,000,000 off for some reason.........
plot(Keys.UTM)
points(Loc[,1], Loc[,2], col="red")
plot(Water.UTM,
add=TRUE,
col="blue")
###################################################
# Barrier model applied on benthic coverage data
# I4: A beta model in which the mesh contains two barriers.
# Create a mesh for the barrier model
NonConvHull <- inla.nonconvex.hull(Loc)
mesh.barrier <- inla.mesh.2d(boundary = NonConvHull,
interior = Water.UTM,
max.edge = c(1) * MaxEdge,
cutoff = MaxEdge / 5)
mesh.barrier$loc[,2] = mesh.barrier$loc[,2]
# Plot the mesh
par(mfrow = c(1,1), mar = c(0,0,2,0))
plot(mesh.barrier)
points(Loc, col = 1, pch = 16, cex = 1)
mesh.barrier$n
## [1] 2306
# Projector matrix for model I4
A4 <- inla.spde.make.A(mesh.barrier, loc = Loc)
##########################
# Define the SPDE with the same PC priors as above. Recall from the theory
# presentation that the barrier model uses two SPDE equations; one for the
# triangles outside the barrier and one for the triangles inside the barrier.
# We therefore first need to figure out which triangles are inside the
# barrier(s).
# Get the spatial location of the center of each triangle.
NumTri <- length(mesh.barrier$graph$tv[,1])
PosTri <- matrix(0, NumTri, 2)
for (t in 1:NumTri){
Temp <- mesh.barrier$loc[mesh.barrier$graph$tv[t, ], ]
PosTri[t,] <- colMeans(Temp)[c(1,2)]
}
# Add the CRS to this object so that we can use the 'over' function
PosTri <- SpatialPoints(PosTri,
CRS("+proj=utm +zone=16 +south +ellps=WGS84 +datum=WGS84"))
plot(PosTri, col="darkgreen")
plot(Water.UTM,
add=T)
points(Loc[,1], Loc[,2], col="white")
# Geometry reports
temp <- st_as_sf(Water.UTM, 4326)
#temp <- st_as_sf(PosTri, 4326)
poly_valid_report <- st_is_valid(temp)
poly_valid_report
## [1] TRUE
which(poly_valid_report == FALSE)
## integer(0)
# make sure CRS identical
identicalCRS(Water.UTM, PosTri)
## [1] TRUE
# Determine which triangles are inside the barrier(s).
TriangleInBarrier <- over(Water.UTM, PosTri, returnList=T) # none are inside the barrier...
TriangleInBarrier <- unlist(TriangleInBarrier)
mesh.barrier$n - length(TriangleInBarrier)
## [1] 1136
rgeos::set_RGEOS_CheckValidity(2L)
BarrierPoly <- inla.barrier.polygon(mesh.barrier, TriangleInBarrier)
## Warning in RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid"): Self-intersection
## at or near point 1144610.73448928 12775958.6543086
## mesh.polys is invalid
## Attempting to make mesh.polys valid by zero-width buffering
## Warning in RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid"): Self-intersection
## at or near point 1146742.951568 12772475.38900359
## mesh.polys is invalid
## Attempting to make mesh.polys valid by zero-width buffering
# Now INLA will know which triangles are inside the barrier(s).
# Not sure where the warning messages all of a sudden come from. But they do not
# seem to affect the results.
Range <- 4 * 1000
Sigma <- 0.5
spde.barrier <- inla.barrier.pcmatern(mesh.barrier,
barrier.triangles = TriangleInBarrier,
prior.range = c(Range, 0.05),
prior.sigma = c(Sigma, 0.05))
##########################
# Spatial random field for model I4
w4.index <- inla.spde.make.index(
name = 'w',
n.spde = spde.barrier$f$n)
##########################
# And here is the stack.
N <- nrow(d4)
StackFit.Barrier <- inla.stack(
tag = "Fit",
data = list(wing2body = d4$wing2body),
A = list(1, 1, A),
effects = list(
Intercept = rep(1, N),
Xm = Xm[,-1], #Covariates without the intercept
w = w.index))
# And this is the model with the spatial random field:
I4 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year +
f(w, model = spde.barrier),
family = "gaussian",
data = inla.stack.data(StackFit.Barrier),
control.compute = list(dic = TRUE, waic = TRUE),
control.predictor = list(A = inla.stack.A(StackFit.Barrier)))
## Warning in inla.model.properties.generic(inla.trim.family(model), mm[names(mm) == : Model 'rgeneric' in section 'latent' is marked as 'experimental'; changes may appear at any time.
## Use this model with extra care!!! Further warnings are disabled.
# Covariate effects and spatial random field
Beta4 <- I4$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
print(Beta4, digits = 3)
## mean 0.025quant 0.975quant
## sex_b -0.003496 -0.005297 -0.00170
## pophost_b -0.719740 -0.726856 -0.71242
## month_of_year 0.000805 0.000258 0.00135
theta <- I4$summary.hyper[1, "mean"]
theta # 4344.969? What does that mean?
## [1] 4623.327
# Plot the SRF obtained by I4
wpm.4 <- I4$summary.random$w$mean
wsd.4 <- I4$summary.random$w$sd
PlotField2(field = wpm.4,
mesh = mesh.barrier,
xlim = range(mesh.barrier$loc[,1]),
ylim = range(mesh.barrier$loc[,2]),
MyTitle = "Posterior mean SRF")
plot(Water.UTM,
axes = TRUE,
add = TRUE,
border = "white")
points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM,
axes = TRUE,
add = TRUE) # border = "white")
# Note that the correlation does not cross into the strait.
# This is the SD:
PlotField2(field = wsd.4,
mesh = mesh.barrier,
xlim = range(mesh.barrier$loc[,1]),
ylim = range(mesh.barrier$loc[,2]),
MyTitle = "Posterior mean SD")
plot(Water.UTM,
axes = TRUE,
add = TRUE,
border = "white")
plot(Keys.UTM,
axes = TRUE,
add = TRUE) # border = "white")
points(Loc[,1], Loc[,2], pch=17)
# This is how you get them for a barrier model:
# It is a little bit of fancy code.
HP <- I4$marginals.hyperpar
TakeExp <- function(x){exp(x)}
Sigma4.u <- inla.emarginal(TakeExp, HP$`Theta1 for w`)
Range4 <- inla.emarginal(TakeExp, HP$`Theta2 for w`)
paste('Sigma.u = ', round(Sigma4.u, digits = 3))
## [1] "Sigma.u = 0.002"
paste('Range = ', round(Range4 / 1000, digits = 3), "km")
## [1] "Range = 20.012 km"
# Show the extend of the spatial correlation
# Get the inverse of the covariance matrix of the w.
# Note that we are using a different function as before.
Q <- inla.rgeneric.q(spde.barrier,
"Q",
theta = c(log(Sigma4.u ), log(Range4)))
Corr <- local.find.correlation(Q,
loc = c(Loc[157,1], Loc[157,2]),
mesh.barrier)
## [1] "The location used was c( 1174723.0528 , 12810472.1565 )"
# And the rest is a matter of plotting the
# correlation values, contour of the coast and the
# sampling locations.
PlotField2(Corr,
mesh.barrier,
ContourMap = Keys.UTM,
zlim = c(0.2, 1),
Add = FALSE)
#It will find the closest point on the mesh:
points(x = 1174723.0528, y = 12810472.1565, col = 2, pch = 16, cex = 2)
plot(Keys.UTM,
axes = TRUE,
add = TRUE)
points(x = Loc[,1],
y = Loc[,2],
cex = 0.5,
col = "black",
pch = 16)
# This is not working...
##########################################
# And the rest is a matter of plotting the
# correlation values, contour of the coast and the
# sampling locations.
PlotField2(Corr,
mesh.barrier,
ContourMap = Keys.UTM,
zlim = c(0.2, 1),
Add = FALSE)
#It will find the closest point on the mesh:
points(x = 1174723.0528, y = 12810472.1565, col = 2, pch = 16, cex = 2)
plot(Keys.UTM,
axes = TRUE,
add = TRUE)
points(x = Loc[,1],
y = Loc[,2],
cex = 0.5,
col = "black",
pch = 16)
# This is not working...
##########################################
Is a barrier really needed? Even without:
spde.barrier <- inla.spde2.pcmatern(mesh.barrier,
prior.range = c(4 * 1000, 0.05),
prior.sigma = c(1, 0.05)) # can make this 1
# The size of the mesh is:
mesh.barrier$n
## [1] 2306
# This number is also in
spde.barrier$n.spde
## [1] 2306
# It is also the number of w_k values that we will get.
w.index <- inla.spde.make.index('w', n.spde = spde$n.spde)
#####################################
# Step 5. Make a stack.
# Make the X matrix
Xm <- model.matrix(~ sex_b + pophost_b + month_of_year, data = d4)
Xm <- as.data.frame(Xm)
colnames(Xm)
## [1] "(Intercept)" "sex_b" "pophost_b" "month_of_year"
# And here is the stack.
N <- nrow(d4)
StackFit.Barrier <- inla.stack(
tag = "Fit",
data = list(wing2body = d4$wing2body),
A = list(1, 1, A),
effects = list(
Intercept = rep(1, N),
Xm = Xm[,-1], #Covariates without the intercept
w = w.index))
#####################################################
# First we run the model without spatial dependency.
I3 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year,
family = "gaussian",
data = inla.stack.data(StackFit.Barrier),
control.compute = list(dic = TRUE, waic = TRUE),
control.predictor = list(A = inla.stack.A(StackFit.Barrier)))
# And this is the model with the spatial random field:
I4 <- inla(wing2body ~ -1 + sex_b + pophost_b + month_of_year +
f(w, model = spde.barrier),
family = "gaussian",
data = inla.stack.data(StackFit.Barrier),
control.compute = list(dic = TRUE, waic = TRUE),
control.predictor = list(A = inla.stack.A(StackFit.Barrier)))
# And compare I1 and I2 with DICs and WAICs
dic <- c(I1$dic$dic, I2$dic$dic)
waic <- c(I1$waic$waic, I2$waic$waic)
Z.out <- cbind(dic, waic)
rownames(Z.out) <- c("I1: Gaussian GLM",
"I2: Gaussian GLM + SRF")
Z.out
## dic waic
## I1: Gaussian GLM -4030.459 -4029.007
## I2: Gaussian GLM + SRF -4031.466 -4030.490
# Adding spatial correlation doesn't improve the model...
# Plot spatial random fields obtained with the ordinary mesh
wpm.I4 <- I4$summary.random$w$mean
wsd.I4 <- I4$summary.random$w$sd
rx = range(mesh.barrier$loc[,1])
ry = c(range(mesh.barrier$loc[,2])[1] - 2000, range(mesh.barrier$loc[,2])[2] + 7000)
colnames(Loc) = c("lon", "lat")
PlotField2(field = wpm.I4,
mesh = mesh.barrier,
xlim = rx,
ylim = range(mesh.barrier$loc[,2]),
MyTitle = "Posterior mean SRF") # sample regression function (SRF)
points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM,
axes = TRUE,
add = TRUE) # border = "white")
PlotField2(field = wsd.I4,
mesh = mesh.barrier,
xlim = rx,
ylim = range(mesh.barrier$loc[,2]),
MyTitle = "Posterior mean SD")
points(Loc[,1], Loc[,2], pch=17)
plot(Keys.UTM,
axes = TRUE,
add = TRUE) # border = "white")