library(automap)
library(gstat)
library(nlme)
library(spdep)
library(ncf)
library(tidyverse)
library(spatialreg)
birds <- read.csv("data/birdDiv.csv")
head(birds)
## birdDiv plantDiv UTME UTMN
## 1 7.181452 4.12 458393.2 3837109
## 2 7.540323 6.12 552279.8 3884222
## 3 4.891129 4.10 480864.9 3874203
## 4 4.149194 4.80 508001.6 3853486
## 5 5.899194 3.80 487815.7 3864575
## 6 4.717742 3.70 471130.8 3841434
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)
birds_sf
## Simple feature collection with 64 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
## birdDiv plantDiv geometry
## 1 7.181452 4.12 POINT (458393.2 3837109)
## 2 7.540323 6.12 POINT (552279.8 3884222)
## 3 4.891129 4.10 POINT (480864.9 3874203)
## 4 4.149194 4.80 POINT (508001.6 3853486)
## 5 5.899194 3.80 POINT (487815.7 3864575)
## 6 4.717742 3.70 POINT (471130.8 3841434)
## 7 3.157258 4.07 POINT (451012.8 3837023)
## 8 4.052885 5.00 POINT (466039.6 3840762)
## 9 7.266129 4.20 POINT (522672.2 3878709)
## 10 6.532258 4.90 POINT (490232.3 3864731)
ggplot() +
geom_sf(data = birds_sf, aes(col = birdDiv, size = birdDiv, alpha = 0.95)) +
scale_color_continuous(type = "viridis") +
guides(size = "none", alpha = "none")
ggplot() +
geom_sf(data = birds_sf, aes(col = plantDiv, size = plantDiv, alpha = 0.95)) +
scale_color_continuous(type = "viridis") +
guides(size = "none", alpha = "none")
# fit model assuming no autocorrelation in residuals with gls()
glsNaive <- gls(birdDiv ~ plantDiv, birds)
summary(glsNaive)
## Generalized least squares fit by REML
## Model: birdDiv ~ plantDiv
## Data: birds
## AIC BIC logLik
## 267.5524 273.9338 -130.7762
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.090378 1.2076960 0.902858 0.3701
## plantDiv 1.136234 0.2086721 5.445067 0.0000
##
## Correlation:
## (Intr)
## plantDiv -0.981
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.5553997 -0.5916143 -0.0140065 0.5449248 3.3889099
##
## Residual standard error: 1.86175
## Degrees of freedom: 64 total; 62 residual
The initial model of bird diversity as a function of plant diversity gives a positive slope that is significantly different from zero (slope = 1.136, t = 5.445, p < 0.001) and has an error of 0.2087. However, if there is autocorrelation in the residuals, the OLS linear model may not be efficiently predicting values (and overestimating the t-value for plantDiv).
# add model residuals to dataframe
birds$glsNaiveResids <- residuals(glsNaive, type = "normalized")
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)
birds_sf
## Simple feature collection with 64 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
## birdDiv plantDiv glsNaiveResids geometry
## 1 7.181452 4.12 0.7572399 POINT (458393.2 3837109)
## 2 7.540323 6.12 -0.2706083 POINT (552279.8 3884222)
## 3 4.891129 4.10 -0.4607532 POINT (480864.9 3874203)
## 4 4.149194 4.80 -1.2864810 POINT (508001.6 3853486)
## 5 5.899194 3.80 0.2637991 POINT (487815.7 3864575)
## 6 4.717742 3.70 -0.3097627 POINT (471130.8 3841434)
## 7 3.157258 4.07 -1.3737567 POINT (451012.8 3837023)
## 8 4.052885 5.00 -1.4602722 POINT (466039.6 3840762)
## 9 7.266129 4.20 0.7538981 POINT (522672.2 3878709)
## 10 6.532258 4.90 -0.0674983 POINT (490232.3 3864731)
ggplot() +
geom_sf(data = birds_sf, aes(col = glsNaiveResids, size = glsNaiveResids,
alpha = 0.95)) +
scale_color_continuous(type = "viridis") +
guides(size = "none", alpha = "none")
Residuals from the model look very strongly autocorrelated, with low error values clumped to the southwest and higher error clumped in the northeast.
# test for spatial autocorrelation using a correlogram
residsI <- spline.correlog(x = birds$UTME, y = birds$UTMN,
z = birds$glsNaiveResids,
resamp = 50, quiet = TRUE)
# save coordinate points as matrix for dist()
points <- cbind(birds$UTME, birds$UTMN)
# plot Moran's I for residuals
plot(residsI, xlim = c(0, max(dist(points))/3))
The correlogram of Moran’s I shows that there is strong autocorrelation in the residuals at distances below ~ 5000m. To account for this, I will use a variogram model of the residuals to better inform the bird diversity model.
# using autofitVariogram() to produce experimental and fitted variogram
## tests Gaussian, Spherical, and Exponential fits and produces the best
birds_fit <- autofitVariogram(glsNaiveResids~1, input_data = birds_sf,
model = c("Gau", "Sph", "Exp"))
plot(birds_fit)
The best-fit variogram model uses a Gaussian framework (nugget = 0.05, sill = 0.83, range = 2398). This will be used as a correlation structure to find the variance-covariance matrix for the updated model.
# observational variogram
birds_var <- variogram(glsNaiveResids~1, birds_sf)
# Variogram model
birds_naivemod <- vgm(psill = 0.83, model = "Gau", range = 2398, nugget = 0.05)
# fit variogram model
birds_fit <- fit.variogram(object = birds_var, model = birds_naivemod)
## Warning in fit.variogram(object = birds_var, model = birds_naivemod): No
## convergence after 200 iterations: try different initial values?
# k-fold cross validation
## I believe the krige.cv() function should be appropriate here
set.seed(42)
n <- 10
birds_kfold_sf <- krige.cv(formula = glsNaiveResids ~ 1,
locations = birds_sf,
model = birds_fit,
nfold = n,
verbose = FALSE)
birds_kfold_sf
## Simple feature collection with 64 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
## var1.pred var1.var observed residual zscore fold
## 1 -0.034715933 0.8987798 0.7572399 0.7919559 0.8353613 1
## 2 0.007277309 0.8990555 -0.2706083 -0.2778856 -0.2930710 5
## 3 -0.034614615 0.8987826 -0.4607532 -0.4261386 -0.4494936 1
## 4 0.018166106 0.8983228 -1.2864810 -1.3046471 -1.3765020 9
## 5 -0.758413653 0.2182314 0.2637991 1.0222128 2.1881785 10
## 6 0.099580345 0.8993297 -0.3097627 -0.4093431 -0.4316463 4
## 7 -0.059462299 0.8978699 -1.3737567 -1.3142944 -1.3870303 2
## 8 -0.034678043 0.8984436 -1.4602722 -1.4255942 -1.5040092 10
## 9 -0.033443851 0.8986769 0.7538981 0.7873419 0.8305420 1
## 10 -0.967913489 0.5912608 -0.0674983 0.9004152 1.1709902 8
## geometry
## 1 POINT (458393.2 3837109)
## 2 POINT (552279.8 3884222)
## 3 POINT (480864.9 3874203)
## 4 POINT (508001.6 3853486)
## 5 POINT (487815.7 3864575)
## 6 POINT (471130.8 3841434)
## 7 POINT (451012.8 3837023)
## 8 POINT (466039.6 3840762)
## 9 POINT (522672.2 3878709)
## 10 POINT (490232.3 3864731)
birds_kfold <- data.frame(birds_kfold_sf$observed, birds_kfold_sf$var1.pred, birds_kfold_sf$fold)
colnames(birds_kfold) <- c("observed", "var1.pred", "fold")
head(birds_kfold)
## observed var1.pred fold
## 1 0.7572399 -0.034715933 1
## 2 -0.2706083 0.007277309 5
## 3 -0.4607532 -0.034614615 1
## 4 -1.2864810 0.018166106 9
## 5 0.2637991 -0.758413653 10
## 6 -0.3097627 0.099580345 4
rsq <- 1:n
rmse <- 1:n
fold <- 1:n
mod_fit <- data.frame(rsq, rmse, fold)
for (i in 1:n) {
birds_kfold_sub <- subset(birds_kfold, fold == i)
mod_fit$rsq[i] <- cor(birds_kfold_sub$observed, birds_kfold_sub$var1.pred)^2
mod_fit$rmse[i] <- sqrt(mean((birds_kfold_sub$observed - birds_kfold_sub$var1.pred)^2))
}
mod_fit
## rsq rmse fold
## 1 0.57432365 1.0675178 1
## 2 0.14930269 1.2808064 2
## 3 0.56453154 0.7602949 3
## 4 0.45845835 0.9681768 4
## 5 0.17750979 0.6802339 5
## 6 0.85658623 0.4926150 6
## 7 0.02763525 0.5504683 7
## 8 0.04576460 1.3513138 8
## 9 0.20551161 0.8424176 9
## 10 0.02192716 0.8596451 10
mean(mod_fit$rsq)
## [1] 0.3081551
mean(mod_fit$rmse)
## [1] 0.885349
The variogram model is not a very good fit of the residuals (\(R^2\) = 0.336, RMSE = 0.919).
csSpatial <- corSpatial(form=~UTME+UTMN, nugget = TRUE, type = "gaussian")
glsUpdated <- update(glsNaive, correlation = csSpatial)
summary(glsUpdated)
## Generalized least squares fit by REML
## Model: birdDiv ~ plantDiv
## Data: birds
## AIC BIC logLik
## 219.2873 229.923 -104.6437
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~UTME + UTMN
## Parameter estimate(s):
## range nugget
## 1.204259e+05 1.313037e-01
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 7.023390 2.2246321 3.1571017 0.0025
## plantDiv 0.121523 0.1612945 0.7534264 0.4540
##
## Correlation:
## (Intr)
## plantDiv -0.424
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -1.41387032 -0.63569540 -0.02778106 0.43501488 1.72272767
##
## Residual standard error: 3.084252
## Degrees of freedom: 64 total; 62 residual
The updated regression model shows a different result to the naive model: a non-significant slope in plant diversity predicting bird diversity.
# add the residuals to the spatial dat object
birds$glsResids <- residuals(glsUpdated,type="normalized")
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)
# Map the residuals from gls
ggplot() +
geom_sf(data = birds_sf, aes(col = glsResids, size = glsResids,
alpha = 0.95)) +
scale_color_continuous(type = "viridis") +
guides(size = "none", alpha = "none")
The residuals of the updated model look significantly less autocorrelated than the naive model’s.
# test for spatial autocorrelation using a correlogram
residsI <- spline.correlog(x = birds$UTME, y = birds$UTMN,
z = birds$glsResids,
resamp = 50, quiet = TRUE)
# save coordinate points as matrix for dist()
points <- cbind(birds$UTME, birds$UTMN)
# plot Moran's I for residuals
plot(residsI, xlim = c(0, max(dist(points))/3))
Moran’s I of the residuals shows that there is no significant spatial autocorrelation among them.
Not accounting for spatial autocorrelation, plant diversity appears to be a moderate predictor of bird diversity. However, when we adjust for autocorrelation, the slope is no longer significant. Other factors contributing to the spatial autocorrelation between plant and bird diversity - possibly climate/weather, terrain/topography, or other environmental variables not included in the data - are likely responsible for this, as bird diversity alone is not a significant predictor of plant diversity.