library(automap)
library(gstat)
library(nlme)
library(spdep)
library(ncf)
library(tidyverse)
library(spatialreg)

Data

birds <- read.csv("data/birdDiv.csv")
head(birds)
##    birdDiv plantDiv     UTME    UTMN
## 1 7.181452     4.12 458393.2 3837109
## 2 7.540323     6.12 552279.8 3884222
## 3 4.891129     4.10 480864.9 3874203
## 4 4.149194     4.80 508001.6 3853486
## 5 5.899194     3.80 487815.7 3864575
## 6 4.717742     3.70 471130.8 3841434
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)
birds_sf
## Simple feature collection with 64 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
##     birdDiv plantDiv                 geometry
## 1  7.181452     4.12 POINT (458393.2 3837109)
## 2  7.540323     6.12 POINT (552279.8 3884222)
## 3  4.891129     4.10 POINT (480864.9 3874203)
## 4  4.149194     4.80 POINT (508001.6 3853486)
## 5  5.899194     3.80 POINT (487815.7 3864575)
## 6  4.717742     3.70 POINT (471130.8 3841434)
## 7  3.157258     4.07 POINT (451012.8 3837023)
## 8  4.052885     5.00 POINT (466039.6 3840762)
## 9  7.266129     4.20 POINT (522672.2 3878709)
## 10 6.532258     4.90 POINT (490232.3 3864731)
ggplot() +
  geom_sf(data = birds_sf, aes(col = birdDiv, size = birdDiv, alpha = 0.95)) +
  scale_color_continuous(type = "viridis") +
  guides(size = "none", alpha = "none")

ggplot() +
  geom_sf(data = birds_sf, aes(col = plantDiv, size = plantDiv, alpha = 0.95)) +
  scale_color_continuous(type = "viridis") +
  guides(size = "none", alpha = "none")

Analysis

Naive Model

# fit model assuming no autocorrelation in residuals with gls()
glsNaive <- gls(birdDiv ~ plantDiv, birds)
summary(glsNaive)
## Generalized least squares fit by REML
##   Model: birdDiv ~ plantDiv 
##   Data: birds 
##        AIC      BIC    logLik
##   267.5524 273.9338 -130.7762
## 
## Coefficients:
##                Value Std.Error  t-value p-value
## (Intercept) 1.090378 1.2076960 0.902858  0.3701
## plantDiv    1.136234 0.2086721 5.445067  0.0000
## 
##  Correlation: 
##          (Intr)
## plantDiv -0.981
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.5553997 -0.5916143 -0.0140065  0.5449248  3.3889099 
## 
## Residual standard error: 1.86175 
## Degrees of freedom: 64 total; 62 residual

The initial model of bird diversity as a function of plant diversity gives a positive slope that is significantly different from zero (slope = 1.136, t = 5.445, p < 0.001) and has an error of 0.2087. However, if there is autocorrelation in the residuals, the OLS linear model may not be efficiently predicting values (and overestimating the t-value for plantDiv).

Visualize Residuals

# add model residuals to dataframe
birds$glsNaiveResids <- residuals(glsNaive, type = "normalized")
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)
birds_sf
## Simple feature collection with 64 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
##     birdDiv plantDiv glsNaiveResids                 geometry
## 1  7.181452     4.12      0.7572399 POINT (458393.2 3837109)
## 2  7.540323     6.12     -0.2706083 POINT (552279.8 3884222)
## 3  4.891129     4.10     -0.4607532 POINT (480864.9 3874203)
## 4  4.149194     4.80     -1.2864810 POINT (508001.6 3853486)
## 5  5.899194     3.80      0.2637991 POINT (487815.7 3864575)
## 6  4.717742     3.70     -0.3097627 POINT (471130.8 3841434)
## 7  3.157258     4.07     -1.3737567 POINT (451012.8 3837023)
## 8  4.052885     5.00     -1.4602722 POINT (466039.6 3840762)
## 9  7.266129     4.20      0.7538981 POINT (522672.2 3878709)
## 10 6.532258     4.90     -0.0674983 POINT (490232.3 3864731)
ggplot() +
  geom_sf(data = birds_sf, aes(col = glsNaiveResids, size = glsNaiveResids,
                               alpha = 0.95)) +
  scale_color_continuous(type = "viridis") +
  guides(size = "none", alpha = "none")

Residuals from the model look very strongly autocorrelated, with low error values clumped to the southwest and higher error clumped in the northeast.

Correlogram

# test for spatial autocorrelation using a correlogram
residsI <- spline.correlog(x = birds$UTME, y = birds$UTMN,
                           z = birds$glsNaiveResids, 
                           resamp = 50, quiet = TRUE)
# save coordinate points as matrix for dist()
points <- cbind(birds$UTME, birds$UTMN)

# plot Moran's I for residuals
plot(residsI, xlim = c(0, max(dist(points))/3))

The correlogram of Moran’s I shows that there is strong autocorrelation in the residuals at distances below ~ 5000m. To account for this, I will use a variogram model of the residuals to better inform the bird diversity model.

Variogram Fitting

# using autofitVariogram() to produce experimental and fitted variogram
## tests Gaussian, Spherical, and Exponential fits and produces the best
birds_fit <- autofitVariogram(glsNaiveResids~1, input_data = birds_sf, 
                      model = c("Gau", "Sph", "Exp"))
plot(birds_fit)

The best-fit variogram model uses a Gaussian framework (nugget = 0.05, sill = 0.83, range = 2398). This will be used as a correlation structure to find the variance-covariance matrix for the updated model.

Cross Validation

# observational variogram
birds_var <- variogram(glsNaiveResids~1, birds_sf)
# Variogram model
birds_naivemod <- vgm(psill = 0.83, model = "Gau", range = 2398, nugget = 0.05)
# fit variogram model
birds_fit <- fit.variogram(object = birds_var, model = birds_naivemod)
## Warning in fit.variogram(object = birds_var, model = birds_naivemod): No
## convergence after 200 iterations: try different initial values?
# k-fold cross validation
## I believe the krige.cv() function should be appropriate here
set.seed(42)
n <- 10
birds_kfold_sf <- krige.cv(formula = glsNaiveResids ~ 1,
                               locations = birds_sf,
                               model = birds_fit,
                               nfold = n,
                               verbose = FALSE)
birds_kfold_sf
## Simple feature collection with 64 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 450985.4 ymin: 3831865 xmax: 668641.7 ymax: 3962277
## Projected CRS: WGS 84 / UTM zone 10N
## First 10 features:
##       var1.pred  var1.var   observed   residual     zscore fold
## 1  -0.034715933 0.8987798  0.7572399  0.7919559  0.8353613    1
## 2   0.007277309 0.8990555 -0.2706083 -0.2778856 -0.2930710    5
## 3  -0.034614615 0.8987826 -0.4607532 -0.4261386 -0.4494936    1
## 4   0.018166106 0.8983228 -1.2864810 -1.3046471 -1.3765020    9
## 5  -0.758413653 0.2182314  0.2637991  1.0222128  2.1881785   10
## 6   0.099580345 0.8993297 -0.3097627 -0.4093431 -0.4316463    4
## 7  -0.059462299 0.8978699 -1.3737567 -1.3142944 -1.3870303    2
## 8  -0.034678043 0.8984436 -1.4602722 -1.4255942 -1.5040092   10
## 9  -0.033443851 0.8986769  0.7538981  0.7873419  0.8305420    1
## 10 -0.967913489 0.5912608 -0.0674983  0.9004152  1.1709902    8
##                    geometry
## 1  POINT (458393.2 3837109)
## 2  POINT (552279.8 3884222)
## 3  POINT (480864.9 3874203)
## 4  POINT (508001.6 3853486)
## 5  POINT (487815.7 3864575)
## 6  POINT (471130.8 3841434)
## 7  POINT (451012.8 3837023)
## 8  POINT (466039.6 3840762)
## 9  POINT (522672.2 3878709)
## 10 POINT (490232.3 3864731)
birds_kfold <- data.frame(birds_kfold_sf$observed, birds_kfold_sf$var1.pred, birds_kfold_sf$fold)
colnames(birds_kfold) <- c("observed", "var1.pred", "fold")
head(birds_kfold)
##     observed    var1.pred fold
## 1  0.7572399 -0.034715933    1
## 2 -0.2706083  0.007277309    5
## 3 -0.4607532 -0.034614615    1
## 4 -1.2864810  0.018166106    9
## 5  0.2637991 -0.758413653   10
## 6 -0.3097627  0.099580345    4
rsq <- 1:n
rmse <- 1:n
fold <- 1:n

mod_fit <- data.frame(rsq, rmse, fold)

for (i in 1:n) {
  birds_kfold_sub <- subset(birds_kfold, fold == i)
  
  mod_fit$rsq[i] <- cor(birds_kfold_sub$observed, birds_kfold_sub$var1.pred)^2
  
  mod_fit$rmse[i] <- sqrt(mean((birds_kfold_sub$observed - birds_kfold_sub$var1.pred)^2))
}
mod_fit
##           rsq      rmse fold
## 1  0.57432365 1.0675178    1
## 2  0.14930269 1.2808064    2
## 3  0.56453154 0.7602949    3
## 4  0.45845835 0.9681768    4
## 5  0.17750979 0.6802339    5
## 6  0.85658623 0.4926150    6
## 7  0.02763525 0.5504683    7
## 8  0.04576460 1.3513138    8
## 9  0.20551161 0.8424176    9
## 10 0.02192716 0.8596451   10
mean(mod_fit$rsq)
## [1] 0.3081551
mean(mod_fit$rmse)
## [1] 0.885349

The variogram model is not a very good fit of the residuals (\(R^2\) = 0.336, RMSE = 0.919).

Updated Model

csSpatial <- corSpatial(form=~UTME+UTMN, nugget = TRUE, type = "gaussian")
glsUpdated <- update(glsNaive, correlation = csSpatial)
summary(glsUpdated)
## Generalized least squares fit by REML
##   Model: birdDiv ~ plantDiv 
##   Data: birds 
##        AIC     BIC    logLik
##   219.2873 229.923 -104.6437
## 
## Correlation Structure: Gaussian spatial correlation
##  Formula: ~UTME + UTMN 
##  Parameter estimate(s):
##        range       nugget 
## 1.204259e+05 1.313037e-01 
## 
## Coefficients:
##                Value Std.Error   t-value p-value
## (Intercept) 7.023390 2.2246321 3.1571017  0.0025
## plantDiv    0.121523 0.1612945 0.7534264  0.4540
## 
##  Correlation: 
##          (Intr)
## plantDiv -0.424
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.41387032 -0.63569540 -0.02778106  0.43501488  1.72272767 
## 
## Residual standard error: 3.084252 
## Degrees of freedom: 64 total; 62 residual

The updated regression model shows a different result to the naive model: a non-significant slope in plant diversity predicting bird diversity.

# add the residuals to the spatial dat object
birds$glsResids <- residuals(glsUpdated,type="normalized")
birds_sf <- st_as_sf(birds, coords = c("UTME", "UTMN"), crs = 32610)

# Map the residuals from gls
ggplot() +
  geom_sf(data = birds_sf, aes(col = glsResids, size = glsResids,
                               alpha = 0.95)) +
  scale_color_continuous(type = "viridis") +
  guides(size = "none", alpha = "none")

The residuals of the updated model look significantly less autocorrelated than the naive model’s.

# test for spatial autocorrelation using a correlogram
residsI <- spline.correlog(x = birds$UTME, y = birds$UTMN,
                           z = birds$glsResids, 
                           resamp = 50, quiet = TRUE)
# save coordinate points as matrix for dist()
points <- cbind(birds$UTME, birds$UTMN)

# plot Moran's I for residuals
plot(residsI, xlim = c(0, max(dist(points))/3))

Moran’s I of the residuals shows that there is no significant spatial autocorrelation among them.

Interpretation.

Not accounting for spatial autocorrelation, plant diversity appears to be a moderate predictor of bird diversity. However, when we adjust for autocorrelation, the slope is no longer significant. Other factors contributing to the spatial autocorrelation between plant and bird diversity - possibly climate/weather, terrain/topography, or other environmental variables not included in the data - are likely responsible for this, as bird diversity alone is not a significant predictor of plant diversity.