Modeling

Load packages

Code

# for path management
library(here)
# for raster data
library(raster)
# for data manipulation
library(tidyverse)
# for spatial operations
library(sf)
# for basemaps
library(rnaturalearth)
# for assessing SAC
library(spdep)
# for bayesian linear model
library(rstanarm)
# for spatial glmm
library(glmmfields)
# for model selection (projective predictive inference)
library(projpred)
# for spatial CV
library(spatialsample)
# for easy exploratory plots
library(bayesplot)
# for easy working with posteriors
library(tidybayes)
# for modeling workflow
library(tidymodels)
# for stitching together model output plots
library(patchwork)
# for spatial t-tests of differences in GDE/GDM across the global freezeline
library(SpatialPack) 



source(here("R", "modeling_fns.R"))

Read in and process raw data

Read raw data

Code

# reading in the raw pairwise distances, just in case I need that info later
# I already created per-cell summaries, which I'm reading in below
raw_pi <- read_csv(here("output", "spreadsheets", "cell_medium_3_10_pi.csv"))

# only keeping relevant columns
raw_df <- read_csv(here("output", "spreadsheets", "cell_medium_3_10_sumstats.csv")) %>% 
  select(cell, gdm = sqrt_pi, gde = hill_1, num_ind, num_otu, num_order)

Filter out bad cells

These cells contain wonky environmental data that impacts modeling. The wonky environmental data is likely due to their coastal location, resulting in faulty projection of the environmental models.

HOLDING OFF ON THIS FOR NOW

Code

filter_df <- raw_df

Add in spatial and environmental metadata

This includes converting the data frames to sf objects to make spatial operations and plotting easier, mapping cells to their continents, and extracting relevant environmental data.

Code

# setting the crs for maps and environmental data
# behrmann projection
crs_behr <- "+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +datum=WGS84 +ellps=WGS84 +units=m +no_defs"

# for assigning cells to continents. Islands are missed at coarser resolutions
world_large <- rnaturalearth::ne_countries(scale = "large", returnclass = "sf") %>%
  select(continent, name_long) %>% 
  st_transform(crs = crs_behr) 

# for mapping. this will be smaller so plotting is faster
world_small <- rnaturalearth::ne_countries(scale = "small", returnclass = "sf") %>%
  select(continent, name_long) %>% 
  st_transform(crs = crs_behr) 

# all predictors
predictors_all <- c("current_medium_bio_13", 
                    "current_medium_bio_14", 
                    "current_medium_bio_15", 
                    "current_medium_bio_2", 
                    "current_medium_bio_5", 
                    "ghh_medium_std_dev", 
                    "human_medium_gHM", 
                    "GlobalExtreme_tsTrendExt", 
                    "GlobalExtreme_tsVarExt", 
                    "GlobalExtreme_prTrendExt", 
                    "GlobalExtreme_prVarExt")

# Read in predictor rasters
rast_list <- list.files(here("data", "climate_agg"),
                        pattern = "medium",
                        full.names = TRUE) 


rast_list_pred <- rast_list[str_detect(rast_list,  paste(predictors_all, collapse = "|"))]


rasters_full <- raster::stack(rast_list_pred)

crs(rasters_full) <- crs_behr

# convert to data frame 
predictors_sf <- rasters_full %>%
  rasterToPolygons() %>%
  st_as_sf() 

predictors_sf <- predictors_sf %>% 
  mutate(cell = cellFromXY(rasters_full, st_coordinates(st_centroid(predictors_sf)))) %>% 
  rename_with(~str_remove_all(.x, "current_medium_"), contains("current_medium")) %>% rename_with(~str_remove_all(.x, "medium_"), contains("_medium_"))

# read in stability 
stability_sf <- read_sf(here("data", "climate_poly", "new_stability.geojson"), crs = crs_behr) %>% 
  rename(precip_trend = GlobalExtreme_prTrendExt,
         precip_var = GlobalExtreme_prVarExt,
         temp_trend = GlobalExtreme_tsTrendExt,
         temp_var = GlobalExtreme_tsVarExt)

# for freezeline delineation
div_sf <- st_read(here("data", "climate_poly", "min_temp_binary.geojson")) %>% 
  st_transform(crs = crs_behr)

Reading layer `min_temp_binary' from data source 
  `/Users/connorfrench/Dropbox/Old_Mac/School_Stuff/CUNY/BigAss-bird-phylogeography/BigAss-phylogeography/data/climate_poly/min_temp_binary.geojson' 
  using driver `GeoJSON'
Simple feature collection with 2 features and 1 field
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180.0001 ymin: -88.29181 xmax: 179.6665 ymax: 83.66653
Geodetic CRS:  WGS 84

Code

# hack to validate the multipolygon
div_sf_buf <- st_buffer(div_sf, dist = 0)

Code

# add predictors and continents 
full_sf <- left_join(predictors_sf, filter_df, by = "cell") %>% 
  st_join(stability_sf, left = TRUE, largest = TRUE) %>% 
  st_join(world_large, largest = TRUE) %>% 
  st_join(div_sf_buf, largest = TRUE) %>% 
  mutate(freezeline = if_else(min_temp == "temperate", 1L, 0L))
  
ggplot() +
  geom_sf(data = world_small) +
  geom_sf(data = full_sf, aes(geometry = geometry, fill = gde, color = gde)) +
  scale_fill_viridis_c(na.value = "transparent") +
  scale_color_viridis_c(na.value = "transparent") +
  theme_bw()

Code

write_sf(full_sf, here("output", "spreadsheets", "full_sf.geojson"))

Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
that

Create data sets with minimum OTU thresholds

10, 25, 50, 100, 150, 200

Code

min_otu_vals <- c(10, 25, 50, 100, 150, 200)

splits_sf <- map(min_otu_vals, ~filter(full_sf, num_otu >= .x, 
                                       # there's a single missing value
                                       !is.na(ghh_std_dev)))

# there is a single cell in South America that doesn't map to the continent across all data sets
splits_sf <- map(splits_sf, ~mutate(.x, continent = replace_na(continent, "South America")))

# name the data frames
names(splits_sf) <- paste0("min_otu_", min_otu_vals)

Split into testing and training data

Stratify the split according to continent to ensure somewhat even spatial representation of the test vs training data. I’m doing a 75% train to 25% test split. The test data will not be touched until the end- this is not a train-validate split.

Code

set.seed(9999)
initial_split_sf <- map(splits_sf, ~initial_split(data = .x, prop = 3/4, strata = continent))

train_sf <- map(initial_split_sf, training)

test_sf <- map(initial_split_sf, testing)

GLMMFields

Projection predictive variable selection

Need to scale the data before modeling (mean 0, sd 1). When predicting to test data, I will apply the centering and scaling factors of the training data to the test data to prevent data leakage.

Code

train_scaled <- suppressWarnings(map(train_sf, scale_data))

Fit full models (all 11 predictors) as the reference models for projpred selection.

Code

options(mc.cores = parallel::detectCores())
set.seed(1977)
full_mod_gde <- map(train_scaled, ~run_stan(response = "gde", df = .x))

set.seed(137777)
full_mod_gdm <- map(train_scaled, ~run_stan(response = "gdm", df = .x))

Code

write_rds(full_mod_gde, here("output", "models", "full_mod_gde.rds"))

write_rds(full_mod_gdm, here("output", "models", "full_mod_gdm.rds"))

Do projection predictive selection. After visually inspecting the performance statistics plots (ELPD and RMSE; example plot below), I determined that the model size suggested by projpred::suggest_size() was the most reasonable model size for all OTU filtering schemes.

Code

projpred_gde <- map(full_mod_gde, run_projpred)

projpred_gdm <- map(full_mod_gdm, run_projpred)


plot(projpred_gde$min_otu_100, stats = c("elpd", "rmse"), deltas = TRUE)
plot(projpred_gdm$min_otu_100, stats = c("elpd", "rmse"), deltas = TRUE)

Code

write_rds(projpred_gde, here("output", "models", "projpred_gde.rds"))
write_rds(projpred_gdm, here("output", "models", "projpred_gdm.rds"))

GLMM models

Run the models.

Code

options(mc.cores = parallel::detectCores())

set.seed(895)
glmm_gde <- map2(train_scaled, projpred_gde, ~run_glmmfields(response = "gde", df = .x, pp = .y))

set.seed(8985)
glmm_gdm <- map2(train_scaled, projpred_gdm, ~run_glmmfields(response = "gdm", df = .x, pp = .y, nknots = 30))

Code

write_rds(glmm_gde, here("output", "models", "glmm_gde.rds"))
write_rds(glmm_gdm, here("output", "models", "glmm_gdm.rds"))

Compare models

Comparing the top models selected from the two model selection approaches. I’m going to compare the variables that are selected, the levels of residual spatial autocorrelation (SAC), and the spatial distribution of error in the training set.

Residual SAC

The GLMM properly accounts for SAC in both data sets!

Code

# predictions
glmm_pred_gde <- map(glmm_gde, predict)
glmm_pred_gdm <- map(glmm_gdm, predict)

# map to sf data frames for spatial weights matrices and doing moran.mc easily
glmm_pred_sf_gde <- map2(train_sf, glmm_pred_gde, ~mutate(.x, .pred = pull(.y, 1), .resid = gde - .pred))
glmm_pred_sf_gdm <- map2(train_sf, glmm_pred_gdm, ~mutate(.x, .pred = pull(.y, 1), .resid = gdm - .pred))

# create a neighborhood matrix (queen = TRUE means all neighbors, including diagonals, will be included) and spatial weights matrix
glmm_w <- map(train_sf, ~poly2nb(.x, queen = TRUE)) %>% 
  map(~nb2listw(.x, style = "W", zero.policy = TRUE))


glmm_moran_gde <- map2(glmm_pred_sf_gde, glmm_w, ~moran.mc(.x$.resid, .y, nsim = 10000, zero.policy = TRUE))
glmm_moran_gdm <- map2(glmm_pred_sf_gdm, glmm_w, ~moran.mc(.x$.resid, .y, nsim = 10000, zero.policy = TRUE))

# check moran p-values
map(glmm_moran_gde, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Moran's I p-values (GDE)")

Moran’s I p-values (GDE)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.2127787	0.1192881	0.1878812	0.7075292	0.4193581	0.5543446

Code

map(glmm_moran_gdm, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Moran's I p-values (GDM)")

Moran’s I p-values (GDM)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.3030697	0.1594841	0.1461854	0.7384262	0.4887511	0.2788721

Posterior plots

Generating a bunch of diagnostic plots to compare how the models perform on training data.

Get posteriors

Code

posts_gde <- map(glmm_gde, ~glmmfields::posterior_predict(.x, iter = 1000))
posts_gdm <- map(glmm_gdm, ~glmmfields::posterior_predict(.x, iter = 1000))

resp_gde <- map(train_sf, ~pull(.x, gde))
resp_gdm <- map(train_sf, ~pull(.x, gdm))

Posterior-predictive check

The posteriors for all are similar, but the distribution of GDE gets narrower as the minimum OTU filter increases.

Code

plot_ppc_dens <- function(resp, post, xlims = c(0, 1)) {
  
  ppc_dens_overlay(resp, post[1:50,]) +
    lims(x = xlims)
}

ppc_gde <-
  map2(
    resp_gde,
    posts_gde,
    ~ plot_ppc_dens(.x, .y)
  )

ppc_gdm <-
  map2(
    resp_gdm,
    posts_gdm,
    ~ plot_ppc_dens(.x, .y, xlims = c(0, 0.1))
  )

PPC GDE
PPC GDM

Code

ppc_gde[["min_otu_10"]] + labs(title = "min_otu_10") +
  ppc_gde[["min_otu_25"]] + labs(title = "min_otu_25") +
  ppc_gde[["min_otu_50"]] + labs(title = "min_otu_50") +
  ppc_gde[["min_otu_100"]] + labs(title = "min_otu_100") +
  ppc_gde[["min_otu_150"]] + labs(title = "min_otu_150") +
  ppc_gde[["min_otu_200"]] + labs(title = "min_otu_200") +
  patchwork::plot_layout(ncol = 2)

Code

ppc_gdm[["min_otu_10"]] + labs(title = "min_otu_10") +
  ppc_gdm[["min_otu_25"]] + labs(title = "min_otu_25") +
  ppc_gdm[["min_otu_50"]] + labs(title = "min_otu_50") +
  ppc_gdm[["min_otu_100"]] + labs(title = "min_otu_100") +
  ppc_gdm[["min_otu_150"]] + labs(title = "min_otu_150") +
  ppc_gdm[["min_otu_200"]] + labs(title = "min_otu_200") +
  patchwork::plot_layout(ncol = 2)

Warning: Removed 1 rows containing non-finite values (stat_density).

Coefficient posteriors

For GDE:
BIO5 (max temp of the warmest month) shows up in all models, followed by BIO13 (prec of wettest month, 4/6), temp_var (4/6), temp_trend (4/6), BIO2 (diurnal temp range, 3/6)

For GDM:

Code

# only include post-warmup draws
beta_posts_gde <- map(glmm_gde, get_beta_posts) 
beta_posts_gdm <- map(glmm_gdm, get_beta_posts) 

beta_plots_gde <- map(beta_posts_gde, ~mcmc_intervals(.x, 
           prob = 0.70,
           prob_outer = 0.95) +
  geom_vline(xintercept = 0) +
  ggtitle("70% and 95% CI intervals"))  

beta_plots_gdm <- map(beta_posts_gdm, ~mcmc_intervals(.x, 
           prob = 0.70,
           prob_outer = 0.95) +
  geom_vline(xintercept = 0) +
  ggtitle("70% and 95% CI intervals"))

Beta GDE
Beta GDM

Code

beta_plots_gde[["min_otu_10"]] + labs(title = "min_otu_10") +
  beta_plots_gde[["min_otu_25"]] + labs(title = "min_otu_25") +
  beta_plots_gde[["min_otu_50"]] + labs(title = "min_otu_50") +
  beta_plots_gde[["min_otu_100"]] + labs(title = "min_otu_100") +
  beta_plots_gde[["min_otu_150"]] + labs(title = "min_otu_150") +
  beta_plots_gde[["min_otu_200"]] + labs(title = "min_otu_200") +
  patchwork::plot_layout(ncol = 2)

Code

beta_plots_gdm[["min_otu_10"]] + labs(title = "min_otu_10") +
  beta_plots_gdm[["min_otu_25"]] + labs(title = "min_otu_25") +
  beta_plots_gdm[["min_otu_50"]] + labs(title = "min_otu_50") +
  beta_plots_gdm[["min_otu_100"]] + labs(title = "min_otu_100") +
  beta_plots_gdm[["min_otu_150"]] + labs(title = "min_otu_150") +
  beta_plots_gdm[["min_otu_200"]] + labs(title = "min_otu_200") +
  patchwork::plot_layout(ncol = 2)

Warning: Removed 1 rows containing missing values (geom_vline).

Test predictions

Predicting the model to withheld test data.

Code

test_scaled <- suppressWarnings(map2(train_sf, test_sf, ~scale_test(.x, .y)))

test_pred_gde <- map2(glmm_gde, test_scaled, ~predict(.x, newdata = .y))
test_pred_gdm <- map2(glmm_gdm, test_scaled, ~predict(.x, newdata = .y))

test_sf_gde <-
  map2(
    test_sf,
    test_pred_gde,
    ~ mutate(
      .x,
      .pred = .y$estimate,
      .pred_ci = .y$conf_high - .y$conf_low,
      .resid = gde - .y$estimate
    )
  )

test_sf_gdm <-
  map2(
    test_sf,
    test_pred_gdm,
    ~ mutate(
      .x,
      .pred = .y$estimate,
      .pred_ci = .y$conf_high - .y$conf_low,
      .resid = gdm - .y$estimate
    )
  )

Obs-v-pred GDE
Obs-v-pred GDM

Code

obs_vs_pred_gde <- map(test_sf_gde, ~plot_obs_vs_pred(.x, resp = "gde"))

obs_vs_pred_gde[["min_otu_10"]] + labs(title = "min_otu_10") 
  obs_vs_pred_gde[["min_otu_25"]] + labs(title = "min_otu_25") 
  obs_vs_pred_gde[["min_otu_50"]] + labs(title = "min_otu_50") 
  obs_vs_pred_gde[["min_otu_100"]] + labs(title = "min_otu_100") 
  obs_vs_pred_gde[["min_otu_150"]] + labs(title = "min_otu_150") 
  obs_vs_pred_gde[["min_otu_200"]] + labs(title = "min_otu_200")

Code

obs_vs_pred_gdm <- map(test_sf_gdm, ~plot_obs_vs_pred(.x, resp = "gdm"))

obs_vs_pred_gdm[["min_otu_10"]] + labs(title = "min_otu_10") 
  obs_vs_pred_gdm[["min_otu_25"]] + labs(title = "min_otu_25") 
  obs_vs_pred_gdm[["min_otu_50"]] + labs(title = "min_otu_50") 
  obs_vs_pred_gdm[["min_otu_100"]] + labs(title = "min_otu_100") 
  obs_vs_pred_gdm[["min_otu_150"]] + labs(title = "min_otu_150") 
  obs_vs_pred_gdm[["min_otu_200"]] + labs(title = "min_otu_200")

Test maps

Code

test_pred_maps_gde <- map(test_sf_gde, ~map_test(.x, var = ".pred"))
test_pred_maps_gdm <- map(test_sf_gdm, ~map_test(.x, var = ".pred"))

Code

test_pred_maps_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

test_pred_maps_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

test_pred_maps_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

test_pred_maps_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

test_pred_maps_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

test_pred_maps_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

test_pred_maps_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

test_pred_maps_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

test_pred_maps_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

test_pred_maps_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

test_pred_maps_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

test_pred_maps_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Test residual maps

Maps of residual error across test data.

Code

test_resid_maps_gde <- map(test_sf_gde, ~map_test(.x, var = ".resid"))

test_resid_maps_gdm <- map(test_sf_gdm, ~map_test(.x, var = ".resid"))

Code

test_resid_maps_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

test_resid_maps_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

test_resid_maps_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

test_resid_maps_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

test_resid_maps_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

test_resid_maps_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

test_resid_maps_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

test_resid_maps_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

test_resid_maps_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

test_resid_maps_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

test_resid_maps_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

test_resid_maps_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Test 95% CI maps

Maps of the range in 95% CI of predicted values.

Code

test_ci_maps_gde <- map(test_sf_gde, ~map_test(.x, var = ".pred_ci"))
test_ci_maps_gdm <- map(test_sf_gdm, ~map_test(.x, var = ".pred_ci"))

Code

test_ci_maps_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

test_ci_maps_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

test_ci_maps_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

test_ci_maps_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

test_ci_maps_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

test_ci_maps_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

test_ci_maps_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

test_ci_maps_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

test_ci_maps_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

test_ci_maps_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

test_ci_maps_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

test_ci_maps_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Full maps

Predicting the models to the full data sets

Code

pred_vars <- c("bio_2", 
                 "bio_5", 
                 "bio_13", 
                 "bio_14", 
                 "bio_15", 
                 "ghh_std_dev", 
                 "human_gHM", 
                 "precip_trend", 
                 "precip_var",
                 "temp_trend",
                 "temp_var")


full_scaled <- suppressWarnings(map(train_sf, ~scale_test(.x, df_test = full_sf)))

full_scaled_noNA <- map(full_scaled, ~filter(.x, complete.cases(across(any_of(pred_vars)))))

full_preds_gde <- map2(glmm_gde, full_scaled_noNA, ~predict(.x, .y))
full_preds_gdm <- map2(glmm_gdm, full_scaled_noNA, ~predict(.x, .y))

full_sf_noNA <- filter(full_sf, complete.cases(across(any_of(pred_vars)))) 
  
full_preds_sf_gde <-
  map(full_preds_gde, ~ bind_cols(full_sf_noNA, .x)) %>%
  map( ~ mutate(
    .x,
    .pred = estimate,
    .pred_ci = conf_high - conf_low,
    .resid = gde - .pred,
    # there are some outlier localities that really skew the distribution of error
    .log_ci = log(.pred_ci)
  ))

full_preds_sf_gdm <-
  map(full_preds_gdm, ~ bind_cols(full_sf_noNA, .x)) %>%
  map( ~ mutate(
    .x,
    .pred = estimate,
    .pred_ci = conf_high - conf_low,
    .resid = gdm - .pred,
    # there are some outlier localities that really skew the distribution of error
    .log_ci = log(.pred_ci)
  ))

Write the predictions to file because they take forever to generate

Code

write_rds(full_preds_gde, here("output", "spreadsheets", "full_preds_gde.rds"))
write_rds(full_preds_gdm, here("output", "spreadsheets", "full_preds_gdm.rds"))

Full prediction maps

Make the prediction maps.

Code

full_pred_maps_gde <- map(full_preds_sf_gde, ~map_test(.x, var = ".pred"))
full_pred_maps_gdm <- map(full_preds_sf_gdm, ~map_test(.x, var = ".pred"))

Code

full_pred_maps_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

full_pred_maps_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

full_pred_maps_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

full_pred_maps_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

full_pred_maps_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

full_pred_maps_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

full_pred_maps_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

full_pred_maps_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

full_pred_maps_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

full_pred_maps_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

full_pred_maps_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

full_pred_maps_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Full 95% CI maps

Make the CI error maps.

Code

full_ci_maps_gde <- map(full_preds_sf_gde, ~map_test(.x, var = ".log_ci"))
full_ci_maps_gdm <- map(full_preds_sf_gdm, ~map_test(.x, var = ".log_ci"))

Code

full_ci_maps_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

full_ci_maps_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

full_ci_maps_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

full_ci_maps_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

full_ci_maps_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

full_ci_maps_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

full_ci_maps_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

full_ci_maps_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

full_ci_maps_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

full_ci_maps_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

full_ci_maps_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

full_ci_maps_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Latitude

I opted for a simple assessment of latitude ~ GD, by doing a pearson correlation that accounts for spatial autocorrelation

Code

coords_full <- map(splits_sf, ~st_coordinates(st_centroid(.x)))

sp_lat_gde <- map2(splits_sf, coords_full, ~modified.ttest(x = abs(.y[,2]), y = .x$gde, coords = .y))

sp_lat_gdm <- map2(splits_sf, coords_full, ~modified.ttest(x = abs(.y[,2]), y = .x$gdm, coords = .y))

map(sp_lat_gde, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Latitude p-values (GDE)")

map(sp_lat_gde, ~.x[["corr"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Latitude correlation coefficients (GDE)")

map(sp_lat_gdm, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Latitude p-values (GDM)")

map(sp_lat_gdm, ~.x[["corr"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Latitude correlation coefficients (GDM)")

Latitude p-values (GDE)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.1218372	0.0328494	0.0824729	0.0639672	0.0517265	0.0471147

Latitude correlation coefficients (GDE)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
-0.1335496	-0.2450824	-0.2498223	-0.279874	-0.3447724	-0.3555856

Latitude p-values (GDM)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.8285219	0.9184679	0.8408779	0.766976	0.9010953	0.9615755

Latitude correlation coefficients (GDM)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.0181651	0.0122095	0.0327533	0.0527753	-0.0228155	-0.0092474

Code

lat_df <- map(splits_sf, function(x) {
  c <- st_coordinates(st_centroid(x))
  df <- mutate(x, lat = c[,2], lon = c[,1])
  return(df)
})

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Warning in st_centroid.sf(x): st_centroid assumes attributes are constant over
geometries of x

Code

lat_plots_gde <- map(lat_df, ~ggplot(data = .x, aes(x = abs(lat), y = gde)) + 
                       geom_point() + 
                       geom_smooth(method = "lm") + 
                       theme_bw())

lat_plots_gdm <- map(lat_df, ~ggplot(data = .x, aes(x = abs(lat), y = gdm)) + 
                       geom_point() + 
                       geom_smooth(method = "lm") + 
                       theme_bw())

Code

lat_plots_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

`geom_smooth()` using formula 'y ~ x'

Code

lat_plots_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

`geom_smooth()` using formula 'y ~ x'

Code

  lat_plots_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

`geom_smooth()` using formula 'y ~ x'

Freezeline

GDE is significantly different across the global freezeline for all datasets, while GDM is not.

Code

coords_full <- map(splits_sf, ~st_coordinates(st_centroid(.x)))

sp_ttest_gde <- map2(splits_sf, coords_full, ~modified.ttest(x = .x$freezeline, y = .x$gde, coords = .y))

sp_ttest_gdm <- map2(splits_sf, coords_full, ~modified.ttest(x = .x$freezeline, y = .x$gdm, coords = .y))

map(sp_ttest_gde, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Freezeline p-values (GDE)")

map(sp_ttest_gde, ~.x[["corr"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Freezeline correlation coefficients (GDE)")

map(sp_ttest_gdm, ~.x[["p.value"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Freezeline p-values (GDM)")

map(sp_ttest_gdm, ~.x[["corr"]]) %>% 
  bind_rows() %>% 
  knitr::kable(caption = "Freezeline correlation coefficients (GDM)")

Freezeline p-values (GDE)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.0107783	0.0101762	0.017008	0.0130217	0.0296767	0.0408479

Freezeline correlation coefficients (GDE)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
-0.2155514	-0.2745981	-0.3154334	-0.3380986	-0.3650885	-0.3452126

Freezeline p-values (GDM)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
0.8864441	0.8079055	0.8545842	0.4842055	0.7181578	0.5342123

Freezeline correlation coefficients (GDM)
min_otu_10	min_otu_25	min_otu_50	min_otu_100	min_otu_150	min_otu_200
-0.0100113	0.0230526	0.0233863	0.0966559	0.0536256	0.0952099

Code

fl_boxplot_gde <- map(splits_sf, ~ggplot() + geom_boxplot(data = .x, aes(x = min_temp, y = gde)) + theme_bw())

fl_boxplot_gdm <- map(splits_sf, ~ggplot() + geom_boxplot(data = .x, aes(x = min_temp, y = gdm)) + theme_bw())

Code

#|: label: fl-boxplot-gde
#| layout-ncol: 2

fl_boxplot_gde[["min_otu_10"]] + labs(title = "GDE, min_otu_10")

Code

  fl_boxplot_gde[["min_otu_25"]] + labs(title = "GDE, min_otu_25")

Code

  fl_boxplot_gde[["min_otu_50"]] + labs(title = "GDE, min_otu_50")

Code

  fl_boxplot_gde[["min_otu_100"]] + labs(title = "GDE, min_otu_100")

Code

  fl_boxplot_gde[["min_otu_150"]] + labs(title = "GDE, min_otu_150")

Code

  fl_boxplot_gde[["min_otu_200"]] + labs(title = "GDE, min_otu_200")

Code

#|: label: fl-boxplot-gdm
#| layout-ncol: 2

fl_boxplot_gdm[["min_otu_10"]] + labs(title = "GDM, min_otu_10")

Code

  fl_boxplot_gdm[["min_otu_25"]] + labs(title = "GDM, min_otu_25")

Code

  fl_boxplot_gdm[["min_otu_50"]] + labs(title = "GDM, min_otu_50")

Code

  fl_boxplot_gdm[["min_otu_100"]] + labs(title = "GDM, min_otu_100")

Code

  fl_boxplot_gdm[["min_otu_150"]] + labs(title = "GDM, min_otu_150")

Code

  fl_boxplot_gdm[["min_otu_200"]] + labs(title = "GDM, min_otu_200")

Load packages

Read in and process raw data

Read raw data

Filter out bad cells

Add in spatial and environmental metadata

Create data sets with minimum OTU thresholds

Split into testing and training data

GLMMFields

Projection predictive variable selection

GLMM models

Compare models

Residual SAC

Posterior plots

Get posteriors

Posterior-predictive check

Coefficient posteriors

Test predictions

Test maps

Test residual maps

Test 95% CI maps

Full maps

Full prediction maps

Full 95% CI maps

Latitude

Freezeline

System information