# These are the three most common fluxes recorded in SRDB
dv_set <- c("sqrt_Rs_annual", "sqrt_Rh_annual", "sqrt_Rs_growingseason")
# The following SPEI values are from Table 1 of Keune et al. (2025)
# https://www.nature.com/articles/s41597-025-04896-y
# and correspond to "severely dry", "moderately dry", and "mildly dry"
# (there are no "extremely dry"=-2.33 values in the dataset)
spei_cutoffs <- c(-1.65, -1.28, -0.84)
# What SPEI window should be used for the cutoffs above?
spei_windows <- c(12, 24) # months
# Model independent variables
predictors <- c("Ecosystem_type",
"SPEI12",
"Tair", "Precip",
"Tsoil_lev1", "VWC_lev1",
"LAI_high", "LAI_low", "Soil_type_name")
# Linear model right hand side
lm_rhs_formula <- as.character(formula(m_Rs_PBE))[3]
test_lm_pbe <- function(x_train, x_val, formula) {
m <- lm(formula = formula, data = x_train)
pred_train <- predict(m, newdata = x_train)
# If data sizes are small (as happens with Rh_annual and spei_cutoff -1.5)
# there might be a mismatch in available data, causing prediction of
# validation data to fail
try(pred_val <- predict(m, newdata = x_val))
if(exists("pred_val")) {
validation_r2 <- r2(pred_val, pull(x_val[1]))
validation_rmse <- rmse(pred_val, pull(x_val[1]))
} else {
validation_r2 <- validation_rmse <- NA_real_
}
# Reduce the model and identify and significant PBE terms remaining
m_reduced <- MASS::stepAIC(m, trace = FALSE)
pbe_terms <- tidy(m_reduced) %>% filter(grepl("PBE", term))
n_pbe_terms <- nrow(pbe_terms)
n_signif_pbe_terms <- sum(pbe_terms$p.value < 0.05)
# Re-fit model with no PBE term
x_no_pbe <- x_train
x_no_pbe$PBE <- 1
m_no_pbe <- lm(formula = formula, data = x_no_pbe)
tibble(n_pbe_terms = n_pbe_terms,
n_signif_pbe_terms = n_signif_pbe_terms,
pbe_anova = anova_p_twomodels(m, m_no_pbe),
training_r2 = r2(pred_train, pull(x_train[1])),
training_rmse = rmse(pred_train, pull(x_train[1])),
validation_r2 = validation_r2,
validation_rmse = validation_rmse)
}
do_sensitivity_analysis <- function(x, dv_set, predictors, spei_windows, spei_cutoffs) {
rf_results <- list()
lm_results <- list()
SPEI_cols <- colnames(x)[grep("^SPEI", colnames(x))]
for(dv in dv_set) {
for(w in spei_windows) {
spei_past_col <- paste0("SPEI", w, "_y1")
for(spei_cut in spei_cutoffs) {
# Remove other potential independent variables
this_x <- x[c(dv, union(predictors, SPEI_cols))]
# Complete cases only
this_x <- this_x[complete.cases(this_x),]
message("dv = ", dv, " spei_cut = ",
spei_cut, " window = ", w, " n = ", nrow(this_x))
# Identify observations not currently in a drought but that
# WERE in a drought the previous year
# PBE = potential Birch effect
this_x$PBE = this_x$SPEI12 > 0 & this_x[spei_past_col] <= spei_cut
# Drop non-predictors entirely now
this_x <- this_x[c(dv, predictors, "PBE")]
# Construct formula
f <- as.formula(paste(dv, "~", lm_rhs_formula))
# Do the rf k-fold
rf_out <- do_k_fold(this_x, fit_and_test_rf)
rf_out$model <- "rf"
# Do the lm k-fold; do_k_fold will pass f on to fit_and_test_lm
lm_out <- do_k_fold(this_x, test_lm_pbe, formula = f)
lm_out$model <- "lm"
info_cols <- tibble(spei_window = w,
spei_cut = spei_cut,
dv = dv,
n = nrow(this_x),
n_pbe_true = sum(this_x$PBE))
lm_results[[paste(dv, w, spei_cut)]] <-
bind_cols(info_cols, bind_rows(rf_out, lm_out))
}
}
}
bind_rows(lm_results)
}
out <- do_sensitivity_analysis(dat_pbe_base,
dv_set, predictors,
spei_windows = c(12, 24),
spei_cutoffs = spei_cutoffs)