Take a look at the distribution of met data, and perform a Principal Components Analysis (PCA) to examine auto-correlation between the various met variables.
ggplot(resistance, aes(trough_resistance, fill = severity)) +
geom_histogram(bins = 20) +
ggtitle("Resistance") +
facet_wrap(~variable)
ggplot(resilience, aes(resilience, fill = severity)) +
geom_histogram(bins = 20) +
ggtitle("Resilience") +
facet_wrap(~variable)
We see from the met analysis above that vbdsf and vddsf are highly correlated with nbdsf and nddsf respectively; remove one pair.
Take a first look at the relationship between output variables and met variables.
# Build a combined met-metric dataset
combined %>%
select(metric, variable, met, value) %>%
# We see from the met analysis above that vbdsf and vddsf are highly
# correlated with nbdsf and nddsf respectively; remove one pair
left_join(select(met2_wide, -nbdsf, -nddsf), by = "met") ->
metric_combined
resistance %>%
select(variable, met, trough_resistance) %>%
left_join(select(met2_wide, -nbdsf, -nddsf), by = "met") ->
resist_combined
metric_combined %>%
pivot_longer(dlwrf:vddsf, values_to = "met_value") ->
metric_comb_long
p_mm <- ggplot(filter(metric_comb_long, metric == "resistance"),
aes(met_value, value, color = variable)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, color = "black", linetype = 5) +
facet_wrap(~ variable + name, scales = "free") +
ggtitle("Resistance") +
theme(legend.position = "none", axis.text = element_text(size = 5))
print(p_mm)
p_mm <- p_mm %+% filter(metric_comb_long, metric == "resilience")
print(p_mm + ggtitle("Resilience"))
Fit a Random Forest model to each output variable (GPP, etc.) and then calculate variable importance for each model. Make partial dependence plots for the three most important met variables for each output variable.
# Fit a Random Forest for each variable (across all treatment severities)
# Note the r.f. functions don't play well with tibbles, so change to data frame
library(randomForest)
d <- as.data.frame(metric_combined)
rc_all <- split(d, interaction(d$metric, d$variable))
rf_all <- lapply(rc_all, function(x)
# remove metric, variable, met columns and run RF
randomForest(value ~ ., data = x[-1:-3], importance = TRUE)
)
# Importance metrics and partial plot data
import <- list()
partial <- list()
for(var in names(rf_all)) {
mod <- rf_all[[var]]
metric <- rc_all[[var]]$metric[1]
variable <- rc_all[[var]]$variable[1]
imp <- as.data.frame(importance(mod))
imp$met_var <- rownames(imp)
imp$metric <- metric
imp$variable <- variable
import[[var]] <- as_tibble(imp)
impvar <- rownames(imp)[order(imp[, 1], decreasing = TRUE)]
for (i in seq_along(impvar[1:3])) {
pp <- partialPlot(mod, rc_all[[var]], impvar[i], plot = FALSE)
partial[[paste(var, i)]] <- tibble(metric = metric,
variable = variable,
met_var = impvar[i],
x = pp$x, y = pp$y)
}
}
import <- bind_rows(import)
partial <- bind_rows(partial)
library(forcats)
f_imp_plot <- function(x) {
ggplot(x, aes(fct_reorder(met_var, `%IncMSE`), `%IncMSE`)) +
geom_col() +
coord_flip() +
xlab("Met variable") +
ggtitle(unique(x$variable))
}
d <- filter(import, metric == "resistance")
xx <- lapply(split(d, d$variable), f_imp_plot)
library(cowplot)
varimp_plot <- plot_grid(xx[["GPP"]], xx[["NEP"]], xx[["NPP"]], xx[["Rh"]])
print(varimp_plot)
save_plot("varimp_resistance.png", varimp_plot)
p_pdp <- ggplot(filter(partial, metric == "resistance"),
aes(x, y, color = variable)) +
geom_line() +
xlab("Met var value") + ylab("Resistance") +
ggtitle("Partial dependence plot") +
facet_grid(variable ~ met_var, scales = "free")
print(p_pdp)
# Re-do above plots but now for resilience
d <- filter(import, metric == "resilience")
xx <- lapply(split(d, d$variable), f_imp_plot)
varimp_plot <- plot_grid(xx[["GPP"]], xx[["NEP"]], xx[["NPP"]], xx[["Rh"]])
print(varimp_plot)
save_plot("varimp_resilience.png", varimp_plot)
p_pdp <- p_pdp %+% filter(partial, metric == "resilience")
print(p_pdp + ylab("Resilience"))
# Combined partial dependence plot
partial %>%
mutate(metric = if_else(metric == "resistance", "Resistance", "Resilience")) %>%
ggplot(aes(x, y, color = variable)) +
geom_line() +
xlab("Meteorological variable value") + ylab("Metric value") +
scale_color_discrete("") +
facet_grid(metric ~ met_var, scales = "free") +
theme(axis.text.x = element_text(angle = 90))
For resistance, tmp (air temperature) and vbdsf (sunlight) are among the top-three most important drivers for all four output variables (GPP, NEP, NPP, Rh). Higher temperature and more light translate into higher resistance for all variables.
This is largely true for resilience as well, although prate (precipitation) is important for both NPP and Rh; in fact precipitation is far and away the most important variable for Rh. Here, however, higher temperature and more light mean less resilience.
Interesting that when GPP is included as a condition that could explain the resistance & resilience metrics it really only affect the Rh values.