1 Met data

Take a look at the distribution of met data, and perform a Principal Components Analysis (PCA) to examine auto-correlation between the various met variables.

2 Metric data

ggplot(resistance, aes(trough_resistance, fill = severity)) +
  geom_histogram(bins = 20) + 
  ggtitle("Resistance") +
  facet_wrap(~variable)

ggplot(resilience, aes(resilience, fill = severity)) +
  geom_histogram(bins = 20) + 
  ggtitle("Resilience") +
  facet_wrap(~variable)

3 Met-metric data

We see from the met analysis above that vbdsf and vddsf are highly correlated with nbdsf and nddsf respectively; remove one pair.

Take a first look at the relationship between output variables and met variables.

3.1 Resistance

# Build a combined met-metric dataset
combined %>% 
  select(metric, variable, met, value) %>% 
  # We see from the met analysis above that vbdsf and vddsf are highly
  # correlated with nbdsf and nddsf respectively; remove one pair
  left_join(select(met2_wide, -nbdsf, -nddsf), by = "met") ->
  metric_combined

resistance %>% 
  select(variable, met, trough_resistance) %>% 
  left_join(select(met2_wide, -nbdsf, -nddsf), by = "met") ->
  resist_combined

metric_combined %>% 
  pivot_longer(dlwrf:vddsf, values_to = "met_value") ->
  metric_comb_long

p_mm <- ggplot(filter(metric_comb_long, metric == "resistance"), 
               aes(met_value, value, color = variable)) +
  geom_point() + 
  geom_smooth(method = "lm", formula = y ~ x, color = "black", linetype = 5) +
  facet_wrap(~ variable + name, scales = "free") + 
  ggtitle("Resistance") +
  theme(legend.position = "none", axis.text = element_text(size = 5))
print(p_mm)

3.2 Resilience

p_mm <- p_mm %+% filter(metric_comb_long, metric == "resilience")
print(p_mm + ggtitle("Resilience"))

4 Variable importance

Fit a Random Forest model to each output variable (GPP, etc.) and then calculate variable importance for each model. Make partial dependence plots for the three most important met variables for each output variable.

# Fit a Random Forest for each variable (across all treatment severities)
# Note the r.f. functions don't play well with tibbles, so change to data frame
library(randomForest)
d <- as.data.frame(metric_combined)
rc_all <- split(d, interaction(d$metric, d$variable))
rf_all <- lapply(rc_all, function(x) 
  # remove metric, variable, met columns and run RF
  randomForest(value ~ ., data = x[-1:-3], importance = TRUE)
)

# Importance metrics and partial plot data
import <- list()
partial <- list()
for(var in names(rf_all)) {
  mod <- rf_all[[var]]
  metric <- rc_all[[var]]$metric[1]
  variable <- rc_all[[var]]$variable[1]
  
  imp <- as.data.frame(importance(mod))
  imp$met_var <- rownames(imp)
  imp$metric <- metric
  imp$variable <- variable
  import[[var]] <- as_tibble(imp)
  
  impvar <- rownames(imp)[order(imp[, 1], decreasing = TRUE)]
  for (i in seq_along(impvar[1:3])) {
    pp <- partialPlot(mod, rc_all[[var]], impvar[i], plot = FALSE)
    partial[[paste(var, i)]] <- tibble(metric = metric,
                                       variable = variable,
                                       met_var = impvar[i],
                                       x = pp$x, y = pp$y)
  }
}

import <- bind_rows(import)
partial <- bind_rows(partial)

4.1 Resistance

library(forcats)
f_imp_plot <- function(x) {
  ggplot(x, aes(fct_reorder(met_var, `%IncMSE`), `%IncMSE`)) + 
    geom_col() +
    coord_flip() +
    xlab("Met variable") +
    ggtitle(unique(x$variable))
}
d <- filter(import, metric == "resistance")
xx <- lapply(split(d, d$variable), f_imp_plot)

library(cowplot)
varimp_plot <- plot_grid(xx[["GPP"]], xx[["NEP"]], xx[["NPP"]], xx[["Rh"]])
print(varimp_plot)

save_plot("varimp_resistance.png", varimp_plot)

p_pdp <- ggplot(filter(partial, metric == "resistance"), 
                aes(x, y, color = variable)) + 
  geom_line() + 
  xlab("Met var value") + ylab("Resistance") +
  ggtitle("Partial dependence plot") +
  facet_grid(variable ~ met_var, scales = "free")
print(p_pdp)

4.2 Resilience

# Re-do above plots but now for resilience
d <- filter(import, metric == "resilience")
xx <- lapply(split(d, d$variable), f_imp_plot)
varimp_plot <- plot_grid(xx[["GPP"]], xx[["NEP"]], xx[["NPP"]], xx[["Rh"]])
print(varimp_plot)

save_plot("varimp_resilience.png", varimp_plot)

p_pdp <- p_pdp %+% filter(partial, metric == "resilience")
print(p_pdp + ylab("Resilience"))

5 Summary

# Combined partial dependence plot
partial %>% 
  mutate(metric = if_else(metric == "resistance", "Resistance", "Resilience")) %>% 
  ggplot(aes(x, y, color = variable)) + 
  geom_line() + 
  xlab("Meteorological variable value") + ylab("Metric value") +
  scale_color_discrete("") +
  facet_grid(metric ~ met_var, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))

For resistance, tmp (air temperature) and vbdsf (sunlight) are among the top-three most important drivers for all four output variables (GPP, NEP, NPP, Rh). Higher temperature and more light translate into higher resistance for all variables.

This is largely true for resilience as well, although prate (precipitation) is important for both NPP and Rh; in fact precipitation is far and away the most important variable for Rh. Here, however, higher temperature and more light mean less resilience.

Interesting that when GPP is included as a condition that could explain the resistance & resilience metrics it really only affect the Rh values.

metric analysis

09 July, 2021