Obtain outputs from Euler, generated with feature_elimination_leafnp.R. Download the CSV files into data/. Load the data aggregated to sites, done in randomforest_leafnp.Rmd.
dfs <- readRDS("data/dfs_leafnp_20210729.rds")
target <- "leafN"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe provides the full information about the updated R2 after the respective variable was dropped. With this, we can re-create the feature elimination, getting the name of the candidate predictor at each step ("level") for which, when dropped, the model still achieved the highest R2.
df_fe_summary_reconstr <- df_fe %>%
group_by(level) %>%
filter(rsq == max(rsq)) %>%
ungroup() %>%
select(-level)
all_equal(df_fe_summary, df_fe_summary_reconstr)
## [1] TRUE
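As an aside, dplyr's all_equal() has since been deprecated. A base-R check that does the same job, provided the row order matches (a sketch, not part of the original pipeline):

## compare the two data frames, ignoring attributes such as the readr spec
isTRUE(all.equal(as.data.frame(df_fe_summary),
                 as.data.frame(df_fe_summary_reconstr),
                 check.attributes = FALSE))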
df_fe_summary provides information about the updated R2 after the respective variable was dropped.
df_fe_summary %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step)) %>%
ggplot(aes(pred, rsq)) +
geom_bar(stat = "identity") +
coord_flip()
This is a bit misleading, as "ndep" doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
preds[which(!(preds %in% df_fe_summary$pred))]
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for "ndep" (the single-variable model), and the row above that is for "mai", where the rsq is given for the model that contains "ndep + mai". The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), "ndep", pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
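To make this re-indexing concrete, here is a toy illustration with made-up numbers (hypothetical, for exposition only):

## toy feature-elimination summary: 'pred' = variable dropped at each step
toy <- tibble(pred = c("ALL", "x1", "x2"), rsq = c(0.70, 0.69, 0.55))
toy %>%
  mutate(pred_new = lead(pred)) %>%   ## shift the 'dropped' labels up one row
  mutate(pred_new = ifelse(is.na(pred_new), "x3", pred_new))  ## "x3" = last remaining variable
## each row's pred_new now names the variable added relative to the row below:
## the bottom row is the single-variable model ("x3"), the top row the full model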
df_fe_summary %>%
mutate(highlight = ifelse(pred == "ALSA", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafN.pdf", width = 6, height = 6)
gga1 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "ALSA", "yes", "no")) %>%
tail(n = 10) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga1
ggsave("fig/rsq_stepwise_leafN_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after ALSA (more generously, after cwdx80). In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice((nrow(.)-8):nrow(.)) %>%
pull(pred) %>%
as.character()
longlist
## [1] "cwdx80" "ndaysgs" "ALSA" "mav" "elv" "co2"
## [7] "tmonthmin" "mai" "ndep"
shortlist <- df_fe_summary %>%
slice((nrow(.)-6):nrow(.)) %>%
pull(pred) %>%
as.character()
shortlist
## [1] "ALSA" "mav" "elv" "co2" "tmonthmin" "mai"
## [7] "ndep"
With just these (shortlist), fit a RF model again.
filn <- "data/mod_rf_caret_leafn.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafn <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(leafN ~ ., data = dplyr::select(dfs, leafN, all_of(shortlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafn <- train(
pp,
data = dplyr::select(dfs, leafN, all_of(shortlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = TRUE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
mod_rf_caret_leafn
saveRDS(mod_rf_caret_leafn, file = filn)
}
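Since the model is trained with importance = "impurity", its variable importances can be inspected directly from the caret object (a quick check, not part of the original output):

## impurity-based variable importance of the fitted ranger model
caret::varImp(mod_rf_caret_leafn)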
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafn$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafn$bestTune$mtry,
splitrule == mod_rf_caret_leafn$bestTune$splitrule,
min.node.size == mod_rf_caret_leafn$bestTune$min.node.size) %>%
    ## split the resample label ("Fold1" -> "1") to keep just the fold number
    separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
## Loading required package: LSD
## Loading required package: ggthemes
## Loading required package: RColorBrewer
gg1 <- out$gg +
ylim(5,40) + xlim(5,40) +
labs(x = "Predicted leaf N (mg/g)", y = "Observed leaf N (mg/g)")
gg1
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 64 rows containing non-finite values (stat_smooth).
## Warning: Removed 64 rows containing missing values (geom_point).
library(pdp)
## prediction wrapper: pdp::partial() expects pred.fun to return a plain vector
pred <- function(object, newdata) {
  as.vector(predict(object, newdata))
}
partial(
mod_rf_caret_leafn,
train = dplyr::select(dfs, leafN, all_of(shortlist)),
pred.var = "ndep",
pred.fun = pred,
grid.resolution = 20,
plot = TRUE,
center = TRUE,
plot.engine = "ggplot2"
)
## Warning: Ignoring unknown parameters: smooth, contour, contour.colour, palette
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `object[["yhat.id"]]` is discouraged. Use `.data[["yhat.id"]]`
## instead.
## Warning: Use of `object[[1L]]` is discouraged. Use `.data[[1L]]` instead.
## Warning: Use of `object[["yhat"]]` is discouraged. Use `.data[["yhat"]]`
## instead.
## Warning: Use of `object[[1L]]` is discouraged. Use `.data[[1L]]` instead.
## Warning: Use of `object[["yhat"]]` is discouraged. Use `.data[["yhat"]]`
## instead.
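The same call can be mapped over all shortlist predictors to obtain partial-dependence data for each of them; a possible sketch (same model and training data as above, without the plotting):

## partial-dependence data for each shortlist predictor
pdp_data <- lapply(shortlist, function(v){
  partial(mod_rf_caret_leafn,
          train = dplyr::select(dfs, leafN, all_of(shortlist)),
          pred.var = v,
          grid.resolution = 20)
})
names(pdp_data) <- shortlist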
target <- "leafP"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe_summary provides information about the updated R2 after the respective variable was dropped. This is a bit misleading, as the last dropped variable doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
pred_last <- preds[which(!(preds %in% df_fe_summary$pred))]
pred_last
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for the last remaining variable (pred_last), and each row above gives the rsq for the model that additionally contains the respective variable. The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), pred_last, pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
df_fe_summary %>%
mutate(highlight = ifelse(pred == "T_GRAVEL", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafP.pdf", width = 6, height = 6)
gga2 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "T_GRAVEL", "yes", "no")) %>%
tail(n = 18) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga2
ggsave("fig/rsq_stepwise_leafP_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after "pmonthmin". In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice((nrow(.)-14):nrow(.)) %>%
pull(pred) %>%
as.character()
longlist
## [1] "T_GRAVEL" "elv" "mai" "vcmax25" "cwdx80" "mapgs"
## [7] "pmonthmin" "AWC_CLASS" "gs_accl" "alpha" "PBR" "co2"
## [13] "ai" "tmonthmin" "ndep"
With just these (longlist), fit again a RF model.
filn <- "data/mod_rf_caret_leafp.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafp <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(leafP ~ ., data = dplyr::select(dfs, leafP, all_of(longlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice based on leaf N
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafp <- train(
pp,
data = dplyr::select(dfs, leafP, all_of(longlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = FALSE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
mod_rf_caret_leafp
saveRDS(mod_rf_caret_leafp, file = filn)
}
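Whether the model was refit or read back from file, the cross-validated metrics of the tuned parameter set are available from the caret object (a quick check):

## cross-validated RMSE, R2, and MAE
mod_rf_caret_leafp$results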
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafp$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafp$bestTune$mtry,
splitrule == mod_rf_caret_leafp$bestTune$splitrule,
min.node.size == mod_rf_caret_leafp$bestTune$min.node.size) %>%
separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
gg2 <- out$gg +
ylim(0, 5) + xlim(0, 5) +
labs(x = "Predicted leaf P (mg/g)", y = "Observed leaf P (mg/g)")
gg2
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
target <- "LeafNP"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe_summary provides information about the updated R2 after the respective variable was dropped. This is a bit misleading, as the last dropped variable doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
pred_last <- preds[which(!(preds %in% df_fe_summary$pred))]
pred_last
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for the last remaining variable (pred_last), and each row above gives the rsq for the model that additionally contains the respective variable. The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), pred_last, pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
df_fe_summary %>%
mutate(highlight = ifelse(pred == "jmax25", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafNP.pdf", width = 6, height = 6)
gga3 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "jmax25", "yes", "no")) %>%
tail(n = 17) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga3
ggsave("fig/rsq_stepwise_leafNP_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after "cwdx80". In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice(32:45) %>%
pull(pred) %>%
as.character()
longlist
## [1] "jmax25" "T_CEC_SOIL" "CNrt" "mapgs" "mat"
## [6] "cwdx80" "AWC_CLASS" "ai" "T_PH_H2O" "TP"
## [11] "co2" "gs_accl" "matgs" "ndep"
For leaf N:P, remove an outlier:
dfs_sub <- dfs %>%
filter(LeafNP < 70)
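To see how restrictive this filter is, count the sites it removes (a quick check; note that filter() also drops rows where LeafNP is NA):

## number of sites with LeafNP >= 70
sum(dfs$LeafNP >= 70, na.rm = TRUE)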
With just these (longlist), fit again a RF model.
filn <- "data/mod_rf_caret_leafnp.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafnp <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(LeafNP ~ ., data = dplyr::select(dfs_sub, LeafNP, all_of(longlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice based on leaf N:P
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafnp <- train(
pp,
data = dplyr::select(dfs_sub, LeafNP, all_of(longlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = TRUE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
saveRDS(mod_rf_caret_leafnp, file = filn)
mod_rf_caret_leafnp
}
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafnp$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafnp$bestTune$mtry,
splitrule == mod_rf_caret_leafnp$bestTune$splitrule,
min.node.size == mod_rf_caret_leafnp$bestTune$min.node.size) %>%
separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
gg3 <- out$gg +
ylim(0, 30) + xlim(0, 30) +
labs(x = "Predicted leaf N:P", y = "Observed leaf N:P")
gg3
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 150 rows containing non-finite values (stat_smooth).
## Warning: Removed 150 rows containing missing values (geom_point).
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
plot_grid(gg1, gg2, gg3, labels = c("a", "b", "c"), ncol = 3)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 64 rows containing non-finite values (stat_smooth).
## Warning: Removed 64 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 150 rows containing non-finite values (stat_smooth).
## Warning: Removed 150 rows containing missing values (geom_point).
ggsave("fig/modobs_all.pdf", width = 12, height = 4)