Obtain outputs from Euler, generated with feature_elimination_leafnp.R. Download the CSV files into data/. Load the data aggregated to sites, done in randomforest_leafnp.Rmd.
dfs <- readRDS("data/dfs_leafnp_20210729.rds")
target <- "leafN"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe provides the full information about the updated R2 after the respective variable was dropped. With this, we can re-create the feature elimination, getting the name of the candidate predictor at each step ("level") for which, when dropped, the model still achieved the highest R2.
df_fe_summary_reconstr <- df_fe %>%
group_by(level) %>%
filter(rsq == max(rsq)) %>%
ungroup() %>%
select(-level)
all_equal(df_fe_summary, df_fe_summary_reconstr)
## [1] TRUE
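As an aside, dplyr's all_equal() has since been deprecated. A base-R check that does the same job, provided the row order matches (a sketch, not part of the original pipeline):

## compare the two data frames, ignoring attributes such as the readr spec
isTRUE(all.equal(as.data.frame(df_fe_summary),
                 as.data.frame(df_fe_summary_reconstr),
                 check.attributes = FALSE))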
df_fe_summary provides information about the updated R2 after the respective variable was dropped.
df_fe_summary %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step)) %>%
ggplot(aes(pred, rsq)) +
geom_bar(stat = "identity") +
coord_flip()
This is a bit misleading, as "ndep" doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
preds[which(!(preds %in% df_fe_summary$pred))]
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for "ndep" (the single-variable model), and the row above that is for "mai", where the rsq is given for the model that contains "ndep + mai". The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), "ndep", pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
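To make this re-indexing concrete, here is a toy illustration with made-up numbers (hypothetical, for exposition only):

## toy feature-elimination summary: 'pred' = variable dropped at each step
toy <- tibble(pred = c("ALL", "x1", "x2"), rsq = c(0.70, 0.69, 0.55))
toy %>%
  mutate(pred_new = lead(pred)) %>%   ## shift the 'dropped' labels up one row
  mutate(pred_new = ifelse(is.na(pred_new), "x3", pred_new))  ## "x3" = last remaining variable
## each row's pred_new now names the variable added relative to the row below:
## the bottom row is the single-variable model ("x3"), the top row the full model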
df_fe_summary %>%
mutate(highlight = ifelse(pred == "ALSA", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafN.pdf", width = 6, height = 6)
gga1 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "ALSA", "yes", "no")) %>%
tail(n = 10) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga1
ggsave("fig/rsq_stepwise_leafN_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after ALSA (more generously, after cwdx80). In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice((nrow(.)-8):nrow(.)) %>%
pull(pred) %>%
as.character()
longlist
## [1] "cwdx80" "ndaysgs" "ALSA" "mav" "elv" "co2"
## [7] "tmonthmin" "mai" "ndep"
shortlist <- df_fe_summary %>%
slice((nrow(.)-6):nrow(.)) %>%
pull(pred) %>%
as.character()
shortlist
## [1] "ALSA" "mav" "elv" "co2" "tmonthmin" "mai"
## [7] "ndep"
With just these (shortlist), fit a RF model again.
filn <- "data/mod_rf_caret_leafn.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafn <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(leafN ~ ., data = dplyr::select(dfs, leafN, all_of(shortlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafn <- train(
pp,
data = dplyr::select(dfs, leafN, all_of(shortlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = TRUE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
mod_rf_caret_leafn
saveRDS(mod_rf_caret_leafn, file = filn)
}
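Since the model is trained with importance = "impurity", its variable importances can be inspected directly from the caret object (a quick check, not part of the original output):

## impurity-based variable importance of the fitted ranger model
caret::varImp(mod_rf_caret_leafn)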
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafn$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafn$bestTune$mtry,
splitrule == mod_rf_caret_leafn$bestTune$splitrule,
min.node.size == mod_rf_caret_leafn$bestTune$min.node.size) %>%
    ## split the resample label ("Fold1" -> "1") to keep just the fold number
    separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
## Loading required package: LSD
## Loading required package: ggthemes
## Loading required package: RColorBrewer
gg1 <- out$gg +
ylim(5,40) + xlim(5,40) +
labs(x = "Predicted leaf N (mg/g)", y = "Observed leaf N (mg/g)")
gg1
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 64 rows containing non-finite values (stat_smooth).
## Warning: Removed 64 rows containing missing values (geom_point).
library(pdp)
## prediction wrapper: pdp::partial() expects pred.fun to return a plain vector
pred <- function(object, newdata) {
  as.vector(predict(object, newdata))
}
partial(
mod_rf_caret_leafn,
train = dplyr::select(dfs, leafN, all_of(shortlist)),
pred.var = "ndep",
pred.fun = pred,
grid.resolution = 20,
plot = TRUE,
center = TRUE,
plot.engine = "ggplot2"
)
## Warning: Ignoring unknown parameters: smooth, contour, contour.colour, palette
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Use of `object[["yhat.id"]]` is discouraged. Use `.data[["yhat.id"]]`
## instead.
## Warning: Use of `object[[1L]]` is discouraged. Use `.data[[1L]]` instead.
## Warning: Use of `object[["yhat"]]` is discouraged. Use `.data[["yhat"]]`
## instead.
## Warning: Use of `object[[1L]]` is discouraged. Use `.data[[1L]]` instead.
## Warning: Use of `object[["yhat"]]` is discouraged. Use `.data[["yhat"]]`
## instead.
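The same call can be mapped over all shortlist predictors to obtain partial-dependence data for each of them; a possible sketch (same model and training data as above, without the plotting):

## partial-dependence data for each shortlist predictor
pdp_data <- lapply(shortlist, function(v){
  partial(mod_rf_caret_leafn,
          train = dplyr::select(dfs, leafN, all_of(shortlist)),
          pred.var = v,
          grid.resolution = 20)
})
names(pdp_data) <- shortlist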
target <- "leafP"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe_summary provides information about the updated R2 after the respective variable was dropped. This is a bit misleading, as the last dropped variable doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
pred_last <- preds[which(!(preds %in% df_fe_summary$pred))]
pred_last
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for the last remaining variable (pred_last), and each row above gives the rsq for the model that additionally contains the respective variable. The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), pred_last, pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
df_fe_summary %>%
mutate(highlight = ifelse(pred == "T_GRAVEL", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafP.pdf", width = 6, height = 6)
gga2 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "T_GRAVEL", "yes", "no")) %>%
tail(n = 18) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga2
ggsave("fig/rsq_stepwise_leafP_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after "pmonthmin". In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice((nrow(.)-14):nrow(.)) %>%
pull(pred) %>%
as.character()
longlist
## [1] "T_GRAVEL" "elv" "mai" "vcmax25" "cwdx80" "mapgs"
## [7] "pmonthmin" "AWC_CLASS" "gs_accl" "alpha" "PBR" "co2"
## [13] "ai" "tmonthmin" "ndep"
With just these (longlist), fit again a RF model.
filn <- "data/mod_rf_caret_leafp.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafp <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(leafP ~ ., data = dplyr::select(dfs, leafP, all_of(longlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice based on leaf N
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafp <- train(
pp,
data = dplyr::select(dfs, leafP, all_of(longlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = FALSE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
mod_rf_caret_leafp
saveRDS(mod_rf_caret_leafp, file = filn)
}
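Whether the model was refit or read back from file, the cross-validated metrics of the tuned parameter set are available from the caret object (a quick check):

## cross-validated RMSE, R2, and MAE
mod_rf_caret_leafp$results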
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafp$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafp$bestTune$mtry,
splitrule == mod_rf_caret_leafp$bestTune$splitrule,
min.node.size == mod_rf_caret_leafp$bestTune$min.node.size) %>%
separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
gg2 <- out$gg +
ylim(0, 5) + xlim(0, 5) +
labs(x = "Predicted leaf P (mg/g)", y = "Observed leaf P (mg/g)")
gg2
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
target <- "LeafNP"
df_fe_summary <- read_csv(paste0("data/df_fe_summary_", target, ".csv"))
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (1): rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe <- read_csv(paste0("data/df_fe_", target, ".csv"))
## Rows: 1035 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (2): level, rsq
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_vip <- read_csv(paste0("data/df_vip_", target, ".csv"))
## Rows: 45 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): pred
## dbl (3): level, rsq, vip
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_fe_summary provides information about the updated R2 after the respective variable was dropped. This is a bit misleading, as the last dropped variable doesn’t appear.
preds <- c("elv", "mat", "matgs", "tmonthmin", "tmonthmax", "ndaysgs", "mai", "maigs", "map", "pmonthmin",
"mapgs", "mavgs", "mav", "alpha", "vcmax25", "jmax25", "gs_accl", "aet", "ai", "cwdx80", "gti",
"ndep", "co2", "T_BULK_DENSITY", "AWC_CLASS", "T_CLAY", "T_SILT", "T_SAND", "T_GRAVEL", "T_PH_H2O",
"T_TEB", "T_BS", "T_CEC_SOIL", "T_CEC_CLAY", "T_ECE", "T_ESP", "T_CACO3", "T_OC", "ORGC", "TOTN",
"CNrt", "ALSA", "PBR", "TP", "TK")
pred_last <- preds[which(!(preds %in% df_fe_summary$pred))]
pred_last
## [1] "ndep"
This is because it is the only remaining variable in the last model. Let’s change that so that the column pred can be interpreted more clearly. First, pred = NA can be interpreted as a model including all predictors.
df_fe_summary <- df_fe_summary %>%
mutate(pred = ifelse(is.na(pred), "ALL", pred))
Second, the data frame df_fe_summary as written by the feature elimination contains the column pred, interpreted as the variable dropped in the respective step. Instead, re-define it so that it is interpreted as the variable added, relative to the model of the row below. Like this, the bottom row is for the last remaining variable (pred_last), and each row above gives the rsq for the model that additionally contains the respective variable. The top row is for the model containing all predictors. Interestingly, note at which step the highest rsq (based on 5-fold cross-validation!) is achieved.
df_fe_summary <- df_fe_summary %>%
filter(pred == "ALL") %>%
bind_rows(df_fe_summary %>%
filter(pred != "ALL")) %>%
mutate(pred_new = lead(pred)) %>%
mutate(pred_new = ifelse(is.na(pred_new), pred_last, pred_new)) %>%
select(-pred) %>%
rename(pred = pred_new) %>%
mutate(step = rev(1:n())) %>%
mutate(pred = fct_reorder(pred, step))
df_fe_summary %>%
mutate(highlight = ifelse(pred == "jmax25", "yes", "no")) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
ggsave("fig/rsq_stepwise_leafNP.pdf", width = 6, height = 6)
gga3 <- df_fe_summary %>%
mutate(highlight = ifelse(pred == "jmax25", "yes", "no")) %>%
tail(n = 17) %>%
ggplot(aes(pred, rsq, fill = highlight)) +
scale_fill_manual(values = c("yes" = "tomato", "no" = "gray50"), guide = "none") +
geom_bar(stat = "identity") +
labs(x = "", y = expression(italic(R)^2)) +
coord_flip() +
theme_classic()
gga3
ggsave("fig/rsq_stepwise_leafNP_sub.pdf", width = 6, height = 6)
This shows that there are negligible gains after "cwdx80". In other words, we might as well build a model with just the following predictors:
longlist <- df_fe_summary %>%
slice(32:45) %>%
pull(pred) %>%
as.character()
longlist
## [1] "jmax25" "T_CEC_SOIL" "CNrt" "mapgs" "mat"
## [6] "cwdx80" "AWC_CLASS" "ai" "T_PH_H2O" "TP"
## [11] "co2" "gs_accl" "matgs" "ndep"
For leaf N:P, remove an outlier:
dfs_sub <- dfs %>%
filter(LeafNP < 70)
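To see how restrictive this filter is, count the sites it removes (a quick check; note that filter() also drops rows where LeafNP is NA):

## number of sites with LeafNP >= 70
sum(dfs$LeafNP >= 70, na.rm = TRUE)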
With just these (longlist), fit again a RF model.
filn <- "data/mod_rf_caret_leafnp.rds"
if (file.exists(filn) && !overwrite){
mod_rf_caret_leafnp <- readRDS(filn)
} else {
## create generic formula for the model and define preprocessing steps
pp <- recipe(LeafNP ~ ., data = dplyr::select(dfs_sub, LeafNP, all_of(longlist))) %>%
    ## impute by median as part of the recipe
    ## (step_medianimpute() was later renamed step_impute_median() in recipes)
    step_medianimpute(all_predictors())
traincotrlParams <- trainControl(
method = "cv",
number = 5,
verboseIter = FALSE,
savePredictions = "final"
)
## best choice based on leaf N:P
tune_grid <- expand.grid( .mtry = 3,
.min.node.size = 8,
.splitrule = "variance"
)
set.seed(1982)
mod_rf_caret_leafnp <- train(
pp,
data = dplyr::select(dfs_sub, LeafNP, all_of(longlist)),
metric = "RMSE",
method = "ranger",
tuneGrid = tune_grid,
trControl = traincotrlParams,
replace = TRUE,
sample.fraction = 0.5,
num.trees = 2000, # boosted for the final model
importance = "impurity" # for variable importance analysis, alternative: "permutation"
)
saveRDS(mod_rf_caret_leafnp, file = filn)
mod_rf_caret_leafnp
}
Visualise cross-validation results using results from the best tuned model.
## get predicted values from cross-validation resamples
df_cv <- mod_rf_caret_leafnp$pred %>%
as_tibble() %>%
dplyr::filter(mtry == mod_rf_caret_leafnp$bestTune$mtry,
splitrule == mod_rf_caret_leafnp$bestTune$splitrule,
min.node.size == mod_rf_caret_leafnp$bestTune$min.node.size) %>%
separate(Resample, into = c(NA, "Fold"), sep = "old") %>%
dplyr::rename(idx = rowIndex)
out <- df_cv %>%
rbeni::analyse_modobs2("pred", "obs", type = "heat", shortsubtitle = TRUE)
gg3 <- out$gg +
ylim(0, 30) + xlim(0, 30) +
labs(x = "Predicted leaf N:P", y = "Observed leaf N:P")
gg3
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 150 rows containing non-finite values (stat_smooth).
## Warning: Removed 150 rows containing missing values (geom_point).
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
plot_grid(gg1, gg2, gg3, labels = c("a", "b", "c"), ncol = 3)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 64 rows containing non-finite values (stat_smooth).
## Warning: Removed 64 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 150 rows containing non-finite values (stat_smooth).
## Warning: Removed 150 rows containing missing values (geom_point).
ggsave("fig/modobs_all.pdf", width = 12, height = 4)