TPO Heat Map Analysis
TPOSampleJoined %>%
ggplot(aes(log10(TOT_MCF))) +
geom_density() +
scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30)) +
labs(title = "Logged Volume Distribution - Unfiltered", x = "Log(10) MCF", y = "Density")
TPOSampleJoined %>%
filter(TOT_MCF < 16000 & Pro_Rad < 200 & !is.na(Pro_Rad)) %>%
ggplot(aes(log10(TOT_MCF))) +
geom_density() +
scale_x_continuous(limits = c(-2,5), breaks = c(seq(-2,5,1))) +
scale_y_continuous(limits = c, breaks = c(seq(0,.7,.1))) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30)) +
labs(title = "Logged Volume Distribution - Model Obs. Only", x = "Log(10) MCF", y = "Density")
TPOSampleJoined %>%
ggplot(aes(sqrt(Pro_Rad)))+
geom_density() +
scale_x_continuous(limits = c(0, 20), breaks = c(seq(0,20,2))) +
scale_y_continuous(limits = c(0,.25), breaks = c(seq(0,.25,.05))) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30)) +
labs(title = "SqRt Radius Distribution", x = "SqRt Procurement Radius", y = "Density")
Agg <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad)) %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2) +
stat_qq_line() +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
L <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Lake") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2, color = "blue") +
stat_qq_line(color = "blue") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
NE <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "NewEngland") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2, color = "red") +
stat_qq_line(color = "red") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
MA <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "MidAtl") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2, color = "orange") +
stat_qq_line(color = "orange") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
P <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Plains") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2, color = "brown") +
stat_qq_line(color = "brown") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
C <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Central") %>%
ggplot(aes(sample = log10(TOT_MCF))) +
stat_qq(geom = "point", size = 2, color = "darkgreen") +
stat_qq_line(color = "darkgreen") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(-2,5,1)), limits = c(-2,5.2)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif'))
annotate_figure(Test,
left = textGrob("Log10(MCF)", rot = 90, vjust = .4, gp = gpar(cex = 4, fontfamily = "serif")),
bottom = textGrob("Theoretical Quantiles",gp = gpar(cex = 4, fontfamily = "serif")),
top = textGrob("Normal-QQ Plots - Log10(MCF)", gp = gpar(cex = 6, fontfamily = "serif")))
rm(Agg,C,L,MA,NE,P)
Agg <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Pro_Rad < 200) %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2) +
stat_qq_line() +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,5)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.1,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
L <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Lake") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2, color = "blue") +
stat_qq_line(color = "blue") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
NE <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "NewEngland") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2, color = "red") +
stat_qq_line(color = "red") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
MA <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "MidAtl") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2, color = "orange") +
stat_qq_line(color = "orange") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
P <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Plains") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2, color = "brown") +
stat_qq_line(color = "brown") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
C <- TPOSampleJoined %>%
filter(!is.na(Pro_Rad) & Region == "Central") %>%
ggplot(aes(sample = sqrt(Pro_Rad))) +
stat_qq(geom = "point", size = 2, color = "darkgreen") +
stat_qq_line(color = "darkgreen") +
scale_x_continuous(breaks = c(seq(-2,2,1)), limits = c(-4,4)) +
scale_y_continuous(breaks = c(seq(0,15,3)), limits = c(-.4,15)) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(legend.position = "none", title = element_text(size = 20, family = "serif"))
Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif'))
annotate_figure(Test,
left = textGrob("Sqrt(Procurement Radius)", rot = 90, gp = gpar(cex = 4, fontfamily = "serif")),
bottom = textGrob("Theoretical Quantiles",gp = gpar(cex = 4, fontfamily = "serif")),
top = textGrob("Normal-QQ Plots - Procurement Radius", gp = gpar(cex = 6, fontfamily = "serif")))
rm(Agg,C,L,MA,NE,P)
# ggplot(TPOSampleJoined, aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
# geom_point() +
# geom_smooth(se = FALSE, method = "lm", fullrange = TRUE) +
# scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
# scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
# facet_wrap(~Region, ncol = 3) +
# theme_fivethirtyeight(base_size = 20, base_family = "serif") +
# theme(legend.position = "none", title = element_text(size = 20, family = "serif")) +
# labs(title = "Regression Observations") +
# ggpubr::stat_regline_equation(label.x = 1, label.y = 14, aes(label = ..rr.label..))
Agg <- TPOSampleJoined %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3) +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "black") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
C <- TPOSampleJoined %>%
filter(Region == "Central") %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3, color = "darkgreen") +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "darkgreen") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
L <- TPOSampleJoined %>%
filter(Region == "Lake") %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3, color = "blue") +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "blue") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
MA <- TPOSampleJoined %>%
filter(Region == "MidAtl") %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3, color = "orange") +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "orange") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
NE <- TPOSampleJoined %>%
filter(Region == "NewEngland") %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3, color = "red") +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "red") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
P <- TPOSampleJoined %>%
filter(Region == "Plains") %>%
ggplot(aes(LOG_TOT_MCF, Pro_Rad_SqRt)) +
geom_point(size = 3, color = "brown") +
geom_smooth(se = FALSE, method = "lm", fullrange = TRUE, color = "brown") +
scale_x_continuous(limits = c(-2, 4.5), breaks = c(seq(-2,4.5,.5))) +
scale_y_continuous(limits = c(0, 14), breaks = c(seq(0,14,2))) +
theme_fivethirtyeight(base_size = 30, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_blank(), axis.title.y = element_blank()) +
ggpubr::stat_regline_equation(label.x = 1, label.y = 14, size = 10, aes(label = ..rr.label..))
Test <- ggarrange(plotlist = list(Agg,C,L,MA,NE,P), labels = list("Agg.", "Central", "Lake", "MidAtl", "NewEng", "Plains"), font.label = list(size = 50, face = "plain", color ="black", family = 'serif'))
annotate_figure(Test,
left = textGrob("Sqrt(Procurement Radius)", rot = 90, gp = gpar(cex = 4, fontfamily = "serif")),
bottom = textGrob("Log(10) MCF",gp = gpar(cex = 4, fontfamily = "serif")),
top = textGrob("Regression Observations", gp = gpar(cex = 6, fontfamily = "serif")))
rm(Agg,C,L,MA,NE,P)
# Initialize a linear regression object, linear_model
linear_model <- linear_reg() %>%
set_engine("lm") %>%
set_mode("regression")
lm_fit <- linear_model %>%
fit(Pro_Rad_SqRt ~ LOG_TOT_MCF * Region, data = data_training)
VI <- as.data.frame(lm_fit$fit$coefficients) %>%
rownames_to_column(var = "variable") %>%
rename(scaled_importance = 'lm_fit$fit$coefficients')
VI <- VI[2:10,]
VI <- VI %>%
mutate(scaled_importance = abs(1/(1-(scaled_importance^2))),
scaled_importance = scaled_importance/max(scaled_importance))
VI %>%
mutate(variable = as.factor(variable),
variable = fct_reorder(variable, scaled_importance)) %>%
filter(scaled_importance > 0) %>%
ggplot(aes(y = variable, x = scaled_importance)) +
geom_col() +
scale_x_continuous(breaks = c(seq(0,1,.1)), limits = c(0,1)) +
labs(title = "Linear Regression - Scaled Variable Importance Scores", subtitle = "Scores calculated using test data through R-Squared values.",y = "Scaled VI Score", x = "Variable") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 30))
# Predict outcome
predictions <- predict(lm_fit, new_data = data_test)
# Combine test data with predictions
data_test_results <- data_test %>%
select(LOG_TOT_MCF, Region, Pro_Rad_SqRt) %>%
bind_cols(predictions)
# data_test_results <- data_test_results %>%
# mutate(Pro_Rad_SqRt = Pro_Rad_SqRt^2,
# .pred = .pred^2)
lin_testdata <- data_test_results %>%
mutate(residuals = abs(Pro_Rad_SqRt - .pred))
lin_test_results <- lin_testdata %>%
select(Pro_Rad_SqRt, .pred) %>%
bind_cols(mean_res = mean(lin_testdata$residuals))
lin_rmse <- round(rmse(lin_test_results$.pred, lin_test_results$Pro_Rad_SqRt), 3)
lin_mae <- round(mae_vec(lin_test_results$.pred, lin_test_results$Pro_Rad_SqRt), 3)
lin_r2 <- round(R2_Score(lin_test_results$.pred, lin_test_results$Pro_Rad_SqRt), 3)
paste0("Overall RMSE = ", round(rmse(lin_test_results$.pred, lin_test_results$Pro_Rad_SqRt), 3))
[1] "Overall RMSE = 2.074"
test <- lin_testdata %>%
dplyr::group_by(Region) %>%
summarise_at(vars(residuals), list(MeanRes = mean))
test2 <- lin_testdata %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, .pred))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
ggplot(lin_testdata, aes(.pred, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(1, 9), breaks = c(seq(1,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "Linear Regression - Test Data Residual Plot", subtitle = "SqRt(Procurement Radius)" , x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
#EVALUATE THE MODEL BEFORE PROCEEDING, then use last_fit() to fit on the test dataset
# Define a linear regression model
linear_model <- linear_reg() %>%
set_engine('lm') %>%
set_mode('regression')
# Train linear_model with last_fit()
linear_fit <- linear_model %>%
last_fit(Pro_Rad_SqRt ~ LOG_TOT_MCF * Region, split = data_split)
# Collect predictions
lin_finalpreds <- linear_fit %>%
collect_predictions() %>%
mutate(Res = abs(Pro_Rad_SqRt - .pred))
rm(linear_fit, linear_model, lm_fit, predictions, data_test_results, lin_testdata)
# Initialize a linear regression object, linear_model
mgcv_model <- gen_additive_mod() %>%
set_engine("mgcv") %>%
set_mode("regression")
mgcv_fit <- mgcv_model %>%
fit(Pro_Rad_SqRt ~ s(LOG_TOT_MCF, by = Region) + Region,
data = data_training, method = "REML")
VI <- as.data.frame(mgcv_fit$fit$coefficients) %>%
rownames_to_column(var = "variable") %>%
rename(scaled_importance = 'mgcv_fit$fit$coefficients')
VI <- VI[2:10,]
VI <- VI %>%
mutate(scaled_importance = abs(1/(1-(scaled_importance^2))),
scaled_importance = scaled_importance/max(scaled_importance))
VI %>%
mutate(variable = as.factor(variable),
variable = fct_reorder(variable, scaled_importance)) %>%
filter(scaled_importance > 0) %>%
ggplot(aes(y = variable, x = scaled_importance)) +
geom_col() +
scale_x_continuous(breaks = c(seq(0,1,.1)), limits = c(0,1)) +
labs(title = "Additive Regression - Scaled Variable Importance Scores", subtitle = "Scores calculated using test data through R-Squared values.",y = "Scaled VI Score", x = "Variable") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 30))
# Predict outcome
mgcv_testpreds <- predict(mgcv_fit, new_data = data_test)
mgcv_finalpreds <- predict(mgcv_fit, new_data = Test_Data)
# Combine test data with predictions
mgcv_testpreds <- data_test %>%
select(LOG_TOT_MCF, Region, Pro_Rad_SqRt) %>%
bind_cols(mgcv_testpreds) %>%
mutate(residuals = abs(Pro_Rad_SqRt - .pred))
mgcv_rmse <- round(rmse(mgcv_testpreds$.pred, mgcv_testpreds$Pro_Rad_SqRt), 3)
mgcv_mae <- round(mae_vec(mgcv_testpreds$.pred, mgcv_testpreds$Pro_Rad_SqRt), 3)
mgcv_r2 <- round(R2_Score(mgcv_testpreds$.pred, mgcv_testpreds$Pro_Rad_SqRt), 3)
paste0("Overall RMSE = ", round(rmse(mgcv_testpreds$.pred, mgcv_testpreds$Pro_Rad_SqRt), 3))
[1] "Overall RMSE = 2.098"
# Combine final data with predictions
mgcv_finalpreds <- Test_Data %>%
select(LOG_TOT_MCF, Region, Pro_Rad_SqRt) %>%
bind_cols(mgcv_finalpreds) %>%
mutate(residuals = abs(Pro_Rad_SqRt - .pred))
test <- mgcv_testpreds %>%
dplyr::group_by(Region) %>%
summarise_at(vars(residuals), list(MeanRes = mean))
test2 <- mgcv_testpreds %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, .pred))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
ggplot(mgcv_testpreds, aes(.pred, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(1, 9), breaks = c(seq(1,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "Additive Regression - Test Data Residual Plot", subtitle = "SqRt(Procurement Radius)",
x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
# mgcv_testpreds <- mgcv_testpreds %>%
# rmse(Pro_Rad_SqRt, .pred) %>%
# bind_cols(mean_res = mean(mgcv_testpreds$residuals))
#
# mgcv_finalpreds <- mgcv_finalpreds %>%
# rmse(Pro_Rad_SqRt, .pred) %>%
# bind_cols(mean_res = mean(mgcv_finalpreds$residuals))
rm(mgcv_fit, mgcv_model)
dt_model <- decision_tree() %>%
set_engine('rpart') %>%
set_mode('regression')
# Build feature engineering pipeline
data_recipe <- recipe(Pro_Rad_SqRt ~ ., data = data_training) %>%
# Correlation filter
step_corr(all_numeric(), threshold = .8) %>%
# Create dummy variables
step_dummy(all_nominal(), -all_outcomes())
# Create a workflow
data_wkfl <- workflow() %>%
# Include the model object
add_model(dt_model) %>%
# Include the recipe object
add_recipe(data_recipe)
# Train the workflow
data_wkfl_fit <- data_wkfl %>%
last_fit(split = data_split)
# Calculate performance metrics on test data
data_wkfl_fit %>%
collect_metrics(summarize = FALSE)
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 2.18 Preprocessor1_Model1
2 rsq standard 0.0738 Preprocessor1_Model1
# Create cross validation folds
set.seed(260)
data_folds <- vfold_cv(data_training, v = 5, strata = Pro_Rad_SqRt)
# Create custom metrics function
data_metrics <- metric_set(yardstick::rmse)
# Fit resamples
data_rs <- data_wkfl %>%
fit_resamples(resamples = data_folds)
# Detailed cross validation results
data_results <- data_rs %>%
collect_metrics(summarize = FALSE)
# Set tuning hyperparameters
dt_tune_model <- decision_tree(tree_depth = tune(), min_n = tune()) %>%
set_engine('rpart') %>%
set_mode('regression')
# Create a tuning workflow
data_tune_wkfl <- data_wkfl %>%
# Replace model
update_model(dt_tune_model)
# Hyperparameter tuning with grid search
set.seed(284)
dt_grid <- grid_random(parameters(dt_tune_model), size = 5)
# Hyperparameter tuning
dt_tuning <- data_tune_wkfl %>%
tune_grid(resamples = data_folds, grid = dt_grid)
# Collect detailed tuning results
dt_tuning_results <- dt_tuning %>%
collect_metrics(summarize = FALSE)
# Display 5 best performing models
Test <- dt_tuning %>% show_best(metric = 'rmse', n = 5)
# Select based on best performance
best_dt_model <- dt_tuning %>%
# Choose the best model based on roc_auc
select_best(metric = 'rmse')
# Finalize your workflow
final_data_wkfl <- data_tune_wkfl %>%
finalize_workflow(best_dt_model)
# Train finalized decision tree workflow
data_final_fit <- final_data_wkfl %>%
last_fit(split = data_split)
# View performance metrics
data_final_fit %>% collect_metrics()
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 2.18 Preprocessor1_Model1
2 rsq standard 0.0738 Preprocessor1_Model1
# Create an ROC curve
dt_testpreds <- data_final_fit %>%
# Collect predictions
collect_predictions() %>%
mutate(Res = abs(Pro_Rad_SqRt - .pred)) %>%
cbind(data_test[,3]) %>%
select(-c(id, .row, .config))
test <- dt_testpreds %>%
dplyr::group_by(Region) %>%
summarise_at(vars(Res), list(MeanRes = mean))
test2 <- dt_testpreds %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, .pred))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
dtree_rmse <- round(rmse(dt_testpreds$.pred, dt_testpreds$Pro_Rad_SqRt), 3)
dtree_mae <- round(mae_vec(dt_testpreds$.pred, dt_testpreds$Pro_Rad_SqRt), 3)
dtree_r2 <- round(R2_Score(dt_testpreds$.pred, dt_testpreds$Pro_Rad_SqRt), 3)
paste0("Overall RMSE = ", round(rmse(dt_testpreds$.pred, dt_testpreds$Pro_Rad_SqRt), 3))
[1] "Overall RMSE = 2.18"
dt_testpreds %>%
ggplot(aes(.pred, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(1, 9), breaks = c(seq(1,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "Decision Tree Regression - Test Data Residual Plot",
subtitle = "SqRt(Procurement Radius)" ,x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
rf_model <- rand_forest(trees = 1000) %>%
set_engine('ranger') %>%
set_mode('regression')
# Build feature engineering pipeline
data_recipe <- recipe(Pro_Rad_SqRt ~ ., data = data_training) %>%
# Correlation filter
step_corr(all_numeric(), threshold = .8) %>%
# Create dummy variables
step_dummy(all_nominal(), -all_outcomes())
# Create a workflow
data_wkfl <- workflow() %>%
# Include the model object
add_model(rf_model) %>%
# Include the recipe object
add_recipe(data_recipe)
# Train the workflow
data_wkfl_fit <- data_wkfl %>%
last_fit(split = data_split)
# Calculate performance metrics on test data
data_wkfl_fit %>%
collect_metrics(summarize = FALSE)
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 2.06 Preprocessor1_Model1
2 rsq standard 0.150 Preprocessor1_Model1
# Create cross validation folds
set.seed(260)
data_folds <- vfold_cv(data_training, v = 5, strata = Pro_Rad_SqRt)
# Create custom metrics function
data_metrics <- metric_set(yardstick::rmse)
# Fit resamples
data_rs <- data_wkfl %>%
fit_resamples(resamples = data_folds)
# Detailed cross validation results
data_results <- data_rs %>%
collect_metrics(summarize = FALSE)
# Set tuning hyperparameters
rf_tune_model <- rand_forest(trees = tune(), mtry = tune()) %>%
set_engine('ranger') %>%
set_mode('regression')
# Create a tuning workflow
data_tune_wkfl <- data_wkfl %>%
# Replace model
update_model(rf_tune_model)
# Hyperparameter tuning with grid search
set.seed(284)
# Hyperparameter tuning
rf_tuning <- data_tune_wkfl %>%
tune_grid(resamples = data_folds)
# Collect detailed tuning results
rf_tuning_results <- rf_tuning %>%
collect_metrics(summarize = FALSE)
# Display 5 best performing models
Test <- rf_tuning %>% show_best(metric = 'rmse', n = 5)
# Select based on best performance
best_rf_model <- rf_tuning %>%
# Choose the best model based on rmse
select_best(metric = 'rmse')
# Finalize your workflow
final_data_wkfl <- data_tune_wkfl %>%
finalize_workflow(best_rf_model)
# Train finalized decision tree workflow
data_final_fit <- final_data_wkfl %>%
last_fit(split = data_split)
# View performance metrics
data_final_fit %>% collect_metrics()
# A tibble: 2 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 rmse standard 2.06 Preprocessor1_Model1
2 rsq standard 0.147 Preprocessor1_Model1
# Create an ROC curve
rf_testpreds <- data_final_fit %>%
# Collect predictions
collect_predictions() %>%
mutate(Res = abs(Pro_Rad_SqRt - .pred)) %>%
cbind(data_test[,3]) %>%
select(-c(id, .row, .config))
test <- rf_testpreds %>%
dplyr::group_by(Region) %>%
summarise_at(vars(Res), list(MeanRes = mean))
test2 <- rf_testpreds %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, .pred))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
rf_rmse <- round(rmse(rf_testpreds$.pred, rf_testpreds$Pro_Rad_SqRt), 3)
rf_mae <- round(mae_vec(rf_testpreds$.pred, rf_testpreds$Pro_Rad_SqRt), 3)
rf_r2 <- round(R2_Score(rf_testpreds$.pred, rf_testpreds$Pro_Rad_SqRt), 3)
rf_testpreds %>%
ggplot(aes(.pred, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(1, 9), breaks = c(seq(1,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "Random Forest Regression - Test Data Residual Plot",
subtitle = "SqRt(Procurement Radius)" , x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
# Specify the model class
xg_model <- boost_tree(
trees = 1000,
learn_rate = tune(),
tree_depth = tune(),
sample_size = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
# Create the tuning grid
tunegrid_boost <- grid_regular(parameters(xg_model),
levels = 5)
# Create CV folds of training data
folds <- vfold_cv(data_training, 5, strata = Pro_Rad_SqRt)
# Tune along the grid
tune_results <- tune_grid(xg_model,
Pro_Rad_SqRt ~ .,
resamples = folds,
grid = tunegrid_boost,
metrics = metric_set(yardstick::rmse))
best_params <- select_best(tune_results)
# Finalize the specification
final_spec <- finalize_model(xg_model, best_params)
# Train the final model on the full training data
final_model <- final_spec %>% fit(Pro_Rad_SqRt ~ ., data_training)
# Specify, fit, predict, and combine with training data
xg_testpreds <- final_model %>%
predict(data_test)
xg_testpreds <- data_test %>%
cbind(xg_testpreds) %>%
mutate(Res = abs(Pro_Rad_SqRt - .pred))
test <- xg_testpreds %>%
dplyr::group_by(Region) %>%
summarise_at(vars(Res), list(MeanRes = mean))
test2 <- xg_testpreds %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, .pred))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
xg_rmse <- round(rmse(xg_testpreds$.pred, xg_testpreds$Pro_Rad_SqRt), 3)
xg_mae <- round(mae_vec(xg_testpreds$.pred, xg_testpreds$Pro_Rad_SqRt), 3)
xg_r2 <- round(R2_Score(xg_testpreds$.pred, xg_testpreds$Pro_Rad_SqRt), 3)
xg_testpreds %>%
ggplot(aes(.pred, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(1, 9), breaks = c(seq(1,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30),
axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "Gradient Boosted Regression - Test Data Residual Plot",
subtitle = "SqRt(Procurement Radius)" , x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
VI <- h2o.varimp(leader)
VI <- VI %>%
mutate(variable = case_when(variable == "LOG_TOT_MCF" ~ "Log10(MCF)",
variable == "MILL_STATE" ~ "State",
variable == "MILL_TYPE_CD" ~ "Mill Type",
variable == "Region" ~ "Region"))
VI %>%
mutate(variable = as.factor(variable),
variable = fct_reorder(variable, scaled_importance)) %>%
filter(scaled_importance > 0) %>%
ggplot(aes(y = variable, x = scaled_importance)) +
geom_col() +
scale_x_continuous(breaks = c(seq(0,1,.1)), limits = c(0,1)) +
labs(title = "h2o Regression - Scaled Variable Importance Scores",
subtitle = "Scores calculated using test data.", y = "Scaled VI Score", x = "Variable") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 30))
head(lb)
model_id rmse mse
1 GBM_grid_1_AutoML_1_20221026_163026_model_10 2.029082 4.117172
2 GBM_grid_1_AutoML_1_20221026_163026_model_2 2.044099 4.178339
3 GBM_grid_1_AutoML_1_20221026_163026_model_8 2.049108 4.198843
4 GBM_grid_1_AutoML_1_20221026_163026_model_11 2.050137 4.203060
5 GBM_1_AutoML_1_20221026_163026 2.056010 4.227176
6 GLM_1_AutoML_1_20221026_163026 2.058550 4.237629
mae rmsle mean_residual_deviance
1 1.550014 0.2761806 4.117172
2 1.565655 0.2810973 4.178339
3 1.536726 0.2791560 4.198843
4 1.579500 0.2793606 4.203060
5 1.553396 0.2796475 4.227176
6 1.562654 0.2794918 4.237629
h2o_testpreds <- as.data.frame(valid) %>%
cbind(pred) %>%
mutate(Res = abs(predict - Pro_Rad_SqRt))
test <- h2o_testpreds %>%
dplyr::group_by(Region) %>%
summarise_at(vars(Res), list(MeanRes = mean))
test2 <- h2o_testpreds %>%
dplyr::group_by(Region) %>%
summarise(rmse = rmse(Pro_Rad_SqRt, predict))
test <- test %>%
left_join(test2, by = 'Region') %>%
mutate(MeanRes = round(MeanRes, 2),
rmse = round(rmse, 2))
rm(test2)
h2o_rmse <- round(rmse(h2o_testpreds$predict, h2o_testpreds$Pro_Rad_SqRt), 3)
h2o_mae <- round(mae_vec(h2o_testpreds$predict, h2o_testpreds$Pro_Rad_SqRt), 3)
h2o_r2 <- round(R2_Score(h2o_testpreds$predict, h2o_testpreds$Pro_Rad_SqRt), 3)
paste0("Overall RMSE = ", h2o_rmse)
[1] "Overall RMSE = 2.029"
h2o_testpreds %>%
ggplot(aes(predict, Pro_Rad_SqRt)) +
geom_point(size = 1) +
geom_abline() +
scale_x_continuous(limits = c(3, 9), breaks = c(seq(3,9,1))) +
scale_y_continuous(limits = c(0, 13), breaks = c(seq(0,13,1))) +
facet_wrap(~Region, nrow = 2) +
theme_fivethirtyeight(base_size = 20, base_family = "serif") +
theme(axis.title = element_text(family = 'serif', size = 30), axis.title.x = element_text(family = 'serif', size = 30),
axis.title.y = element_text(family = 'serif', size = 30)) +
labs(title = "h2o Regression - Test Data Residual Plot",
subtitle = "SqRt(Procurement Radius)" , x = "Prediction", y = "Observation") +
geom_text(x = 4.3, y = 11, aes(label = MeanRes, size = 4), data = test) +
annotate("text", x = 3.5, y = 11, label = "Mean Res: ") +
geom_text(x = 4.3, y = 12, aes(label = rmse, size = 4), data = test) +
annotate("text", x = 3.65, y = 12, label = "RMSE: ") +
theme(legend.position="none") +
ggpubr::stat_regline_equation(label.x = 5.75, label.y = 13, aes(label = ..rr.label..))
RMSEs <- lin_rmse %>%
cbind(mgcv_rmse, dtree_rmse, rf_rmse, xg_rmse, h2o_rmse)
RMSEs <- as.data.frame(RMSEs) %>%
rename('Linear' = '.', 'Additive' = 'mgcv_rmse', 'Decision Tree' = 'dtree_rmse',
'Random Forest' = 'rf_rmse', 'Gradient Boosted' = 'xg_rmse' ,'Machine Learning (h2o)' = 'h2o_rmse')
RMSEs <- RMSEs %>%
pivot_longer(names_to = "Model", values_to = 'RMSE', cols = c(1:6)) %>%
mutate(Model = as.factor(Model),
Model = fct_reorder(Model, RMSE))
MAEs <- lin_mae %>%
cbind(mgcv_mae, dtree_mae, rf_mae, xg_mae, h2o_mae)
MAEs <- as.data.frame(MAEs) %>%
rename('Linear' = '.', 'Additive' = 'mgcv_mae', 'Decision Tree' = 'dtree_mae',
'Random Forest' = 'rf_mae', 'Gradient Boosted' = 'xg_mae' , 'Machine Learning (h2o)' = 'h2o_mae')
MAEs <- MAEs %>%
pivot_longer(names_to = "Model", values_to = 'MAE', cols = c(1:6)) %>%
mutate(Model = as.factor(Model),
Model = fct_reorder(Model, MAE))
r2s <- lin_r2 %>%
cbind(mgcv_r2, dtree_r2, rf_r2, xg_r2, h2o_r2)
r2s <- as.data.frame(r2s) %>%
rename('Linear' = '.', 'Additive' = 'mgcv_r2', 'Decision Tree' = 'dtree_r2',
'Random Forest' = 'rf_r2', 'Gradient Boosted' = 'xg_r2' ,'Machine Learning (h2o)' = 'h2o_r2')
r2s <- r2s %>%
pivot_longer(names_to = "Model", values_to = 'r2', cols = c(1:6)) %>%
mutate(Model = as.factor(Model),
Model = fct_reorder(Model, r2))
ModelStats <- RMSEs %>%
left_join(MAEs, by = 'Model') %>%
left_join(r2s, by = 'Model')
rm(lin_rmse, lin_mae, lin_r2, mgcv_rmse, mgcv_mae, mgcv_r2, dtree_rmse, dtree_mae,
dtree_r2, rf_rmse, rf_mae, rf_r2, xg_rmse, xg_mae, xg_r2, h2o_rmse, h2o_mae, h2o_r2, RMSEs, MAEs, r2s, x, y)
pal <- brewer.pal(6, name = "Spectral")
pal <- rev(pal)
ggplot(ModelStats, aes(x = Model, y = RMSE, fill = Model)) +
geom_col(position = "identity") +
scale_y_continuous(limits = c(0,max(ModelStats$RMSE)+.05)) +
scale_fill_manual(values = pal, name = "Model") +
labs(title = "Test Data RMSEs for Assessed Models", y = "RMSE", x = "Model",
subtitle = "Leading models shown for models requiring hyperparameter tuning (all but linear)") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 30), axis.text.x = element_blank(),
axis.title.x = element_blank(), panel.grid.major.x = element_blank(),
legend.text = element_text(size = 24, family = "serif"),
legend.title = element_text(size = 30, family = "serif")) +
geom_text(aes(label = paste0("RMSE: ", RMSE)), size = 6, nudge_y = .05)
ggplot(ModelStats, aes(x = Model, y = MAE, fill = Model)) +
geom_col(position = "identity") +
scale_y_continuous(limits = c(0,max(ModelStats$MAE)+.05)) +
scale_fill_manual(values = pal, name = "Model") +
labs(title = "Test Data MAEs for Assessed Models", y = "MAE", x = "Model",
subtitle = "Leading models shown for models requiring hyperparameter tuning (all but linear)") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 30), axis.text.x = element_blank(),
axis.title.x = element_blank(), panel.grid.major.x = element_blank(),
legend.text = element_text(size = 24, family = "serif"),
legend.title = element_text(size = 30, family = "serif")) +
geom_text(aes(label = paste0("MAE: ", MAE)), size = 6 , nudge_y = .05)
ggplot(ModelStats, aes(x = Model, y = r2, fill = Model)) +
geom_col(position = "identity") +
scale_y_continuous(limits = c(0,.25)) +
scale_fill_manual(values = pal, name = "Model") +
labs(title = "Test Data R-Squared Values for Assessed Models", y = "R-Squared", x = "Model",
subtitle = "Leading models shown for models requiring hyperparameter tuning (all but linear)") +
theme_fivethirtyeight(base_size = 20, base_family = 'serif') +
theme(axis.title = element_text(family = 'serif', size = 20), axis.text.x = element_blank(),
axis.title.x = element_blank(), panel.grid.major.x = element_blank(),
legend.text = element_text(size = 24, family = "serif"),
legend.title = element_text(size = 30, family = "serif")) +
geom_text(aes(label = paste0("R-Squared: " ,r2)), size = 6 , nudge_y = .005)
knitr::kable(ModelStats, digits = 3, align = "cccc", col.names = c("Model", "RMSE", "MAE", "R-Squared"), caption = "Model Fit Statistics for Assessed Models.
All statistics calculated using test data.") %>%
kable_styling(font_size = 16)
| Model | RMSE | MAE | R-Squared |
|---|---|---|---|
| Linear | 2.074 | 1.525 | 0.130 |
| Additive | 2.098 | 1.523 | 0.110 |
| Decision Tree | 2.180 | 1.636 | 0.039 |
| Random Forest | 2.062 | 1.503 | 0.141 |
| Gradient Boosted | 2.053 | 1.489 | 0.148 |
| Machine Learning (h2o) | 2.029 | 1.550 | 0.148 |