# Load best result from each modelresults <-tribble(~model, ~recipe, ~file,"XGBoost", "EDA", "results/tuning/xgb_eda.rds","Random Forest", "EDA", "results/tuning/rf_eda.rds","SVM-RBF", "EDA", "results/tuning/svm_eda.rds","MARS", "EDA", "results/tuning/mars_eda.rds","Linear", "Interactions", "results/tuning/lm_interactions.rds","Elastic Net", "Interactions", "results/tuning/en_interactions.rds","Lasso", "Interactions", "results/tuning/lasso_interactions.rds","KNN", "EDA", "results/tuning/knn_eda.rds")extract_best_rmse <-function(file) { result <-read_rds(file) best <-show_best(result, metric ="rmse", n =1)tibble(mean = best$mean, std_err = best$std_err)}cv_summary <- results %>%mutate(metrics =map(file, extract_best_rmse)) %>%unnest(metrics) %>%select(model, recipe, mean, std_err) %>%arrange(mean)cv_summary %>%gt() %>%cols_label(model ="Model",recipe ="Recipe",mean ="CV RMSE",std_err ="Std Error" ) %>%fmt_number(columns =c(mean, std_err), decimals =4) %>%tab_style(style =cell_fill(color ="#e8f5e9"),locations =cells_body(rows =1) ) %>%tab_header(title ="Cross-Validation RMSE by Model")
Cross-Validation RMSE by Model
Model
Recipe
CV RMSE
Std Error
XGBoost
EDA
0.1438
0.0010
Random Forest
EDA
0.1536
0.0009
SVM-RBF
EDA
0.1586
0.0014
MARS
EDA
0.1643
0.0010
Elastic Net
Interactions
0.2174
0.0014
Lasso
Interactions
0.2174
0.0014
Linear
Interactions
0.2174
0.0014
KNN
EDA
0.2326
0.0014
Code
cv_summary %>%mutate(model =fct_reorder(model, mean)) %>%ggplot(aes(x = mean, y = model)) +geom_point(size =3) +geom_errorbarh(aes(xmin = mean - std_err, xmax = mean + std_err),height =0.2 ) +labs(title ="Cross-validated RMSE by model",x ="RMSE (log scale)",y =NULL ) +theme_minimal()
Winning Model: XGBoost + EDA Recipe
Code
xgb_eda_result <-read_rds("results/tuning/xgb_eda.rds")best_params <-select_best(xgb_eda_result, metric ="rmse")final_spec <-boost_tree(trees = best_params$trees,tree_depth = best_params$tree_depth,learn_rate = best_params$learn_rate,loss_reduction = best_params$loss_reduction,min_n = best_params$min_n) %>%set_engine("xgboost") %>%set_mode("regression")final_workflow <-workflow() %>%add_recipe(recipe_eda) %>%add_model(final_spec)# Fit on full training setfinal_fit <-fit(final_workflow, data = miami_train)write_rds(final_fit, "results/final_fit.rds")message("✓ Final model fit saved.")