This document reproduces all code outputs from Chapter 3: Feature & Target Engineering (Sections 3.1 – 3.5) of the online book Hands-On Machine Learning with R by Bradley Boehmke & Brandon Greenwell.
Note on the dataset:
AmesHousing::make_ames() pre-cleans all missing values. To faithfully demonstrate Section 3.3, we use the raw Ames data (AmesHousing::ames_raw), which retains real NAs, and then split it for training/testing.
| Section | Topic |
|---|---|
| 3.1 | Prerequisites |
| 3.2 | Target engineering |
| 3.3 | Dealing with missingness |
| 3.4 | Feature filtering |
| 3.5 | Numeric feature engineering |
# Helper packages
library(dplyr) # data manipulation
library(ggplot2) # graphics
library(visdat) # missing value visualisation
# Feature engineering packages
library(caret) # ML tasks
library(recipes) # feature engineering blueprints
# Supporting packages
library(AmesHousing) # Ames housing dataset (raw + cleaned)
library(rsample) # train/test splitting
library(patchwork) # combining ggplots
library(naniar) # missing data tools
library(e1071) # skewness()
library(corrplot) # correlation heatmap
library(forecast) # BoxCox.lambda()
library(scales) # dollar labels
library(tidyr) # pivot_longer()
# Use RAW Ames data so missing values are preserved for Section 3.3
# Load the raw Ames data and carve out a stratified train/test split.
# The seed is fixed up front so the random split below is reproducible.
set.seed(123)
ames_raw <- AmesHousing::ames_raw
# make.names() turns the raw headers into syntactic R names
# (the Lot.Frontage / X3Ssn.Porch style seen in later tables).
names(ames_raw) <- make.names(names(ames_raw))
ames_raw <- rename(ames_raw, Sale_Price = SalePrice)

# Stratified 75/25 split
# price_q (sale-price quartile) exists only to drive the stratification.
ames_raw$price_q <- dplyr::ntile(ames_raw$Sale_Price, 4)
split <- rsample::initial_split(ames_raw, prop = 0.75, strata = "price_q")
ames_train <- select(rsample::training(split), -price_q)
ames_test <- select(rsample::testing(split), -price_q)
cat("Training rows :", nrow(ames_train), "\n")
## Training rows : 2196
## Test rows : 734
## Features : 81
## Missing cells (train): 10548
Although not always required, transforming the response variable can lead to predictive improvement, especially with parametric models that assume normally distributed errors.
# Figure 3.1: histogram of the raw response next to its log transform.
p1 <- ggplot(ames_train, aes(x = Sale_Price)) +
  geom_histogram(
    bins = 70, fill = "#4472C4", colour = "white", linewidth = 0.2
  ) +
  scale_x_continuous(labels = scales::dollar) +
  labs(title = "Original Sale Price", x = "Sale Price ($)", y = "Count") +
  theme_bw(base_size = 12)

p2 <- ggplot(ames_train, aes(x = log(Sale_Price))) +
  geom_histogram(
    bins = 70, fill = "#27AE60", colour = "white", linewidth = 0.2
  ) +
  labs(title = "log(Sale Price)", x = "log(Sale Price)", y = "Count") +
  theme_bw(base_size = 12)

# Combine both panels (patchwork) under a shared title and subtitle.
p1 + p2 +
  plot_annotation(
    title = "Figure 3.1 - Transforming the response variable",
    subtitle = "Right-skewed original vs. approximately normal log-transform"
  )
Figure 3.1 - Right-skewed Sale_Price distribution
# Log-transform the response and report the skewness of the raw target.
transformed_response <- log(ames_train$Sale_Price)
skew_original <- e1071::skewness(ames_train$Sale_Price)
cat("Skewness (original) :", round(skew_original, 3), "\n")
## Skewness (original) : 1.602
## Skewness (log-transformed): -0.126
# Fit lambda on TRAINING data only to prevent data leakage
lambda_bc <- forecast::BoxCox.lambda(
  ames_train$Sale_Price,
  method = "loglik"
)
cat("Optimal Box-Cox lambda:", round(lambda_bc, 4), "\n")
## Optimal Box-Cox lambda: 0.05
# Figure 3.2: compare the raw, log and Box-Cox transformed response.
# The Box-Cox transform reuses lambda_bc, estimated on the training data.
bc_train <- forecast::BoxCox(ames_train$Sale_Price, lambda_bc)

p_orig <- ggplot(ames_train, aes(x = Sale_Price)) +
  geom_histogram(
    bins = 70, fill = "#7F8C8D", colour = "white", linewidth = 0.2
  ) +
  scale_x_continuous(labels = scales::dollar) +
  labs(title = "Original", x = "Sale Price ($)", y = "Count") +
  theme_bw(base_size = 11)

# Columns are mapped through `data` instead of `ames_train$...` inside aes().
p_log <- ggplot(ames_train, aes(x = log(Sale_Price))) +
  geom_histogram(
    bins = 70, fill = "#27AE60", colour = "white", linewidth = 0.2
  ) +
  labs(title = "log(Sale Price)", x = "log(Sale Price)", y = "Count") +
  theme_bw(base_size = 11)

# bc_train is a free-standing vector, so wrap it in a data frame for ggplot;
# as.numeric() drops any attributes BoxCox() may have attached.
p_bc <- ggplot(
  data.frame(bc_price = as.numeric(bc_train)),
  aes(x = bc_price)
) +
  geom_histogram(
    bins = 70, fill = "#C0392B", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = paste0("Box-Cox (lambda = ", round(lambda_bc, 2), ")"),
    x = "Box-Cox(Sale Price)", y = "Count"
  ) +
  theme_bw(base_size = 11)

p_orig + p_log + p_bc +
  plot_annotation(
    title = "Figure 3.2 - Comparing transformations of Sale_Price",
    subtitle = "Box-Cox finds the optimal power transformation automatically"
  )
Figure 3.2 - Box-Cox transformation of Sale Price
# Figure 3.2b: normal Q-Q plots for the raw, log and Box-Cox responses.
# par() returns the previous settings; keep them so the 1 x 3 layout and
# margins do not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 3), mar = c(4, 4, 3, 1))

qqnorm(ames_train$Sale_Price,
  main = "QQ - Original", col = "#7F8C8D", pch = 16, cex = 0.4
)
qqline(ames_train$Sale_Price, col = "red", lwd = 2)

qqnorm(log(ames_train$Sale_Price),
  main = "QQ - log", col = "#27AE60", pch = 16, cex = 0.4
)
qqline(log(ames_train$Sale_Price), col = "red", lwd = 2)

qqnorm(bc_train,
  main = paste0("QQ - Box-Cox (lam=", round(lambda_bc, 2), ")"),
  col = "#C0392B", pch = 16, cex = 0.4
)
qqline(bc_train, col = "red", lwd = 2)

# Restore the graphics parameters that were active before this figure.
par(old_par)
Figure 3.2b - Normal Q-Q plots
# Recipe version - lambda estimated automatically from training data
ames_recipe_bc <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_BoxCox(all_outcomes())
# Print the (unprepped) recipe.
ames_recipe_bc

# Table 3.1: skewness of the response under each transformation.
data.frame(
  Transformation = c(
    "Original",
    "log(Sale Price)",
    paste0("Box-Cox (lambda = ", round(lambda_bc, 3), ")")
  ),
  Skewness = round(c(
    e1071::skewness(ames_train$Sale_Price),
    e1071::skewness(log(ames_train$Sale_Price)),
    e1071::skewness(bc_train)
  ), 4)
) |>
  knitr::kable(caption = "Table 3.1 - Skewness after each transformation")
| Transformation | Skewness |
|---|---|
| Original | 1.6016 |
| log(Sale Price) | -0.1256 |
| Box-Cox (lambda = 0.05) | -0.0165 |
# Percentage of missing values per column of the training set. Only columns
# with at least one NA are kept, ordered from most to least missing.
miss_summary <- ames_train %>%
  summarise(across(everything(), ~ sum(is.na(.)) / n() * 100)) %>%
  tidyr::pivot_longer(
    everything(),
    names_to = "Feature",
    values_to = "Pct_Missing"
  ) %>%
  filter(Pct_Missing > 0) %>%
  arrange(desc(Pct_Missing))
cat("Features with missing values:", nrow(miss_summary), "\n")
## Features with missing values: 26
## Total missing cells : 10548

# Table 3.2: the 15 columns with the highest share of missing values.
knitr::kable(
  head(miss_summary, 15),
  digits = 2,
  caption = "Table 3.2 - Top 15 features by % missing (training set)"
)
| Feature | Pct_Missing |
|---|---|
| Pool.QC | 99.59 |
| Misc.Feature | 96.72 |
| Alley | 93.17 |
| Fence | 80.92 |
| Fireplace.Qu | 48.82 |
| Lot.Frontage | 16.99 |
| Garage.Yr.Blt | 5.56 |
| Garage.Finish | 5.56 |
| Garage.Qual | 5.56 |
| Garage.Cond | 5.56 |
| Garage.Type | 5.46 |
| Bsmt.Exposure | 2.96 |
| Bsmt.Qual | 2.82 |
| Bsmt.Cond | 2.82 |
| BsmtFin.Type.1 | 2.82 |
# Figure 3.3: column types and missing cells, columns kept in data order
# (sort_type = FALSE).
visdat::vis_dat(ames_train, sort_type = FALSE) +
  labs(title = "Figure 3.3 - Data types and missingness in ames_train")
Figure 3.3 - vis_dat overview of training data

# Figure 3.3b: missingness only, with clustering enabled (cluster = TRUE).
visdat::vis_miss(ames_train, cluster = TRUE) +
  labs(title = "Figure 3.3b - Clustered missing-value pattern")
Figure 3.3b - vis_miss clustered plot
# Subset to only variables that actually have NAs before calling gg_miss_upset
miss_vars <- miss_summary %>%
  filter(Pct_Missing > 0) %>%
  pull(Feature)

# The UpSet plot is only drawn when at least two variables have missing
# values; at most 10 sets are shown.
if (length(miss_vars) >= 2) {
  naniar::gg_miss_upset(
    ames_train %>% dplyr::select(dplyr::all_of(miss_vars)),
    nsets = min(10, length(miss_vars))
  )
} else {
  cat("Not enough variables with missing data for an UpSet plot.\n")
}
Figure 3.3c - UpSet plot of co-occurring missing variables
# Figure 3.3d: horizontal bar chart of % missing per feature, with dashed
# reference lines at 5% (orange) and 20% (red).
miss_summary %>%
  ggplot(aes(x = reorder(Feature, Pct_Missing), y = Pct_Missing)) +
  geom_col(fill = "#4472C4") +
  geom_hline(
    yintercept = 5, linetype = "dashed", colour = "orange", linewidth = 0.7
  ) +
  geom_hline(
    yintercept = 20, linetype = "dashed", colour = "red", linewidth = 0.7
  ) +
  coord_flip() +
  labs(
    title = "Figure 3.3d - % Missing per feature (training set)",
    subtitle = "Dashed lines at 5% (orange) and 20% (red)",
    x = NULL, y = "Missing (%)"
  ) +
  theme_bw(base_size = 11)
Figure 3.3d - % missing per feature bar chart
# Simple statistical imputation: median for numeric predictors, mode for
# nominal predictors.
ames_recipe_imp_stat <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_impute_median(all_numeric_predictors()) %>%
  step_impute_mode(all_nominal_predictors())
# Print the (unprepped) recipe.
ames_recipe_imp_stat

# Estimate the imputation values on the training set, then apply them to it.
# prep_stat must be assigned under this name because bake() below uses it.
prep_stat <- prep(ames_recipe_imp_stat, training = ames_train)
baked_stat <- bake(prep_stat, new_data = ames_train)

# Lot.Frontage before imputation (rows with NA removed) ...
p_orig_lf <- ames_train %>%
  filter(!is.na(Lot.Frontage)) %>%
  ggplot(aes(x = Lot.Frontage)) +
  geom_histogram(
    bins = 40, fill = "#7F8C8D", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = "Original (non-missing only)",
    x = "Lot Frontage (ft)", y = "Count"
  ) +
  theme_bw(base_size = 11)

# ... and after median imputation.
p_med_lf <- baked_stat %>%
  ggplot(aes(x = Lot.Frontage)) +
  geom_histogram(
    bins = 40, fill = "#4472C4", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = "After median imputation",
    x = "Lot Frontage (ft)", y = "Count"
  ) +
  theme_bw(base_size = 11)

p_orig_lf + p_med_lf +
  plot_annotation(
    title = "Figure 3.4a - Lot.Frontage: original vs. median-imputed"
  )
Figure 3.4a - Median imputation for Lot.Frontage
# K-nearest-neighbour imputation of all predictors using 6 neighbours.
ames_recipe_knn <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_impute_knn(all_predictors(), neighbors = 6)
# Print the (unprepped) recipe.
ames_recipe_knn

# Train the imputation on the training set, then apply it to the same data.
# prep_knn must be assigned under this name because bake() below uses it.
prep_knn <- prep(ames_recipe_knn, training = ames_train)
baked_knn <- bake(prep_knn, new_data = ames_train)

p_knn_lf <- baked_knn %>%
  ggplot(aes(x = Lot.Frontage)) +
  geom_histogram(
    bins = 40, fill = "#27AE60", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = "After KNN imputation (k = 6)",
    x = "Lot Frontage (ft)", y = "Count"
  ) +
  theme_bw(base_size = 11)

# p_orig_lf and p_med_lf come from the median-imputation figure.
p_orig_lf + p_med_lf + p_knn_lf +
  plot_annotation(
    title = "Figure 3.4b - Imputation methods compared for Lot.Frontage"
  )
Figure 3.4b - KNN vs median imputation for Lot.Frontage
Removing features with near-zero variance (NZV) reduces noise and computational cost.
# Near-zero-variance diagnostics for every column of the training set.
# saveMetrics = TRUE returns the per-column metrics table (freqRatio,
# percentUnique, zeroVar, nzv) shown below.
nzv_metrics <- caret::nearZeroVar(ames_train, saveMetrics = TRUE)

# Table 3.3: flagged columns only (up to 20), highest percentUnique first.
# Feature names live in the row names, so move them into a column.
knitr::kable(
  nzv_metrics %>%
    tibble::rownames_to_column("Feature") %>%
    filter(nzv) %>%
    arrange(desc(percentUnique)) %>%
    head(20),
  digits = 3,
  caption = "Table 3.3 - Near-zero variance features"
)
| Feature | freqRatio | percentUnique | zeroVar | nzv |
|---|---|---|---|---|
| Enclosed.Porch | 109.941 | 7.286 | FALSE | TRUE |
| Screen.Porch | 166.917 | 4.690 | FALSE | TRUE |
| Misc.Val | 236.222 | 1.412 | FALSE | TRUE |
| Low.Qual.Fin.SF | 721.000 | 1.366 | FALSE | TRUE |
| X3Ssn.Porch | 724.000 | 0.956 | FALSE | TRUE |
| Pool.Area | 2187.000 | 0.455 | FALSE | TRUE |
| Condition.2 | 197.364 | 0.364 | FALSE | TRUE |
| Functional | 38.491 | 0.364 | FALSE | TRUE |
| Roof.Matl | 135.438 | 0.319 | FALSE | TRUE |
| BsmtFin.Type.2 | 24.312 | 0.273 | FALSE | TRUE |
| Heating | 98.091 | 0.273 | FALSE | TRUE |
| Bsmt.Cond | 23.747 | 0.228 | FALSE | TRUE |
| Garage.Qual | 22.318 | 0.228 | FALSE | TRUE |
| Garage.Cond | 36.944 | 0.228 | FALSE | TRUE |
| Land.Contour | 21.151 | 0.182 | FALSE | TRUE |
| Kitchen.AbvGr | 20.920 | 0.182 | FALSE | TRUE |
| Utilities | 1096.500 | 0.137 | FALSE | TRUE |
| Land.Slope | 20.624 | 0.137 | FALSE | TRUE |
| Street | 218.600 | 0.091 | FALSE | TRUE |
## Total NZV features : 19
## Remaining after removal: 62
# Figure 3.5: frequency ratio per feature on a log1p scale, coloured by the
# near-zero-variance flag. The response (Sale_Price) is excluded.
nzv_metrics %>%
  tibble::rownames_to_column("Feature") %>%
  filter(Feature != "Sale_Price") %>%
  ggplot(aes(
    x = reorder(Feature, freqRatio),
    y = log1p(freqRatio),
    fill = nzv
  )) +
  geom_col() +
  scale_fill_manual(
    values = c("TRUE" = "#C0392B", "FALSE" = "#4472C4"),
    name = "Near-zero\nvariance"
  ) +
  coord_flip() +
  labs(
    title = "Figure 3.5 - log(1 + Frequency Ratio) per feature",
    subtitle = "Red = flagged as near-zero variance",
    x = NULL, y = "log(1 + Frequency Ratio)"
  ) +
  # Small fonts: one axis label per feature has to fit on the flipped axis.
  theme_bw(base_size = 8) +
  theme(axis.text.y = element_text(size = 6))
Figure 3.5 - Frequency ratio per feature
# Blueprint: impute first (median for numeric, mode for nominal predictors),
# then drop near-zero-variance predictors.
ames_recipe_nzv <- recipe(Sale_Price ~ ., data = ames_train) |>
  step_impute_median(all_numeric_predictors()) |>
  step_impute_mode(all_nominal_predictors()) |>
  step_nzv(all_predictors())

# Estimate the steps on the training set and apply them to the same data.
prep_nzv <- ames_recipe_nzv |> prep(training = ames_train)
baked_nzv <- prep_nzv |> bake(new_data = ames_train)

# One column of ames_train is the Sale_Price response, hence the "- 1".
n_features_before <- ncol(ames_train) - 1
cat("Features before NZV removal:", n_features_before, "\n")
## Features before NZV removal: 81
## Features after NZV removal: 58
# Correlation-based filtering on the numeric columns of the imputed,
# NZV-filtered training data.
num_baked <- baked_nzv %>% select(where(is.numeric))

# Rank the numeric columns by absolute correlation with Sale_Price and keep
# the 20 strongest predictors (the response itself is dropped from the list).
target_cor <- cor(num_baked)[, "Sale_Price"]
ranked_names <- names(sort(abs(target_cor), decreasing = TRUE))
top20_names <- head(setdiff(ranked_names, "Sale_Price"), 20)

# Pairwise correlations among those predictors; findCorrelation() returns the
# column indices it suggests removing at the 0.80 cutoff.
cor_mat <- cor(num_baked %>% select(all_of(top20_names)))
high_cor_idx <- caret::findCorrelation(cor_mat, cutoff = 0.80)
cat("Features to remove (|r| > 0.80):", length(high_cor_idx), "\n")
## Features to remove (|r| > 0.80): 2

# Figure 3.5b: lower-triangle heatmap; `mar` leaves room for the title.
corrplot::corrplot(
  cor_mat,
  method = "color",
  type = "lower",
  tl.cex = 0.75,
  tl.col = "black",
  diag = FALSE,
  title = "Figure 3.5b - Correlation: top-20 numeric predictors",
  mar = c(0, 0, 2, 0)
)
Figure 3.5b - Correlation heatmap of top 20 numeric predictors
Skewed predictors can violate model assumptions and hurt distance-based algorithms.
# Skewness of every numeric predictor (response excluded), ordered by
# decreasing absolute skewness so the most skewed feature is the first row.
skew_vals <- num_baked %>%
  select(-Sale_Price) %>%
  summarise(across(
    everything(),
    ~ e1071::skewness(., na.rm = TRUE)
  )) %>%
  tidyr::pivot_longer(
    everything(),
    names_to = "Feature",
    values_to = "Skewness"
  ) %>%
  arrange(desc(abs(Skewness)))

# Table 3.4: the 15 most skewed predictors.
knitr::kable(
  head(skew_vals, 15),
  digits = 3,
  caption = "Table 3.4 - Top 15 most skewed numeric predictors"
)
| Feature | Skewness |
|---|---|
| Lot.Area | 13.461 |
| Bsmt.Half.Bath | 4.168 |
| BsmtFin.SF.2 | 4.045 |
| Mas.Vnr.Area | 2.673 |
| Open.Porch.SF | 2.609 |
| Lot.Frontage | 1.898 |
| Wood.Deck.SF | 1.837 |
| BsmtFin.SF.1 | 1.455 |
| X1st.Flr.SF | 1.279 |
| Gr.Liv.Area | 1.216 |
| Total.Bsmt.SF | 1.058 |
| Bsmt.Unf.SF | 0.952 |
| X2nd.Flr.SF | 0.868 |
| TotRms.AbvGrd | 0.739 |
| Half.Bath | 0.729 |
# Figure 3.6: histograms of the ten most skewed numeric predictors, one
# facet per feature with independent axes.
top10_skew <- head(skew_vals, 10)$Feature

num_baked %>%
  select(all_of(top10_skew)) %>%
  tidyr::pivot_longer(
    everything(),
    names_to = "Feature",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Value)) +
  geom_histogram(
    bins = 40, fill = "#4472C4", colour = "white", linewidth = 0.2
  ) +
  facet_wrap(~ Feature, scales = "free", ncol = 5) +
  labs(
    title = "Figure 3.6 - Top-10 most skewed numeric predictors",
    subtitle = "Scales are free per panel",
    x = NULL, y = "Count"
  ) +
  theme_bw(base_size = 9) +
  theme(strip.text = element_text(size = 8, face = "bold"))
Figure 3.6 - Distributions of 10 most skewed features
# Figure 3.7: effect of a Box-Cox transformation on the most skewed predictor.
# skew_vals is sorted by decreasing absolute skewness, so its first row names
# the most skewed feature. most_skewed is defined here because every plot
# below refers to it.
most_skewed <- skew_vals$Feature[1]
cat("Most skewed feature:", most_skewed, "\n")
## Most skewed feature: Lot.Area

# Distribution before transformation; .data[[...]] looks the column up by its
# name held in the string most_skewed.
p_before <- num_baked %>%
  ggplot(aes(x = .data[[most_skewed]])) +
  geom_histogram(
    bins = 50, fill = "#7F8C8D", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = paste0(
      "Before: ", most_skewed,
      " (skew = ",
      round(e1071::skewness(num_baked[[most_skewed]], na.rm = TRUE), 2), ")"
    ),
    x = most_skewed, y = "Count"
  ) +
  theme_bw(base_size = 11)

# Impute first, then Box-Cox every numeric predictor.
bc_pred_recipe <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_impute_median(all_numeric_predictors()) %>%
  step_impute_mode(all_nominal_predictors()) %>%
  step_BoxCox(all_numeric_predictors())
bc_prep <- prep(bc_pred_recipe, training = ames_train)
bc_baked <- bake(bc_prep, new_data = ames_train)

p_after <- bc_baked %>%
  ggplot(aes(x = .data[[most_skewed]])) +
  geom_histogram(
    bins = 50, fill = "#C0392B", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = paste0(
      "After Box-Cox (skew = ",
      round(e1071::skewness(bc_baked[[most_skewed]], na.rm = TRUE), 2), ")"
    ),
    x = paste0("BoxCox(", most_skewed, ")"), y = "Count"
  ) +
  theme_bw(base_size = 11)

p_before + p_after +
  plot_annotation(
    title = paste0("Figure 3.7 - Box-Cox transformation of ", most_skewed)
  )
Figure 3.7 - Box-Cox on most skewed predictor
# Figure 3.7b: same pipeline as the Box-Cox recipe, but with a Yeo-Johnson
# transformation of the numeric predictors.
yj_recipe <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_impute_median(all_numeric_predictors()) %>%
  step_impute_mode(all_nominal_predictors()) %>%
  step_YeoJohnson(all_numeric_predictors())
yj_prep <- prep(yj_recipe, training = ames_train)
yj_baked <- bake(yj_prep, new_data = ames_train)

p_yj <- yj_baked %>%
  ggplot(aes(x = .data[[most_skewed]])) +
  geom_histogram(
    bins = 50, fill = "#27AE60", colour = "white", linewidth = 0.2
  ) +
  labs(
    title = paste0(
      "Yeo-Johnson (skew = ",
      round(e1071::skewness(yj_baked[[most_skewed]], na.rm = TRUE), 2), ")"
    ),
    x = paste0("YJ(", most_skewed, ")"), y = "Count"
  ) +
  theme_bw(base_size = 11)

# p_before and p_after are the panels built for Figure 3.7.
p_before + p_after + p_yj +
  plot_annotation(
    title = paste0("Figure 3.7b - Box-Cox vs. Yeo-Johnson on ", most_skewed)
  )
Figure 3.7b - Yeo-Johnson vs Box-Cox comparison
Many ML algorithms require features on a comparable scale. Z-score standardisation: \(z_i = (x_i - \bar{x}) / s\)
# Figure 3.8: four example features before and after z-score standardisation.
# Keep only the example features that survived the NZV filter.
feats4 <- c("Lot.Area", "Gr.Liv.Area", "Year.Built", "Garage.Area")
feats4 <- intersect(feats4, names(baked_nzv))

p_before_scale <- baked_nzv %>%
  select(all_of(feats4)) %>%
  tidyr::pivot_longer(
    everything(),
    names_to = "Feature",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Value, fill = Feature)) +
  geom_histogram(
    bins = 40, colour = "white", linewidth = 0.2, show.legend = FALSE
  ) +
  facet_wrap(~ Feature, scales = "free", ncol = 2) +
  labs(title = "Before standardisation", x = NULL, y = "Count") +
  theme_bw(base_size = 10)

# step_normalize = step_center + step_scale in one call
norm_recipe <- recipe(Sale_Price ~ ., data = ames_train) %>%
  step_impute_median(all_numeric_predictors()) %>%
  step_impute_mode(all_nominal_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_normalize(all_numeric_predictors())
norm_prep <- prep(norm_recipe, training = ames_train)
norm_baked <- bake(norm_prep, new_data = ames_train)

# Guard against example features that are absent from the normalised data.
feats4_norm <- intersect(feats4, names(norm_baked))

p_after_scale <- norm_baked %>%
  select(all_of(feats4_norm)) %>%
  tidyr::pivot_longer(
    everything(),
    names_to = "Feature",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Value, fill = Feature)) +
  geom_histogram(
    bins = 40, colour = "white", linewidth = 0.2, show.legend = FALSE
  ) +
  facet_wrap(~ Feature, scales = "free", ncol = 2) +
  labs(
    title = "After standardisation (mean = 0, sd = 1)",
    x = NULL, y = "Count"
  ) +
  theme_bw(base_size = 10)

# patchwork's `/` stacks the raw panels above the standardised ones.
p_before_scale / p_after_scale +
  plot_annotation(
    title = "Figure 3.8 - Effect of z-score standardisation",
    subtitle = "Top: raw scale | Bottom: standardised"
  )
Figure 3.8 - Before and after standardisation
# Table 3.5: mean and standard deviation of the example features after
# normalisation, rounded to 4 decimals.
norm_baked %>%
  select(all_of(feats4_norm)) %>%
  summarise(across(
    everything(),
    list(
      mean = ~ round(mean(., na.rm = TRUE), 4),
      sd = ~ round(sd(., na.rm = TRUE), 4)
    )
  )) %>%
  # across() names the columns <feature>_<stat>; the regex splits on the LAST
  # underscore so the statistic suffix becomes its own column (.value).
  tidyr::pivot_longer(
    everything(),
    names_to = c("Feature", ".value"),
    names_sep = "_(?=[^_]+$)"
  ) %>%
  knitr::kable(
    caption = "Table 3.5 - Means and SDs after normalisation (expect ~0 and ~1)"
  )
| Feature | mean | sd |
|---|---|---|
| Lot.Area | 0 | 1 |
| Gr.Liv.Area | 0 | 1 |
| Year.Built | 0 | 1 |
| Garage.Area | 0 | 1 |
# Figure 3.8b: violin + box plots of the example features before and after
# standardisation. Both data sets are reshaped to long form and stacked, with
# a Stage column recording where each value came from.
bind_rows(
  baked_nzv %>%
    select(all_of(feats4)) %>%
    tidyr::pivot_longer(
      everything(),
      names_to = "Feature",
      values_to = "Value"
    ) %>%
    mutate(Stage = "Before"),
  norm_baked %>%
    select(all_of(feats4_norm)) %>%
    tidyr::pivot_longer(
      everything(),
      names_to = "Feature",
      values_to = "Value"
    ) %>%
    mutate(Stage = "After (z-score)")
) %>%
  # Explicit factor levels put "Before" to the left of "After (z-score)".
  mutate(Stage = factor(Stage, levels = c("Before", "After (z-score)"))) %>%
  ggplot(aes(x = Stage, y = Value, fill = Stage)) +
  geom_violin(alpha = 0.7, trim = TRUE) +
  geom_boxplot(width = 0.12, outlier.size = 0.6, alpha = 0.5) +
  scale_fill_manual(
    values = c("Before" = "#4472C4", "After (z-score)" = "#27AE60")
  ) +
  facet_wrap(~ Feature, scales = "free_y", ncol = 2) +
  labs(
    title = "Figure 3.8b - Violin plots before & after standardisation",
    x = NULL, y = "Value", fill = NULL
  ) +
  theme_bw(base_size = 11) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )
Figure 3.8b - Violin plots before and after standardisation
| Section | Topic | Key_Recipe_Steps |
|---|---|---|
| 3.1 | Prerequisites | initial_split() |
| 3.2 | Target engineering | step_log(), step_BoxCox(), step_YeoJohnson() |
| 3.3 | Dealing with missingness | step_impute_median/mode(), step_impute_knn(), step_impute_bag() |
| 3.4 | Feature filtering | step_nzv(), findCorrelation() |
| 3.5 | Numeric feature engineering | step_BoxCox(), step_YeoJohnson(), step_normalize() |
After knitting this document to HTML in RStudio:
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Taipei
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.3.1 scales_1.4.0 forecast_8.24.0 corrplot_0.95
## [5] e1071_1.7-17 naniar_1.1.0 patchwork_1.3.2 rsample_1.3.1
## [9] AmesHousing_0.0.4 recipes_1.3.1 caret_7.0-1 lattice_0.22-7
## [13] visdat_0.6.0 ggplot2_4.0.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 timeDate_4041.110 farver_2.1.2
## [4] S7_0.2.0 fastmap_1.2.0 pROC_1.19.0.1
## [7] digest_0.6.37 rpart_4.1.24 timechange_0.3.0
## [10] lifecycle_1.0.4 survival_3.8-3 magrittr_2.0.4
## [13] compiler_4.5.1 rlang_1.1.6 sass_0.4.10
## [16] tools_4.5.1 yaml_2.3.10 data.table_1.17.8
## [19] knitr_1.50 labeling_0.4.3 curl_7.0.0
## [22] TTR_0.24.4 plyr_1.8.9 RColorBrewer_1.1-3
## [25] withr_3.0.2 purrr_1.1.0 nnet_7.3-20
## [28] grid_4.5.1 stats4_4.5.1 sparsevctrs_0.3.4
## [31] xts_0.14.1 colorspace_2.1-2 future_1.67.0
## [34] globals_0.18.0 iterators_1.0.14 MASS_7.3-65
## [37] cli_3.6.5 UpSetR_1.4.0 rmarkdown_2.29
## [40] generics_0.1.4 rstudioapi_0.17.1 future.apply_1.20.0
## [43] reshape2_1.4.5 cachem_1.1.0 proxy_0.4-29
## [46] stringr_1.5.2 splines_4.5.1 parallel_4.5.1
## [49] urca_1.3-4 vctrs_0.6.5 hardhat_1.4.2
## [52] Matrix_1.7-3 jsonlite_2.0.0 tseries_0.10-58
## [55] listenv_0.9.1 foreach_1.5.2 gower_1.0.2
## [58] jquerylib_0.1.4 quantmod_0.4.28 glue_1.8.0
## [61] parallelly_1.45.1 codetools_0.2-20 lubridate_1.9.4
## [64] stringi_1.8.7 gtable_0.3.6 quadprog_1.5-8
## [67] lmtest_0.9-40 tibble_3.3.0 pillar_1.11.1
## [70] furrr_0.3.1 htmltools_0.5.8.1 ipred_0.9-15
## [73] lava_1.8.1 R6_2.6.1 evaluate_1.0.5
## [76] fracdiff_1.5-3 bslib_0.9.0 class_7.3-23
## [79] Rcpp_1.1.0 gridExtra_2.3 nlme_3.1-168
## [82] prodlim_2025.04.28 xfun_0.52 zoo_1.8-14
## [85] ModelMetrics_1.2.2.2 pkgconfig_2.0.3