# Per-column summary statistics for the built-in `cars` data set.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

——————————————————————————

Feature Engineering and Selection: A Practical Approach for Predictive Models

by Max Kuhn and Kjell Johnson

——————————————————————————

——————————————————————————

Code requires these packages:

library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.4.3
## ── Attaching packages ────────────────────────────────────── tidymodels 1.4.1 ──
## ✔ broom        1.0.10     ✔ recipes      1.3.1 
## ✔ dials        1.4.2      ✔ rsample      1.3.1 
## ✔ dplyr        1.1.4      ✔ tailor       0.1.0 
## ✔ ggplot2      4.0.0      ✔ tidyr        1.3.1 
## ✔ infer        1.0.9      ✔ tune         2.0.0 
## ✔ modeldata    1.5.1      ✔ workflows    1.3.0 
## ✔ parsnip      1.3.3      ✔ workflowsets 1.1.1 
## ✔ purrr        1.1.0      ✔ yardstick    1.3.2
## Warning: package 'dials' was built under R version 4.4.3
## Warning: package 'scales' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'infer' was built under R version 4.4.3
## Warning: package 'modeldata' was built under R version 4.4.3
## Warning: package 'parsnip' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'recipes' was built under R version 4.4.3
## Warning: package 'rsample' was built under R version 4.4.3
## Warning: package 'tailor' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'tune' was built under R version 4.4.3
## Warning: package 'workflows' was built under R version 4.4.3
## Warning: package 'workflowsets' was built under R version 4.4.3
## Warning: package 'yardstick' was built under R version 4.4.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
# Three-level ordered factor; the level order follows the order of `values`.
values <- c("low", "medium", "high")
dat <- data.frame(x = factor(values, levels = values, ordered = TRUE))

# Design matrix for the ordered factor (encoded with polynomial contrasts), see
# https://bookdown.org/max/FES/encodings-for-ordered-data.html#tab:categorical-ordered-table
model.matrix(~ x, data = dat)
##   (Intercept)           x.L        x.Q
## 1           1 -7.071068e-01  0.4082483
## 2           1 -7.850462e-17 -0.8164966
## 3           1  7.071068e-01  0.4082483
## attr(,"assign")
## [1] 0 1 1
## attr(,"contrasts")
## attr(,"contrasts")$x
## [1] "contr.poly"
# The same ordered-factor encoding produced with a recipe; compare with the
# model.matrix() result for the table at
# https://bookdown.org/max/FES/encodings-for-ordered-data.html#tab:categorical-ordered-table
#
# bake(new_data = NULL) returns the processed training set and replaces the
# superseded juice().
recipe(~ x, data = dat) %>%
  step_dummy(x) %>%
  prep() %>%
  bake(new_data = NULL)
## # A tibble: 3 × 2
##         x_1    x_2
##       <dbl>  <dbl>
## 1 -7.07e- 1  0.408
## 2 -7.85e-17 -0.816
## 3  7.07e- 1  0.408
# Record the R version, platform, and package versions used for this run.
utils::sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_Indonesia.utf8  LC_CTYPE=English_Indonesia.utf8   
## [3] LC_MONETARY=English_Indonesia.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=English_Indonesia.utf8    
## 
## time zone: Asia/Jakarta
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] yardstick_1.3.2    workflowsets_1.1.1 workflows_1.3.0    tune_2.0.0        
##  [5] tidyr_1.3.1        tailor_0.1.0       rsample_1.3.1      recipes_1.3.1     
##  [9] purrr_1.1.0        parsnip_1.3.3      modeldata_1.5.1    infer_1.0.9       
## [13] ggplot2_4.0.0      dplyr_1.1.4        dials_1.4.2        scales_1.4.0      
## [17] broom_1.0.10       tidymodels_1.4.1  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.49           bslib_0.8.0        
##  [4] lattice_0.22-6      vctrs_0.6.5         tools_4.4.1        
##  [7] generics_0.1.3      parallel_4.4.1      tibble_3.3.0       
## [10] pkgconfig_2.0.3     Matrix_1.7-0        data.table_1.16.2  
## [13] RColorBrewer_1.1-3  lhs_1.2.0           S7_0.2.0           
## [16] GPfit_1.0-9         lifecycle_1.0.4     compiler_4.4.1     
## [19] farver_2.1.2        codetools_0.2-20    DiceDesign_1.10    
## [22] htmltools_0.5.8.1   class_7.3-22        sass_0.4.9         
## [25] yaml_2.3.10         prodlim_2025.04.28  furrr_0.3.1        
## [28] pillar_1.11.0       jquerylib_0.1.4     MASS_7.3-60.2      
## [31] cachem_1.1.0        gower_1.0.2         rpart_4.1.23       
## [34] parallelly_1.45.1   lava_1.8.1          tidyselect_1.2.1   
## [37] digest_0.6.37       future_1.67.0       listenv_0.9.1      
## [40] splines_4.4.1       fastmap_1.2.0       grid_4.4.1         
## [43] cli_3.6.5           magrittr_2.0.3      survival_3.6-4     
## [46] future.apply_1.20.0 withr_3.0.2         backports_1.5.0    
## [49] timechange_0.3.0    lubridate_1.9.4     rmarkdown_2.29     
## [52] globals_0.18.0      nnet_7.3-19         timeDate_4041.110  
## [55] evaluate_1.0.1      knitr_1.49          hardhat_1.4.2      
## [58] rlang_1.1.6         Rcpp_1.0.13-1       glue_1.8.0         
## [61] ipred_0.9-15        rstudioapi_0.17.1   jsonlite_1.8.9     
## [64] R6_2.5.1