Group-Assignment-1-Statistical-Modeling.knit

# ---- Installing Packages ----
req <- c("data.table","dplyr","ggplot2","glmnet","pls","caret","scales")
to_install <- setdiff(req, rownames(installed.packages()))
if (length(to_install)) install.packages(to_install, dependencies = TRUE)
invisible(lapply(req, library, character.only = TRUE))

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## Loading required package: Matrix

## Loaded glmnet 4.1-10

## 
## Attaching package: 'pls'

## The following object is masked from 'package:stats':
## 
##     loadings

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:pls':
## 
##     R2

set.seed(42)

library(data.table) 

# make sure data.table is attached
parkinsons <- fread("c:/Users/HP/OneDrive/Documents/School Work/Statistical Modeling/parkinsons_updrs.data")

head(parkinsons)

##    subject#   age   sex test_time motor_UPDRS total_UPDRS Jitter(%) Jitter(Abs)
##       <int> <int> <int>     <num>       <num>       <num>     <num>       <num>
## 1:        1    72     0    5.6431      28.199      34.398   0.00662   3.380e-05
## 2:        1    72     0   12.6660      28.447      34.894   0.00300   1.680e-05
## 3:        1    72     0   19.6810      28.695      35.389   0.00481   2.462e-05
## 4:        1    72     0   25.6470      28.905      35.810   0.00528   2.657e-05
## 5:        1    72     0   33.6420      29.187      36.375   0.00335   2.014e-05
## 6:        1    72     0   40.6520      29.435      36.870   0.00353   2.290e-05
##    Jitter:RAP Jitter:PPQ5 Jitter:DDP Shimmer Shimmer(dB) Shimmer:APQ3
##         <num>       <num>      <num>   <num>       <num>        <num>
## 1:    0.00401     0.00317    0.01204 0.02565       0.230      0.01438
## 2:    0.00132     0.00150    0.00395 0.02024       0.179      0.00994
## 3:    0.00205     0.00208    0.00616 0.01675       0.181      0.00734
## 4:    0.00191     0.00264    0.00573 0.02309       0.327      0.01106
## 5:    0.00093     0.00130    0.00278 0.01703       0.176      0.00679
## 6:    0.00119     0.00159    0.00357 0.02227       0.214      0.01006
##    Shimmer:APQ5 Shimmer:APQ11 Shimmer:DDA      NHR    HNR    RPDE     DFA
##           <num>         <num>       <num>    <num>  <num>   <num>   <num>
## 1:      0.01309       0.01662     0.04314 0.014290 21.640 0.41888 0.54842
## 2:      0.01072       0.01689     0.02982 0.011112 27.183 0.43493 0.56477
## 3:      0.00844       0.01458     0.02202 0.020220 23.047 0.46222 0.54405
## 4:      0.01265       0.01963     0.03317 0.027837 24.445 0.48730 0.57794
## 5:      0.00929       0.01819     0.02036 0.011625 26.126 0.47188 0.56122
## 6:      0.01337       0.02263     0.03019 0.009438 22.946 0.53949 0.57243
##        PPE
##      <num>
## 1: 0.16006
## 2: 0.10810
## 3: 0.21014
## 4: 0.33277
## 5: 0.19361
## 6: 0.19500

# quick peek at the first few rows
parkinsons

##       subject#   age   sex test_time motor_UPDRS total_UPDRS Jitter(%)
##          <int> <int> <int>     <num>       <num>       <num>     <num>
##    1:        1    72     0    5.6431      28.199      34.398   0.00662
##    2:        1    72     0   12.6660      28.447      34.894   0.00300
##    3:        1    72     0   19.6810      28.695      35.389   0.00481
##    4:        1    72     0   25.6470      28.905      35.810   0.00528
##    5:        1    72     0   33.6420      29.187      36.375   0.00335
##   ---                                                                 
## 5871:       42    61     0  142.7900      22.485      33.485   0.00406
## 5872:       42    61     0  149.8400      21.988      32.988   0.00297
## 5873:       42    61     0  156.8200      21.495      32.495   0.00349
## 5874:       42    61     0  163.7300      21.007      32.007   0.00281
## 5875:       42    61     0  170.7300      20.513      31.513   0.00282
##       Jitter(Abs) Jitter:RAP Jitter:PPQ5 Jitter:DDP Shimmer Shimmer(dB)
##             <num>      <num>       <num>      <num>   <num>       <num>
##    1:   3.380e-05    0.00401     0.00317    0.01204 0.02565       0.230
##    2:   1.680e-05    0.00132     0.00150    0.00395 0.02024       0.179
##    3:   2.462e-05    0.00205     0.00208    0.00616 0.01675       0.181
##    4:   2.657e-05    0.00191     0.00264    0.00573 0.02309       0.327
##    5:   2.014e-05    0.00093     0.00130    0.00278 0.01703       0.176
##   ---                                                                  
## 5871:   3.113e-05    0.00167     0.00168    0.00500 0.01896       0.160
## 5872:   2.469e-05    0.00119     0.00147    0.00358 0.02315       0.215
## 5873:   2.470e-05    0.00152     0.00187    0.00456 0.02499       0.244
## 5874:   2.034e-05    0.00128     0.00151    0.00383 0.01484       0.131
## 5875:   2.110e-05    0.00135     0.00166    0.00406 0.01907       0.171
##       Shimmer:APQ3 Shimmer:APQ5 Shimmer:APQ11 Shimmer:DDA      NHR    HNR
##              <num>        <num>         <num>       <num>    <num>  <num>
##    1:      0.01438      0.01309       0.01662     0.04314 0.014290 21.640
##    2:      0.00994      0.01072       0.01689     0.02982 0.011112 27.183
##    3:      0.00734      0.00844       0.01458     0.02202 0.020220 23.047
##    4:      0.01106      0.01265       0.01963     0.03317 0.027837 24.445
##    5:      0.00679      0.00929       0.01819     0.02036 0.011625 26.126
##   ---                                                                    
## 5871:      0.00973      0.01133       0.01549     0.02920 0.025137 22.369
## 5872:      0.01052      0.01277       0.01904     0.03157 0.011927 22.886
## 5873:      0.01371      0.01456       0.01877     0.04112 0.017701 25.065
## 5874:      0.00693      0.00870       0.01307     0.02078 0.007984 24.422
## 5875:      0.00946      0.01154       0.01470     0.02839 0.008172 23.259
##          RPDE     DFA     PPE
##         <num>   <num>   <num>
##    1: 0.41888 0.54842 0.16006
##    2: 0.43493 0.56477 0.10810
##    3: 0.46222 0.54405 0.21014
##    4: 0.48730 0.57794 0.33277
##    5: 0.47188 0.56122 0.19361
##   ---                        
## 5871: 0.64215 0.55314 0.21367
## 5872: 0.52598 0.56518 0.12621
## 5873: 0.47792 0.57888 0.14157
## 5874: 0.56865 0.56327 0.14204
## 5875: 0.58608 0.57077 0.15336

# ---- 2) Basic cleaning / naming ----
old_names <- names(parkinsons)                                            
# keep original names in case we ever need to compare

names(parkinsons) <- make.names(names(parkinsons), unique = TRUE)        
# make column names R-safe (no %,: spaces)

if ("sex" %in% names(parkinsons)) parkinsons$sex <- factor(parkinsons$sex)
# treat sex as a factor if present

stopifnot("total_UPDRS" %in% names(parkinsons))                           
# fail early if the response column is missing

parkinsons <- na.omit(parkinsons)                                         
# drop any rows with missing values

str(parkinsons)

## Classes 'data.table' and 'data.frame':   5875 obs. of  22 variables:
##  $ subject.     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ age          : int  72 72 72 72 72 72 72 72 72 72 ...
##  $ sex          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ test_time    : num  5.64 12.67 19.68 25.65 33.64 ...
##  $ motor_UPDRS  : num  28.2 28.4 28.7 28.9 29.2 ...
##  $ total_UPDRS  : num  34.4 34.9 35.4 35.8 36.4 ...
##  $ Jitter...    : num  0.00662 0.003 0.00481 0.00528 0.00335 0.00353 0.00422 0.00476 0.00432 0.00496 ...
##  $ Jitter.Abs.  : num  3.38e-05 1.68e-05 2.46e-05 2.66e-05 2.01e-05 ...
##  $ Jitter.RAP   : num  0.00401 0.00132 0.00205 0.00191 0.00093 0.00119 0.00212 0.00226 0.00156 0.00258 ...
##  $ Jitter.PPQ5  : num  0.00317 0.0015 0.00208 0.00264 0.0013 0.00159 0.00221 0.00259 0.00207 0.00253 ...
##  $ Jitter.DDP   : num  0.01204 0.00395 0.00616 0.00573 0.00278 ...
##  $ Shimmer      : num  0.0256 0.0202 0.0168 0.0231 0.017 ...
##  $ Shimmer.dB.  : num  0.23 0.179 0.181 0.327 0.176 0.214 0.445 0.212 0.371 0.31 ...
##  $ Shimmer.APQ3 : num  0.01438 0.00994 0.00734 0.01106 0.00679 ...
##  $ Shimmer.APQ5 : num  0.01309 0.01072 0.00844 0.01265 0.00929 ...
##  $ Shimmer.APQ11: num  0.0166 0.0169 0.0146 0.0196 0.0182 ...
##  $ Shimmer.DDA  : num  0.0431 0.0298 0.022 0.0332 0.0204 ...
##  $ NHR          : num  0.0143 0.0111 0.0202 0.0278 0.0116 ...
##  $ HNR          : num  21.6 27.2 23 24.4 26.1 ...
##  $ RPDE         : num  0.419 0.435 0.462 0.487 0.472 ...
##  $ DFA          : num  0.548 0.565 0.544 0.578 0.561 ...
##  $ PPE          : num  0.16 0.108 0.21 0.333 0.194 ...
##  - attr(*, ".internal.selfref")=<externalptr>

# structure overview to confirm types

head(parkinsons)

##    subject.   age    sex test_time motor_UPDRS total_UPDRS Jitter...
##       <int> <int> <fctr>     <num>       <num>       <num>     <num>
## 1:        1    72      0    5.6431      28.199      34.398   0.00662
## 2:        1    72      0   12.6660      28.447      34.894   0.00300
## 3:        1    72      0   19.6810      28.695      35.389   0.00481
## 4:        1    72      0   25.6470      28.905      35.810   0.00528
## 5:        1    72      0   33.6420      29.187      36.375   0.00335
## 6:        1    72      0   40.6520      29.435      36.870   0.00353
##    Jitter.Abs. Jitter.RAP Jitter.PPQ5 Jitter.DDP Shimmer Shimmer.dB.
##          <num>      <num>       <num>      <num>   <num>       <num>
## 1:   3.380e-05    0.00401     0.00317    0.01204 0.02565       0.230
## 2:   1.680e-05    0.00132     0.00150    0.00395 0.02024       0.179
## 3:   2.462e-05    0.00205     0.00208    0.00616 0.01675       0.181
## 4:   2.657e-05    0.00191     0.00264    0.00573 0.02309       0.327
## 5:   2.014e-05    0.00093     0.00130    0.00278 0.01703       0.176
## 6:   2.290e-05    0.00119     0.00159    0.00357 0.02227       0.214
##    Shimmer.APQ3 Shimmer.APQ5 Shimmer.APQ11 Shimmer.DDA      NHR    HNR    RPDE
##           <num>        <num>         <num>       <num>    <num>  <num>   <num>
## 1:      0.01438      0.01309       0.01662     0.04314 0.014290 21.640 0.41888
## 2:      0.00994      0.01072       0.01689     0.02982 0.011112 27.183 0.43493
## 3:      0.00734      0.00844       0.01458     0.02202 0.020220 23.047 0.46222
## 4:      0.01106      0.01265       0.01963     0.03317 0.027837 24.445 0.48730
## 5:      0.00679      0.00929       0.01819     0.02036 0.011625 26.126 0.47188
## 6:      0.01006      0.01337       0.02263     0.03019 0.009438 22.946 0.53949
##        DFA     PPE
##      <num>   <num>
## 1: 0.54842 0.16006
## 2: 0.56477 0.10810
## 3: 0.54405 0.21014
## 4: 0.57794 0.33277
## 5: 0.56122 0.19361
## 6: 0.57243 0.19500

# another quick peek

# ---- 3) Train/Test split ----
library(caret)                                                           
# we'll use caret to split the data

set.seed(42)                                                             
# set seed again for the split

idx <- createDataPartition(parkinsons$total_UPDRS, p = 0.8, list = FALSE)
# stratified split index (80% train)

train <- parkinsons[idx, ]                                              
# training set

test  <- parkinsons[-idx, ]                                             
# test set

# ---- 4) Model matrices (for glmnet) ----
# Build a formula that includes all predictors except the response
x_formula <- as.formula(paste("~", paste(setdiff(names(train), "total_UPDRS"), collapse = "+"))) 
x_train <- model.matrix(x_formula, data = train)[, -1, drop = FALSE]    
# design matrix for train (drop intercept col)
x_test  <- model.matrix(x_formula, data = test)[, -1, drop = FALSE]       
# design matrix for test (drop intercept col)
y_train <- train$total_UPDRS                                            
# numeric response for train
y_test  <- test$total_UPDRS                                             
# numeric response for test

# ---- 5) Helper: simple metrics ----
metrics <- function(obs, pred) {                                         
  # small function to compute common metrics
  rmse <- RMSE(pred, obs)                                                
  # root mean squared error
  mae  <- MAE(pred, obs)                                                 
  # mean absolute error
  r2   <- cor(obs, pred)^2                                               
  # squared correlation as a simple R^2
  c(RMSE = rmse, MAE = mae, R2 = r2)                                      # return a named numeric vector
}

# =============================================================================
# A) RIDGE (alpha = 0)
# =============================================================================
library(glmnet)                                                           # ensure glmnet is attached

set.seed(42)                                                              # seed for cross-validation
cv_ridge <- cv.glmnet(x_train, y_train, alpha = 0, nfolds = 10,           # 10-fold CV for ridge
                      standardize = TRUE)

plot(cv_ridge)                                                            # show CV curve (λ vs. CV error) in Plots pane
title("Ridge CV Curve (alpha = 0)")                                       # add a title on top

ridge_fit <- glmnet(x_train, y_train, alpha = 0,                          # fit ridge at the best λ from CV
                    lambda = cv_ridge$lambda.min, standardize = TRUE)

ridge_coef <- coef(ridge_fit)                                             # coefficient vector (including intercept)
ridge_coef_df <- data.frame(                                              # tidy up as a data frame
  Feature = rownames(ridge_coef),
  Coefficient = as.numeric(ridge_coef)
)
print(head(ridge_coef_df, 15))                                            # show the first few ridge coefficients

##         Feature   Coefficient
## 1   (Intercept)  4.226154e+00
## 2      subject.  5.651063e-02
## 3           age  8.832910e-02
## 4          sex1 -1.796332e+00
## 5     test_time  2.695492e-03
## 6   motor_UPDRS  1.083190e+00
## 7     Jitter... -3.913412e+01
## 8   Jitter.Abs.  4.805318e+03
## 9    Jitter.RAP  5.117704e+01
## 10  Jitter.PPQ5 -4.440975e+00
## 11   Jitter.DDP  1.433628e+01
## 12      Shimmer -3.887519e+00
## 13  Shimmer.dB. -5.807030e-01
## 14 Shimmer.APQ3  3.647852e+00
## 15 Shimmer.APQ5  9.023994e+00

ridge_pred_tr <- as.numeric(predict(ridge_fit, newx = x_train))           # fitted values on train
ridge_pred_te <- as.numeric(predict(ridge_fit, newx = x_test))            # predictions on test
ridge_tr_m <- metrics(y_train, ridge_pred_tr)                             # train metrics
ridge_te_m <- metrics(y_test, ridge_pred_te)                              # test metrics
print(ridge_te_m)                                                         # print ridge test metrics

##      RMSE       MAE        R2 
## 3.2685552 2.3710977 0.9125407

ridge_path <- glmnet(x_train, y_train, alpha = 0, standardize = TRUE)     # refit across many λ for path plotting
plot(ridge_path, xvar = "lambda", label = FALSE)                          # ridge coefficient paths vs λ
title("Ridge Coefficient Paths")                                          # add title

ridge_resid <- y_test - ridge_pred_te                                     # residuals on the test set
plot(ridge_pred_te, ridge_resid, pch = 20,                                # residuals vs fitted to check structure
     main = "Ridge: Residuals vs Fitted", xlab = "Fitted", ylab = "Residuals", col = "pink")
abline(h = 0, lty = 2)                                                    # reference line at zero

qqnorm(ridge_resid, main = "Ridge: Normal Q-Q", col = "purple")                           # Q-Q plot for residual normality check
qqline(ridge_resid)                                                       # reference line

# =============================================================================
# B) LASSO (alpha = 1)
# =============================================================================
set.seed(42)                                                              # seed for cross-validation
cv_lasso <- cv.glmnet(x_train, y_train, alpha = 1, nfolds = 10,           # 10-fold CV for lasso
                      standardize = TRUE)

plot(cv_lasso)                                                            # show CV curve for lasso in Plots pane
title("LASSO CV Curve (alpha = 1)")                                       # add title

lasso_fit <- glmnet(x_train, y_train, alpha = 1,                          # fit lasso at best λ
                    lambda = cv_lasso$lambda.min, standardize = TRUE)

lasso_coef <- coef(lasso_fit)                                             # coefficient vector (includes intercept)
lasso_coef_df <- data.frame(Feature = rownames(lasso_coef),               # tidy coefficients
                            Coefficient = as.numeric(lasso_coef))
print(head(lasso_coef_df, 15))                                            # show a sample of coefficients

##         Feature   Coefficient
## 1   (Intercept)  1.938297e+00
## 2      subject.  4.589583e-02
## 3           age  7.294393e-02
## 4          sex1 -1.769383e+00
## 5     test_time  1.895935e-03
## 6   motor_UPDRS  1.206547e+00
## 7     Jitter... -3.341993e+02
## 8   Jitter.Abs.  1.695020e+04
## 9    Jitter.RAP  4.515372e+02
## 10  Jitter.PPQ5  5.187883e+01
## 11   Jitter.DDP  0.000000e+00
## 12      Shimmer -3.029778e+01
## 13  Shimmer.dB.  0.000000e+00
## 14 Shimmer.APQ3  0.000000e+00
## 15 Shimmer.APQ5  9.309207e+01

lasso_selected <- subset(lasso_coef_df, Feature != "(Intercept)" &        # keep features with non-zero coeffs
                           abs(Coefficient) > 0)
print(lasso_selected[order(-abs(lasso_selected$Coefficient)), ][1:20, ])  # show top 20 by absolute size

##            Feature   Coefficient
## 8      Jitter.Abs.  1.695020e+04
## 9       Jitter.RAP  4.515372e+02
## 7        Jitter... -3.341993e+02
## 15    Shimmer.APQ5  9.309207e+01
## 10     Jitter.PPQ5  5.187883e+01
## 16   Shimmer.APQ11 -4.078055e+01
## 12         Shimmer -3.029778e+01
## 17     Shimmer.DDA -4.856863e+00
## 18             NHR -4.758957e+00
## 22             PPE -3.686316e+00
## 21             DFA -3.680895e+00
## 20            RPDE  2.652370e+00
## 4             sex1 -1.769383e+00
## 6      motor_UPDRS  1.206547e+00
## 19             HNR -7.817605e-02
## 3              age  7.294393e-02
## 2         subject.  4.589583e-02
## 5        test_time  1.895935e-03
## NA            <NA>            NA
## NA.1          <NA>            NA

lasso_pred_tr <- as.numeric(predict(lasso_fit, newx = x_train))           # fitted values on train
lasso_pred_te <- as.numeric(predict(lasso_fit, newx = x_test))            # predictions on test
lasso_tr_m <- metrics(y_train, lasso_pred_tr)                             # train metrics
lasso_te_m <- metrics(y_test, lasso_pred_te)                              # test metrics
print(lasso_te_m)                                                         # print lasso test metrics

##      RMSE       MAE        R2 
## 3.1319922 2.3101629 0.9137848

lasso_path <- glmnet(x_train, y_train, alpha = 1, standardize = TRUE)     # refit across λ for path plotting
plot(lasso_path, xvar = "lambda", label = FALSE)                          # lasso coefficient paths vs λ
title("LASSO Coefficient Paths")                                          # add title

lasso_resid <- y_test - lasso_pred_te                                     # residuals on test
plot(lasso_pred_te, lasso_resid, pch = 20,                                # residuals vs fitted
     main = "LASSO: Residuals vs Fitted", xlab = "Fitted", ylab = "Residuals")
abline(h = 0, lty = 2)                                                    # reference line

qqnorm(lasso_resid, main = "LASSO: Normal Q-Q")                           # Q-Q plot
qqline(lasso_resid)                                                       # reference line

# C) PLS (pls::plsr)
# =============================================================================
library(pls)                                                              # attach pls for plsr()

train_pls <- cbind.data.frame(total_UPDRS = y_train, as.data.frame(x_train)) # build a data.frame for plsr
test_pls  <- cbind.data.frame(total_UPDRS = y_test,  as.data.frame(x_test))  # same for test

set.seed(42)                                                              # seed for CV component selection
pls_cv <- plsr(total_UPDRS ~ ., data = train_pls, scale = TRUE,           # fit PLS with scaling and K-fold CV
               validation = "CV", segments = 10)

plot(RMSEP(pls_cv), main = "PLS: CV RMSEP by # Components")               # visualize CV error across components

rmsep_vals <- drop(RMSEP(pls_cv)$val[1,1,])                               # pull CV RMSEP for 0..m comps into a vector
best_comp <- which.min(rmsep_vals) - 1                                    # index minus 1 = number of components
if (best_comp < 1) best_comp <- 1                                         # ensure we pick at least 1 component
cat("Best number of PLS components:", best_comp, "\n")                    # print chosen number of comps

## Best number of PLS components: 14

pls_pred_tr <- as.numeric(predict(pls_cv, ncomp = best_comp,              # predict on train set with chosen comps
                                  newdata = train_pls))
pls_pred_te <- as.numeric(predict(pls_cv, ncomp = best_comp,              # predict on test set with chosen comps
                                  newdata = test_pls))

pls_tr_m <- metrics(train_pls$total_UPDRS, pls_pred_tr)                   # train metrics
pls_te_m <- metrics(test_pls$total_UPDRS,  pls_pred_te)                   # test metrics
print(pls_te_m)                                                           # print PLS test metrics

##      RMSE       MAE        R2 
## 3.1335417 2.3166312 0.9136953

pls_coef <- coef(pls_cv, ncomp = best_comp, intercept = TRUE)             # get coefficients including intercept
pls_coef_df <- data.frame(Feature = rownames(pls_coef),                   # tidy coefficients
                          Coefficient = as.numeric(pls_coef))
print(head(pls_coef_df, 15))                                              # show a sample of PLS coefficients

##         Feature Coefficient
## 1   (Intercept)   2.2703213
## 2      subject.   0.5738272
## 3           age   0.6578161
## 4          sex1  -0.8420905
## 5     test_time   0.1094232
## 6   motor_UPDRS   9.8276103
## 7     Jitter...  -2.1379967
## 8   Jitter.Abs.   0.5960258
## 9    Jitter.RAP   0.8588487
## 10  Jitter.PPQ5   0.1520581
## 11   Jitter.DDP   0.8577049
## 12      Shimmer  -0.7315441
## 13  Shimmer.dB.  -0.1897086
## 14 Shimmer.APQ3  -0.1845148
## 15 Shimmer.APQ5   1.8911502

pls_resid <- test_pls$total_UPDRS - pls_pred_te                           # residuals for diagnostic plots
plot(pls_pred_te, pls_resid, pch = 20,                                    # residuals vs fitted
     main = paste0("PLS: Residuals vs Fitted (", best_comp, " comps)"),
     xlab = "Fitted", ylab = "Residuals")
abline(h = 0, lty = 2)                                                    # reference line

qqnorm(pls_resid, main = "PLS: Normal Q-Q")                               # Q-Q plot for residuals
qqline(pls_resid)                                                         # reference line

# =============================================================================
# D) Compare models on the test set
# =============================================================================
library(dplyr)                                                            # attach dplyr for binding/formatting
comp <- bind_rows(                                                        # bind metric rows for each model
  data.frame(Model = "Ridge (lambda.min)", t(ridge_te_m)),
  data.frame(Model = "LASSO (lambda.min)", t(lasso_te_m)),
  data.frame(Model = paste0("PLS (", best_comp, " comps)"), t(pls_te_m))
) %>% mutate(across(where(is.numeric), ~round(., 4)))                     # round numbers for neat printing

print(comp)                                                               # show the comparison table in console

##                Model   RMSE    MAE     R2
## 1 Ridge (lambda.min) 3.2686 2.3711 0.9125
## 2 LASSO (lambda.min) 3.1320 2.3102 0.9138
## 3     PLS (14 comps) 3.1335 2.3166 0.9137

# (Optional) quick bar plot of RMSE by model for your report
library(ggplot2)                                                          # use ggplot2 for a quick visual
ggplot(comp, aes(x = Model, y = RMSE)) +                                  # RMSE by model
  geom_col() +                                                            # simple columns
  ggtitle("Test RMSE by Model") +                                         # title for context
  xlab("") + ylab("RMSE") +                                               # axis labels
  theme_minimal() +                                                       # clean theme
  theme(axis.text.x = element_text(angle = 20, hjust = 1))                # tilt labels for readability