# Install ISLR2 if not already installed
# install.packages("ISLR2")
library(ISLR2)
library(tidyverse) # ggplot2, dplyr, etc.
library(GGally) # ggpairs for scatterplot matrices
library(corrplot) # correlation matrix visualisation
library(car) # vif(), leveragePlots()
library(caret) # confusionMatrix()
library(knitr) # kable() for nice tables
library(kableExtra) # extra table formattingTask: Explore the
Autodata set from the ISLR2 package. Remove observations with missing values, then answer questions (a)–(f).
# Load the Auto dataset and drop any rows with NAs
data("Auto")
Auto <- na.omit(Auto)
# Quick overview
glimpse(Auto)## Rows: 392
## Columns: 9
## $ mpg <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, 15, 14, 15, 14, 2…
## $ cylinders <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, …
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383, 34…
## $ horsepower <int> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170, 16…
## $ weight <int> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, 385…
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8.5, …
## $ year <int> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 7…
## $ origin <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, …
## $ name <fct> chevrolet chevelle malibu, buick skylark 320, plymouth sa…
kable(head(Auto, 8), caption = "First 8 rows of the Auto data set") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)| mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | name |
|---|---|---|---|---|---|---|---|---|
| 18 | 8 | 307 | 130 | 3504 | 12.0 | 70 | 1 | chevrolet chevelle malibu |
| 15 | 8 | 350 | 165 | 3693 | 11.5 | 70 | 1 | buick skylark 320 |
| 18 | 8 | 318 | 150 | 3436 | 11.0 | 70 | 1 | plymouth satellite |
| 16 | 8 | 304 | 150 | 3433 | 12.0 | 70 | 1 | amc rebel sst |
| 17 | 8 | 302 | 140 | 3449 | 10.5 | 70 | 1 | ford torino |
| 15 | 8 | 429 | 198 | 4341 | 10.0 | 70 | 1 | ford galaxie 500 |
| 14 | 8 | 454 | 220 | 4354 | 9.0 | 70 | 1 | chevrolet impala |
| 14 | 8 | 440 | 215 | 4312 | 8.5 | 70 | 1 | plymouth fury iii |
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : int 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : int 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : int 3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : int 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : int 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
## ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...
Answer:
mpg,
cylinders, displacement,
horsepower, weight, acceleration,
yearorigin (coded
1/2/3 representing American/European/Japanese), name (car
model name — a character string)Note: Although
cylindersandoriginare stored as integers,originis conceptually categorical.cylinderscan be treated as either depending on context.
# Select only quantitative columns (excluding 'name' and treating 'origin' as qualitative)
quant_vars <- Auto |> select(mpg, cylinders, displacement, horsepower, weight, acceleration, year)
# Compute range for each variable
range_table <- quant_vars |>
summarise(across(everything(), list(Min = min, Max = max))) |>
pivot_longer(everything(),
names_to = c("Variable", ".value"),
names_sep = "_") |>
mutate(Range = Max - Min)
kable(range_table, caption = "Range of each quantitative predictor", digits = 2) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Variable | Min | Max | Range |
|---|---|---|---|
| mpg | 9 | 46.6 | 37.6 |
| cylinders | 3 | 8.0 | 5.0 |
| displacement | 68 | 455.0 | 387.0 |
| horsepower | 46 | 230.0 | 184.0 |
| weight | 1613 | 5140.0 | 3527.0 |
| acceleration | 8 | 24.8 | 16.8 |
| year | 70 | 82.0 | 12.0 |
summary_table <- quant_vars |>
summarise(across(everything(), list(Mean = mean, SD = sd))) |>
pivot_longer(everything(),
names_to = c("Variable", ".value"),
names_sep = "_")
kable(summary_table, caption = "Mean and Standard Deviation of quantitative predictors",
digits = 3) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Variable | Mean | SD |
|---|---|---|
| mpg | 23.446 | 7.805 |
| cylinders | 5.472 | 1.706 |
| displacement | 194.412 | 104.644 |
| horsepower | 104.469 | 38.491 |
| weight | 2977.584 | 849.403 |
| acceleration | 15.541 | 2.759 |
| year | 75.980 | 3.684 |
# Remove 10th through 85th observations
Auto_subset <- Auto[-(10:85), ]
cat("Dimensions after removing rows 10–85:", dim(Auto_subset), "\n")## Dimensions after removing rows 10–85: 316 9
# Recompute range, mean, SD on the subset
subset_quant <- Auto_subset |> select(mpg, cylinders, displacement, horsepower,
weight, acceleration, year)
subset_summary <- subset_quant |>
summarise(across(everything(),
list(Min = min, Max = max, Mean = mean, SD = sd))) |>
pivot_longer(everything(),
names_to = c("Variable", ".value"),
names_sep = "_") |>
mutate(Range = Max - Min) |>
select(Variable, Min, Max, Range, Mean, SD)
kable(subset_summary,
caption = "Summary statistics for the reduced data set (rows 10–85 removed)",
digits = 3) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Variable | Min | Max | Range | Mean | SD |
|---|---|---|---|---|---|
| mpg | 11.0 | 46.6 | 35.6 | 24.404 | 7.867 |
| cylinders | 3.0 | 8.0 | 5.0 | 5.373 | 1.654 |
| displacement | 68.0 | 455.0 | 387.0 | 187.241 | 99.678 |
| horsepower | 46.0 | 230.0 | 184.0 | 100.722 | 35.709 |
| weight | 1649.0 | 4997.0 | 3348.0 | 2935.972 | 811.300 |
| acceleration | 8.5 | 24.8 | 16.3 | 15.727 | 2.694 |
| year | 70.0 | 82.0 | 12.0 | 77.146 | 3.106 |
# Scatterplot matrix using GGally
ggpairs(quant_vars,
lower = list(continuous = wrap("points", alpha = 0.3, size = 0.8)),
upper = list(continuous = wrap("cor", size = 3.5)),
diag = list(continuous = wrap("densityDiag")),
title = "Scatterplot Matrix of Auto Quantitative Predictors")# Boxplot: mpg by number of cylinders
ggplot(Auto, aes(x = factor(cylinders), y = mpg, fill = factor(cylinders))) +
geom_boxplot(alpha = 0.7) +
scale_fill_brewer(palette = "Set2") +
labs(title = "MPG by Number of Cylinders",
x = "Cylinders",
y = "Miles per Gallon",
fill = "Cylinders") +
theme_minimal()# MPG trend over model year
ggplot(Auto, aes(x = year, y = mpg)) +
geom_jitter(alpha = 0.4, colour = "steelblue") +
geom_smooth(method = "loess", se = TRUE, colour = "tomato") +
labs(title = "MPG vs. Model Year",
x = "Model Year (offset from 1900)",
y = "Miles per Gallon") +
theme_minimal()Findings:
mpg is negatively correlated with
displacement, horsepower, and
weight — heavier, more powerful cars tend to be less
fuel-efficient.mpg is positively correlated with
year, indicating cars have generally become more
fuel-efficient over time.displacement, horsepower, and
weight are strongly positively correlated
with each other, suggesting multicollinearity if all are used as
predictors together.# Correlation of all quantitative predictors with mpg
cor_with_mpg <- cor(quant_vars) |>
as.data.frame() |>
select(mpg) |>
rownames_to_column("Predictor") |>
filter(Predictor != "mpg") |>
rename(Correlation_with_MPG = mpg) |>
arrange(desc(abs(Correlation_with_MPG)))
kable(cor_with_mpg,
caption = "Correlation of each predictor with MPG (sorted by absolute value)",
digits = 3) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(abs(cor_with_mpg$Correlation_with_MPG) > 0.7),
bold = TRUE, background = "#d4edda")| Predictor | Correlation_with_MPG |
|---|---|
| weight | -0.832 |
| displacement | -0.805 |
| horsepower | -0.778 |
| cylinders | -0.778 |
| year | 0.581 |
| acceleration | 0.423 |
Answer:
Based on the scatterplots and correlation values,
weight,
displacement,
horsepower, and
year are the most promising predictors of
mpg:
weight has the strongest negative correlation (≈
−0.83).displacement and horsepower also have high
negative correlations (≈ −0.80 and −0.78 respectively).year has a meaningful positive correlation (≈ +0.58),
capturing fuel economy improvements over time.cylinders and acceleration show weaker or
more noisy relationships.Task: Use multiple linear regression to predict
mpgusing all other numeric variables in theAutodata set.
Auto_num <- Auto |> select(-name) # remove character column
ggpairs(Auto_num,
lower = list(continuous = wrap("points", alpha = 0.3, size = 0.7)),
upper = list(continuous = wrap("cor", size = 3)),
diag = list(continuous = wrap("densityDiag")),
title = "Chapter 3 — Scatterplot Matrix (Auto, all numeric variables)")cor_matrix <- cor(Auto_num)
corrplot(cor_matrix,
method = "color",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
number.cex = 0.75,
title = "Correlation Matrix — Auto (numeric variables)",
mar = c(0, 0, 1.5, 0))kable(round(cor_matrix, 3),
caption = "Correlation matrix of all numeric variables in Auto") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | |
|---|---|---|---|---|---|---|---|---|
| mpg | 1.000 | -0.778 | -0.805 | -0.778 | -0.832 | 0.423 | 0.581 | 0.565 |
| cylinders | -0.778 | 1.000 | 0.951 | 0.843 | 0.898 | -0.505 | -0.346 | -0.569 |
| displacement | -0.805 | 0.951 | 1.000 | 0.897 | 0.933 | -0.544 | -0.370 | -0.615 |
| horsepower | -0.778 | 0.843 | 0.897 | 1.000 | 0.865 | -0.689 | -0.416 | -0.455 |
| weight | -0.832 | 0.898 | 0.933 | 0.865 | 1.000 | -0.417 | -0.309 | -0.585 |
| acceleration | 0.423 | -0.505 | -0.544 | -0.689 | -0.417 | 1.000 | 0.290 | 0.213 |
| year | 0.581 | -0.346 | -0.370 | -0.416 | -0.309 | 0.290 | 1.000 | 0.182 |
| origin | 0.565 | -0.569 | -0.615 | -0.455 | -0.585 | 0.213 | 0.182 | 1.000 |
# Fit MLR: mpg ~ all predictors except 'name'
mlr_fit <- lm(mpg ~ . - name, data = Auto)
summary(mlr_fit)##
## Call:
## lm(formula = mpg ~ . - name, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
# Tidy summary table
coef_table <- broom::tidy(mlr_fit) |>
mutate(significant = ifelse(p.value < 0.05, "Yes ✓", "No"))
kable(coef_table, digits = 4,
caption = "MLR Coefficient Estimates (response: mpg)") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(coef_table$significant == "Yes ✓"),
bold = TRUE, background = "#d4edda")| term | estimate | std.error | statistic | p.value | significant |
|---|---|---|---|---|---|
| (Intercept) | -17.2184 | 4.6443 | -3.7074 | 0.0002 | Yes ✓ |
| cylinders | -0.4934 | 0.3233 | -1.5261 | 0.1278 | No |
| displacement | 0.0199 | 0.0075 | 2.6474 | 0.0084 | Yes ✓ |
| horsepower | -0.0170 | 0.0138 | -1.2295 | 0.2196 | No |
| weight | -0.0065 | 0.0007 | -9.9288 | 0.0000 | Yes ✓ |
| acceleration | 0.0806 | 0.0988 | 0.8152 | 0.4155 | No |
| year | 0.7508 | 0.0510 | 14.7288 | 0.0000 | Yes ✓ |
| origin | 1.4261 | 0.2781 | 5.1275 | 0.0000 | Yes ✓ |
Comments:
i. Is there a relationship between the predictors and the
response? Yes. The overall F-statistic is large with a
near-zero p-value, indicating a statistically significant relationship
between the set of predictors and mpg.
ii. Which predictors have a statistically significant
relationship with mpg? At the α = 0.05 level, the
significant predictors are: displacement,
weight, year, and origin.
horsepower and acceleration are not
significant in the presence of the others.
iii. What does the coefficient for year
suggest? The positive coefficient for year
(approximately +0.75) indicates that, holding all other variables
constant, fuel efficiency increases by about 0.75 mpg for each
additional model year. This reflects the general trend of improving fuel
economy in newer cars.
Comments:
# Fit model with key interaction terms
int_fit <- lm(mpg ~ displacement + horsepower + weight + acceleration +
year + origin + cylinders +
displacement:weight +
horsepower:weight +
year:origin,
data = Auto)
summary(int_fit)##
## Call:
## lm(formula = mpg ~ displacement + horsepower + weight + acceleration +
## year + origin + cylinders + displacement:weight + horsepower:weight +
## year:origin, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4370 -1.6736 -0.0609 1.4670 11.8289
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.847e+01 8.064e+00 2.291 0.02251 *
## displacement -3.328e-02 2.044e-02 -1.629 0.10421
## horsepower -1.379e-01 5.018e-02 -2.748 0.00628 **
## weight -1.095e-02 7.296e-04 -15.002 < 2e-16 ***
## acceleration -9.464e-03 9.496e-02 -0.100 0.92066
## year 5.300e-01 1.029e-01 5.149 4.21e-07 ***
## origin -1.084e+01 4.391e+00 -2.470 0.01396 *
## cylinders 4.277e-02 2.899e-01 0.148 0.88278
## displacement:weight 1.093e-05 5.675e-06 1.926 0.05483 .
## horsepower:weight 2.979e-05 1.343e-05 2.219 0.02711 *
## year:origin 1.483e-01 5.614e-02 2.641 0.00861 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.907 on 381 degrees of freedom
## Multiple R-squared: 0.8649, Adjusted R-squared: 0.8613
## F-statistic: 243.8 on 10 and 381 DF, p-value: < 2.2e-16
int_coef <- broom::tidy(int_fit) |>
filter(str_detect(term, ":")) |> # keep only interaction terms
mutate(significant = ifelse(p.value < 0.05, "Yes ✓", "No"))
kable(int_coef, digits = 4,
caption = "Interaction term estimates only") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| term | estimate | std.error | statistic | p.value | significant |
|---|---|---|---|---|---|
| displacement:weight | 0.0000 | 0.0000 | 1.9262 | 0.0548 | No |
| horsepower:weight | 0.0000 | 0.0000 | 2.2185 | 0.0271 | Yes ✓ |
| year:origin | 0.1483 | 0.0561 | 2.6410 | 0.0086 | Yes ✓ |
Comment: The interaction between
displacement and weight (and/or
horsepower and weight) tends to be
statistically significant, indicating that the effect of engine
displacement on mpg depends on vehicle weight. The
year:origin interaction may also be significant, suggesting
that the rate of fuel economy improvement differs across American,
European, and Japanese cars.
# Log transformation of horsepower
fit_log <- lm(mpg ~ log(horsepower) + weight + year + origin, data = Auto)
# Square root transformation of displacement
fit_sqrt <- lm(mpg ~ sqrt(displacement) + weight + year + origin, data = Auto)
# Polynomial (quadratic) weight
fit_poly <- lm(mpg ~ I(weight^2) + weight + horsepower + year + origin, data = Auto)
results_df <- data.frame(
Model = c("log(horsepower)", "sqrt(displacement)", "weight + weight^2"),
Adj_R2 = c(summary(fit_log)$adj.r.squared,
summary(fit_sqrt)$adj.r.squared,
summary(fit_poly)$adj.r.squared),
RSE = c(summary(fit_log)$sigma,
summary(fit_sqrt)$sigma,
summary(fit_poly)$sigma)
)
kable(results_df, digits = 4,
caption = "Comparison of models with transformed predictors") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Model | Adj_R2 | RSE |
|---|---|---|
| log(horsepower) | 0.8258 | 3.2572 |
| sqrt(displacement) | 0.8159 | 3.3487 |
| weight + weight^2 | 0.8522 | 3.0001 |
# Visualise: mpg vs log(horsepower)
ggplot(Auto, aes(x = log(horsepower), y = mpg)) +
geom_point(alpha = 0.5, colour = "steelblue") +
geom_smooth(method = "lm", colour = "tomato", se = TRUE) +
labs(title = "MPG vs. log(Horsepower)",
x = "log(Horsepower)", y = "MPG") +
theme_minimal()Comment: Taking the log of horsepower
or the square root of displacement linearises the
relationship with mpg more effectively than the raw
predictors, producing higher adjusted R² and lower residual standard
error. The quadratic term for weight also captures the
slightly curved relationship visible in the scatterplot.
Task: Predict per capita crime rate (
crim) using the other variables in theBostondata set.
## Rows: 506
## Columns: 13
## $ crim <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nox <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ lstat <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
kable(head(Boston, 6), caption = "First 6 rows of the Boston data set") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| crim | zn | indus | chas | nox | rm | age | dis | rad | tax | ptratio | lstat | medv |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.00632 | 18 | 2.31 | 0 | 0.538 | 6.575 | 65.2 | 4.0900 | 1 | 296 | 15.3 | 4.98 | 24.0 |
| 0.02731 | 0 | 7.07 | 0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2 | 242 | 17.8 | 9.14 | 21.6 |
| 0.02729 | 0 | 7.07 | 0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2 | 242 | 17.8 | 4.03 | 34.7 |
| 0.03237 | 0 | 2.18 | 0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3 | 222 | 18.7 | 2.94 | 33.4 |
| 0.06905 | 0 | 2.18 | 0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3 | 222 | 18.7 | 5.33 | 36.2 |
| 0.02985 | 0 | 2.18 | 0 | 0.458 | 6.430 | 58.7 | 6.0622 | 3 | 222 | 18.7 | 5.21 | 28.7 |
predictors <- setdiff(names(Boston), "crim")
# Fit SLR for each predictor and extract key statistics
slr_results <- map_dfr(predictors, function(pred) {
formula <- as.formula(paste("crim ~", pred))
fit <- lm(formula, data = Boston)
s <- summary(fit)
tibble(
Predictor = pred,
Coefficient = coef(fit)[2],
p_value = coef(s)[2, 4],
R_squared = s$r.squared
)
}) |>
mutate(Significant = ifelse(p_value < 0.05, "Yes ✓", "No")) |>
arrange(p_value)
kable(slr_results, digits = 4,
caption = "Simple Linear Regression results: crim ~ each predictor") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(slr_results$Significant == "Yes ✓"),
background = "#d4edda")| Predictor | Coefficient | p_value | R_squared | Significant |
|---|---|---|---|---|
| rad | 0.6179 | 0.0000 | 0.3913 | Yes ✓ |
| tax | 0.0297 | 0.0000 | 0.3396 | Yes ✓ |
| lstat | 0.5488 | 0.0000 | 0.2076 | Yes ✓ |
| nox | 31.2485 | 0.0000 | 0.1772 | Yes ✓ |
| indus | 0.5098 | 0.0000 | 0.1653 | Yes ✓ |
| medv | -0.3632 | 0.0000 | 0.1508 | Yes ✓ |
| dis | -1.5509 | 0.0000 | 0.1441 | Yes ✓ |
| age | 0.1078 | 0.0000 | 0.1244 | Yes ✓ |
| ptratio | 1.1520 | 0.0000 | 0.0841 | Yes ✓ |
| rm | -2.6841 | 0.0000 | 0.0481 | Yes ✓ |
| zn | -0.0739 | 0.0000 | 0.0402 | Yes ✓ |
| chas | -1.8928 | 0.2094 | 0.0031 | No |
# Visual: top 4 predictors by R²
top4 <- slr_results |> slice_max(R_squared, n = 4) |> pull(Predictor)
plot_list <- map(top4, function(pred) {
ggplot(Boston, aes(x = .data[[pred]], y = crim)) +
geom_point(alpha = 0.4, colour = "steelblue") +
geom_smooth(method = "lm", colour = "tomato", se = TRUE) +
labs(title = paste("crim ~", pred),
x = pred, y = "Crime Rate") +
theme_minimal(base_size = 11)
})
library(patchwork)
(plot_list[[1]] | plot_list[[2]]) / (plot_list[[3]] | plot_list[[4]])Findings: Almost all predictors show a statistically
significant univariate association with crim. The strongest
individual predictors (by R²) include rad (index of
accessibility to radial highways), tax (property tax rate),
and lstat (lower-status population share).
##
## Call:
## lm(formula = crim ~ ., data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.534 -2.248 -0.348 1.087 73.923
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.7783938 7.0818258 1.946 0.052271 .
## zn 0.0457100 0.0187903 2.433 0.015344 *
## indus -0.0583501 0.0836351 -0.698 0.485709
## chas -0.8253776 1.1833963 -0.697 0.485841
## nox -9.9575865 5.2898242 -1.882 0.060370 .
## rm 0.6289107 0.6070924 1.036 0.300738
## age -0.0008483 0.0179482 -0.047 0.962323
## dis -1.0122467 0.2824676 -3.584 0.000373 ***
## rad 0.6124653 0.0875358 6.997 8.59e-12 ***
## tax -0.0037756 0.0051723 -0.730 0.465757
## ptratio -0.3040728 0.1863598 -1.632 0.103393
## lstat 0.1388006 0.0757213 1.833 0.067398 .
## medv -0.2200564 0.0598240 -3.678 0.000261 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.46 on 493 degrees of freedom
## Multiple R-squared: 0.4493, Adjusted R-squared: 0.4359
## F-statistic: 33.52 on 12 and 493 DF, p-value: < 2.2e-16
mlr_boston_tidy <- broom::tidy(mlr_boston) |>
mutate(Significant = ifelse(p.value < 0.05, "Yes ✓", "No"))
kable(mlr_boston_tidy, digits = 4,
caption = "Multiple Regression: crim ~ all predictors") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(mlr_boston_tidy$Significant == "Yes ✓"),
bold = TRUE, background = "#d4edda")| term | estimate | std.error | statistic | p.value | Significant |
|---|---|---|---|---|---|
| (Intercept) | 13.7784 | 7.0818 | 1.9456 | 0.0523 | No |
| zn | 0.0457 | 0.0188 | 2.4326 | 0.0153 | Yes ✓ |
| indus | -0.0584 | 0.0836 | -0.6977 | 0.4857 | No |
| chas | -0.8254 | 1.1834 | -0.6975 | 0.4858 | No |
| nox | -9.9576 | 5.2898 | -1.8824 | 0.0604 | No |
| rm | 0.6289 | 0.6071 | 1.0359 | 0.3007 | No |
| age | -0.0008 | 0.0179 | -0.0473 | 0.9623 | No |
| dis | -1.0122 | 0.2825 | -3.5836 | 0.0004 | Yes ✓ |
| rad | 0.6125 | 0.0875 | 6.9967 | 0.0000 | Yes ✓ |
| tax | -0.0038 | 0.0052 | -0.7300 | 0.4658 | No |
| ptratio | -0.3041 | 0.1864 | -1.6316 | 0.1034 | No |
| lstat | 0.1388 | 0.0757 | 1.8330 | 0.0674 | No |
| medv | -0.2201 | 0.0598 | -3.6784 | 0.0003 | Yes ✓ |
Answer: In the multiple regression model, we can
reject H₀: βⱼ = 0 at α = 0.05 for zn,
dis, rad,
black, and
medv. Many predictors that were
significant in simple regression lose significance once we control for
others, indicating confounding.
# Collect univariate and multiple regression coefficients for comparison
mlr_coefs <- broom::tidy(mlr_boston) |>
filter(term != "(Intercept)") |>
select(term, estimate) |>
rename(Predictor = term, MLR_coef = estimate)
slr_coefs <- slr_results |>
select(Predictor, Coefficient) |>
rename(SLR_coef = Coefficient)
coef_comparison <- left_join(slr_coefs, mlr_coefs, by = "Predictor")
kable(coef_comparison, digits = 4,
caption = "Univariate vs. Multiple Regression Coefficients for each predictor") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Predictor | SLR_coef | MLR_coef |
|---|---|---|
| rad | 0.6179 | 0.6125 |
| tax | 0.0297 | -0.0038 |
| lstat | 0.5488 | 0.1388 |
| nox | 31.2485 | -9.9576 |
| indus | 0.5098 | -0.0584 |
| medv | -0.3632 | -0.2201 |
| dis | -1.5509 | -1.0122 |
| age | 0.1078 | -0.0008 |
| ptratio | 1.1520 | -0.3041 |
| rm | -2.6841 | 0.6289 |
| zn | -0.0739 | 0.0457 |
| chas | -1.8928 | -0.8254 |
ggplot(coef_comparison, aes(x = SLR_coef, y = MLR_coef, label = Predictor)) +
geom_point(colour = "steelblue", size = 3) +
ggrepel::geom_text_repel(size = 3.5) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "grey50") +
labs(title = "Univariate (x-axis) vs. Multiple Regression (y-axis) Coefficients",
subtitle = "Dashed line = perfect agreement",
x = "Simple Regression Coefficient",
y = "Multiple Regression Coefficient") +
theme_minimal()Comment: The coefficients differ substantially for
several predictors (notably nox and lstat),
illustrating the impact of multicollinearity: controlling for other
variables can reverse or shrink the apparent effect of a single
predictor.
# Fit cubic polynomial for each predictor and test significance of X² and X³ terms
poly_results <- map_dfr(predictors, function(pred) {
formula <- as.formula(paste0("crim ~ ", pred, " + I(", pred, "^2) + I(", pred, "^3)"))
fit <- lm(formula, data = Boston)
s <- coef(summary(fit))
# p-values for quadratic and cubic terms
p2 <- if (nrow(s) >= 3) s[3, 4] else NA_real_
p3 <- if (nrow(s) >= 4) s[4, 4] else NA_real_
tibble(
Predictor = pred,
p_quadratic = p2,
p_cubic = p3,
NonLinear_Sig = ifelse(!is.na(p2) & p2 < 0.05, "Yes ✓", "No")
)
})
kable(poly_results, digits = 4,
caption = "Evidence of non-linear association (p-values for X² and X³ terms)") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(poly_results$NonLinear_Sig == "Yes ✓"),
bold = TRUE, background = "#fff3cd")| Predictor | p_quadratic | p_cubic | NonLinear_Sig |
|---|---|---|---|
| zn | 0.0938 | 0.2295 | No |
| indus | 0.0000 | 0.0000 | Yes ✓ |
| chas | NA | NA | No |
| nox | 0.0000 | 0.0000 | Yes ✓ |
| rm | 0.3641 | 0.5086 | No |
| age | 0.0474 | 0.0067 | Yes ✓ |
| dis | 0.0000 | 0.0000 | Yes ✓ |
| rad | 0.6130 | 0.4823 | No |
| tax | 0.1375 | 0.2439 | No |
| ptratio | 0.0041 | 0.0063 | Yes ✓ |
| lstat | 0.0646 | 0.1299 | No |
| medv | 0.0000 | 0.0000 | Yes ✓ |
Comment: For many predictors (e.g.,
dis, lstat, nox), the quadratic
and/or cubic terms are statistically significant, providing evidence of
non-linear associations with crime rate. This suggests that linear
models may underfit the data for these predictors.
Task: Apply logistic regression to predict the direction of weekly stock market returns using the
Weeklydata set (1,089 weekly returns, 1990–2010).
## Rows: 1,089
## Columns: 9
## $ Year <dbl> 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, …
## $ Lag1 <dbl> 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0.807, 0…
## $ Lag2 <dbl> 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0…
## $ Lag3 <dbl> -3.936, 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -…
## $ Lag4 <dbl> -0.229, -3.936, 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, …
## $ Lag5 <dbl> -3.484, -0.229, -3.936, 1.572, 0.816, -0.270, -2.576, 3.514,…
## $ Volume <dbl> 0.1549760, 0.1485740, 0.1598375, 0.1616300, 0.1537280, 0.154…
## $ Today <dbl> -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0.807, 0.041, 1…
## $ Direction <fct> Down, Down, Up, Up, Up, Down, Up, Up, Up, Down, Down, Up, Up…
kable(head(Weekly, 6), caption = "First 6 rows of the Weekly data set") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| Year | Lag1 | Lag2 | Lag3 | Lag4 | Lag5 | Volume | Today | Direction |
|---|---|---|---|---|---|---|---|---|
| 1990 | 0.816 | 1.572 | -3.936 | -0.229 | -3.484 | 0.1549760 | -0.270 | Down |
| 1990 | -0.270 | 0.816 | 1.572 | -3.936 | -0.229 | 0.1485740 | -2.576 | Down |
| 1990 | -2.576 | -0.270 | 0.816 | 1.572 | -3.936 | 0.1598375 | 3.514 | Up |
| 1990 | 3.514 | -2.576 | -0.270 | 0.816 | 1.572 | 0.1616300 | 0.712 | Up |
| 1990 | 0.712 | 3.514 | -2.576 | -0.270 | 0.816 | 0.1537280 | 1.178 | Up |
| 1990 | 1.178 | 0.712 | 3.514 | -2.576 | -0.270 | 0.1544440 | -1.372 | Down |
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
# Proportion of Up vs Down weeks
prop.table(table(Weekly$Direction)) |>
as.data.frame() |>
rename(Direction = Var1, Proportion = Freq) |>
kable(digits = 3, caption = "Proportion of Up vs Down weeks") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Direction | Proportion |
|---|---|
| Down | 0.444 |
| Up | 0.556 |
# Volume over time
p1 <- ggplot(Weekly, aes(x = Year, y = Volume)) +
geom_line(colour = "steelblue") +
geom_smooth(colour = "tomato", se = FALSE) +
labs(title = "Trading Volume Over Time", y = "Volume (billions of shares)") +
theme_minimal()
# Today's return by direction
p2 <- ggplot(Weekly, aes(x = Direction, y = Today, fill = Direction)) +
geom_boxplot(alpha = 0.7) +
scale_fill_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
labs(title = "Weekly Return (Today) by Market Direction",
x = "Direction", y = "Return (%)") +
theme_minimal()
# Lag1 vs Lag2 coloured by Direction
p3 <- ggplot(Weekly, aes(x = Lag1, y = Lag2, colour = Direction)) +
geom_point(alpha = 0.4) +
scale_colour_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
labs(title = "Lag1 vs Lag2 Returns by Direction") +
theme_minimal()
# Volume distribution by direction
p4 <- ggplot(Weekly, aes(x = Volume, fill = Direction)) +
geom_density(alpha = 0.5) +
scale_fill_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
labs(title = "Volume Distribution by Market Direction") +
theme_minimal()
(p1 | p2) / (p3 | p4)Patterns observed:
Today returns are centred near 0 for both directions,
with the “Up” group having higher median returns as expected.# Fit logistic regression: Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume
logit_full <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
data = Weekly,
family = binomial)
summary(logit_full)##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
logit_tidy <- broom::tidy(logit_full) |>
mutate(Odds_Ratio = exp(estimate),
Significant = ifelse(p.value < 0.05, "Yes ✓", "No"))
kable(logit_tidy, digits = 4,
caption = "Logistic Regression Coefficients — Full Weekly Data") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
row_spec(which(logit_tidy$Significant == "Yes ✓"),
bold = TRUE, background = "#d4edda")| term | estimate | std.error | statistic | p.value | Odds_Ratio | Significant |
|---|---|---|---|---|---|---|
| (Intercept) | 0.2669 | 0.0859 | 3.1056 | 0.0019 | 1.3059 | Yes ✓ |
| Lag1 | -0.0413 | 0.0264 | -1.5626 | 0.1181 | 0.9596 | No |
| Lag2 | 0.0584 | 0.0269 | 2.1754 | 0.0296 | 1.0602 | Yes ✓ |
| Lag3 | -0.0161 | 0.0267 | -0.6024 | 0.5469 | 0.9841 | No |
| Lag4 | -0.0278 | 0.0265 | -1.0501 | 0.2937 | 0.9726 | No |
| Lag5 | -0.0145 | 0.0264 | -0.5485 | 0.5833 | 0.9856 | No |
| Volume | -0.0227 | 0.0369 | -0.6163 | 0.5377 | 0.9775 | No |
Answer: Only Lag2 has
a statistically significant p-value (< 0.05) in the full logistic
regression. Its positive coefficient indicates that a higher return in
the previous-previous week is associated with an increased probability
of an “Up” week.
# Predicted probabilities and classes (threshold = 0.5)
probs_full <- predict(logit_full, type = "response")
preds_full <- ifelse(probs_full > 0.5, "Up", "Down")
# Confusion matrix
conf_full <- table(Predicted = preds_full, Actual = Weekly$Direction)
kable(conf_full, caption = "Confusion Matrix — Full Logistic Regression") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Down | Up | |
|---|---|---|
| Down | 54 | 48 |
| Up | 430 | 557 |
# Overall accuracy
acc_full <- mean(preds_full == Weekly$Direction)
cat(sprintf("Overall fraction of correct predictions: %.4f (%.1f%%)\n",
acc_full, acc_full * 100))## Overall fraction of correct predictions: 0.5611 (56.1%)
Interpretation of the confusion matrix:
# Training period: 1990–2008; Test period: 2009–2010
train <- Weekly$Year <= 2008
test <- !train
Weekly_train <- Weekly[ train, ]
Weekly_test <- Weekly[ test, ]
cat("Training observations:", nrow(Weekly_train), "\n")## Training observations: 985
## Test observations: 104
# Fit logistic regression on training data with Lag2 only
logit_lag2 <- glm(Direction ~ Lag2,
data = Weekly_train,
family = binomial)
summary(logit_lag2)##
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = Weekly_train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20326 0.06428 3.162 0.00157 **
## Lag2 0.05810 0.02870 2.024 0.04298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1350.5 on 983 degrees of freedom
## AIC: 1354.5
##
## Number of Fisher Scoring iterations: 4
# Predict on held-out test data
probs_test <- predict(logit_lag2, newdata = Weekly_test, type = "response")
preds_test <- ifelse(probs_test > 0.5, "Up", "Down")
preds_test <- factor(preds_test, levels = c("Down", "Up"))
# Confusion matrix on test data
conf_test <- table(Predicted = preds_test,
Actual = Weekly_test$Direction)
kable(conf_test,
caption = "Confusion Matrix — Test Data (2009–2010), Lag2-only model") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Down | Up | |
|---|---|---|
| Down | 9 | 5 |
| Up | 34 | 56 |
acc_test <- mean(preds_test == Weekly_test$Direction)
cat(sprintf("Test accuracy: %.4f (%.1f%%)\n", acc_test, acc_test * 100))## Test accuracy: 0.6250 (62.5%)
# Sensitivity and Specificity
tp <- conf_test["Up", "Up"]
tn <- conf_test["Down", "Down"]
fp <- conf_test["Up", "Down"]
fn <- conf_test["Down", "Up"]
sensitivity <- tp / (tp + fn) # True Positive Rate
specificity <- tn / (tn + fp) # True Negative Rate
cat(sprintf("Sensitivity (Up correctly identified): %.3f\n", sensitivity))## Sensitivity (Up correctly identified): 0.918
## Specificity (Down correctly identified): 0.209
Comment:
The Lag2-only model achieves a test accuracy of approximately
62.5%, which is higher than the full model’s training
accuracy. This suggests that Lag2 captures some genuine
predictive signal, and that the simpler model generalises better to
held-out data. However, the model still tends to over-predict “Up”
weeks, as reflected in the relatively low specificity.
## R version 4.5.3 (2026-03-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Ulaanbaatar
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.3.2 kableExtra_1.4.0 knitr_1.51 caret_7.0-1
## [5] lattice_0.22-9 car_3.1-5 carData_3.0-6 corrplot_0.95
## [9] GGally_2.4.0 lubridate_1.9.5 forcats_1.0.1 stringr_1.6.0
## [13] dplyr_1.2.0 purrr_1.2.1 readr_2.2.0 tidyr_1.3.2
## [17] tibble_3.3.1 ggplot2_4.0.2 tidyverse_2.0.0 ISLR2_1.3-2
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.3 timeDate_4052.112
## [4] farver_2.1.2 S7_0.2.1 fastmap_1.2.0
## [7] pROC_1.19.0.1 digest_0.6.39 rpart_4.1.24
## [10] timechange_0.4.0 lifecycle_1.0.5 survival_3.8-6
## [13] magrittr_2.0.4 compiler_4.5.3 rlang_1.1.7
## [16] sass_0.4.10 tools_4.5.3 yaml_2.3.12
## [19] data.table_1.18.2.1 labeling_0.4.3 xml2_1.5.2
## [22] plyr_1.8.9 RColorBrewer_1.1-3 abind_1.4-8
## [25] withr_3.0.2 stats4_4.5.3 nnet_7.3-20
## [28] grid_4.5.3 future_1.70.0 globals_0.19.1
## [31] scales_1.4.0 iterators_1.0.14 MASS_7.3-65
## [34] cli_3.6.5 rmarkdown_2.30 generics_0.1.4
## [37] rstudioapi_0.18.0 future.apply_1.20.2 reshape2_1.4.5
## [40] tzdb_0.5.0 cachem_1.1.0 splines_4.5.3
## [43] parallel_4.5.3 vctrs_0.7.1 hardhat_1.4.2
## [46] Matrix_1.7-4 jsonlite_2.0.0 hms_1.1.4
## [49] ggrepel_0.9.8 Formula_1.2-5 listenv_0.10.1
## [52] systemfonts_1.3.2 foreach_1.5.2 gower_1.0.2
## [55] jquerylib_0.1.4 recipes_1.3.1 glue_1.8.0
## [58] parallelly_1.46.1 ggstats_0.13.0 codetools_0.2-20
## [61] stringi_1.8.7 gtable_0.3.6 pillar_1.11.1
## [64] htmltools_0.5.9 ipred_0.9-15 lava_1.8.2
## [67] R6_2.6.1 textshaping_1.0.5 evaluate_1.0.5
## [70] backports_1.5.0 broom_1.0.12 bslib_0.10.0
## [73] class_7.3-23 Rcpp_1.1.1 svglite_2.2.2
## [76] nlme_3.1-168 prodlim_2026.03.11 mgcv_1.9-4
## [79] xfun_0.56 pkgconfig_2.0.3 ModelMetrics_1.2.2.2