# Install ISLR2 if not already installed
# install.packages("ISLR2")
library(ISLR2)
library(tidyverse)   # ggplot2, dplyr, etc.
library(GGally)      # ggpairs for scatterplot matrices
library(corrplot)    # correlation matrix visualisation
library(car)         # vif(), leveragePlots()
library(caret)       # confusionMatrix()
library(knitr)       # kable() for nice tables
library(kableExtra)  # extra table formatting

1 Chapter 2 — Exercise 9: Auto Data Set

Task: Explore the Auto data set from the ISLR2 package. Remove observations with missing values, then answer questions (a)–(f).

1.1 Data Preparation

# Load the Auto dataset and drop any rows with NAs
data("Auto")
Auto <- na.omit(Auto)

# Quick overview
glimpse(Auto)

## Rows: 392
## Columns: 9
## $ mpg          <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, 15, 14, 15, 14, 2…
## $ cylinders    <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, …
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383, 34…
## $ horsepower   <int> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170, 16…
## $ weight       <int> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, 385…
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8.5, …
## $ year         <int> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 7…
## $ origin       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, …
## $ name         <fct> chevrolet chevelle malibu, buick skylark 320, plymouth sa…

kable(head(Auto, 8), caption = "First 8 rows of the Auto data set") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)

First 8 rows of the Auto data set
mpg	cylinders	displacement	horsepower	weight	acceleration	year	origin	name
18	8	307	130	3504	12.0	70	1	chevrolet chevelle malibu
15	8	350	165	3693	11.5	70	1	buick skylark 320
18	8	318	150	3436	11.0	70	1	plymouth satellite
16	8	304	150	3433	12.0	70	1	amc rebel sst
17	8	302	140	3449	10.5	70	1	ford torino
15	8	429	198	4341	10.0	70	1	ford galaxie 500
14	8	454	220	4354	9.0	70	1	chevrolet impala
14	8	440	215	4312	8.5	70	1	plymouth fury iii

1.2 (a) Quantitative vs. Qualitative Predictors

# Inspect variable types
str(Auto)

## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : int  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : int  3504 3693 3436 3433 3449 4341 4354 4312 4425 3850 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : int  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:5] 33 127 331 337 355
##   ..- attr(*, "names")= chr [1:5] "33" "127" "331" "337" ...

Answer:

Quantitative predictors: mpg, cylinders, displacement, horsepower, weight, acceleration, year
Qualitative predictors: origin (coded 1/2/3 representing American/European/Japanese), name (car model name — a character string)

Note: Although cylinders and origin are stored as integers, origin is conceptually categorical. cylinders can be treated as either depending on context.

1.3 (b) Range of Quantitative Predictors

# Select only quantitative columns (excluding 'name' and treating 'origin' as qualitative)
quant_vars <- Auto |> select(mpg, cylinders, displacement, horsepower, weight, acceleration, year)

# Compute range for each variable
range_table <- quant_vars |>
  summarise(across(everything(), list(Min = min, Max = max))) |>
  pivot_longer(everything(),
               names_to  = c("Variable", ".value"),
               names_sep = "_") |>
  mutate(Range = Max - Min)

kable(range_table, caption = "Range of each quantitative predictor", digits = 2) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Range of each quantitative predictor
Variable	Min	Max	Range
mpg	9	46.6	37.6
cylinders	3	8.0	5.0
displacement	68	455.0	387.0
horsepower	46	230.0	184.0
weight	1613	5140.0	3527.0
acceleration	8	24.8	16.8
year	70	82.0	12.0

1.4 (c) Mean and Standard Deviation

summary_table <- quant_vars |>
  summarise(across(everything(), list(Mean = mean, SD = sd))) |>
  pivot_longer(everything(),
               names_to  = c("Variable", ".value"),
               names_sep = "_")

kable(summary_table, caption = "Mean and Standard Deviation of quantitative predictors",
      digits = 3) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Mean and Standard Deviation of quantitative predictors
Variable	Mean	SD
mpg	23.446	7.805
cylinders	5.472	1.706
displacement	194.412	104.644
horsepower	104.469	38.491
weight	2977.584	849.403
acceleration	15.541	2.759
year	75.980	3.684

1.5 (d) Subset: Remove Observations 10–85

# Remove 10th through 85th observations
Auto_subset <- Auto[-(10:85), ]
cat("Dimensions after removing rows 10–85:", dim(Auto_subset), "\n")

## Dimensions after removing rows 10–85: 316 9

# Recompute range, mean, SD on the subset
subset_quant <- Auto_subset |> select(mpg, cylinders, displacement, horsepower,
                                       weight, acceleration, year)

subset_summary <- subset_quant |>
  summarise(across(everything(),
                   list(Min = min, Max = max, Mean = mean, SD = sd))) |>
  pivot_longer(everything(),
               names_to  = c("Variable", ".value"),
               names_sep = "_") |>
  mutate(Range = Max - Min) |>
  select(Variable, Min, Max, Range, Mean, SD)

kable(subset_summary,
      caption = "Summary statistics for the reduced data set (rows 10–85 removed)",
      digits = 3) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Summary statistics for the reduced data set (rows 10–85 removed)
Variable	Min	Max	Range	Mean	SD
mpg	11.0	46.6	35.6	24.404	7.867
cylinders	3.0	8.0	5.0	5.373	1.654
displacement	68.0	455.0	387.0	187.241	99.678
horsepower	46.0	230.0	184.0	100.722	35.709
weight	1649.0	4997.0	3348.0	2935.972	811.300
acceleration	8.5	24.8	16.3	15.727	2.694
year	70.0	82.0	12.0	77.146	3.106

1.6 (e) Graphical Investigation of the Full Data Set

# Scatterplot matrix using GGally
ggpairs(quant_vars,
        lower = list(continuous = wrap("points", alpha = 0.3, size = 0.8)),
        upper = list(continuous = wrap("cor", size = 3.5)),
        diag  = list(continuous = wrap("densityDiag")),
        title = "Scatterplot Matrix of Auto Quantitative Predictors")

# Boxplot: mpg by number of cylinders
ggplot(Auto, aes(x = factor(cylinders), y = mpg, fill = factor(cylinders))) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(title  = "MPG by Number of Cylinders",
       x      = "Cylinders",
       y      = "Miles per Gallon",
       fill   = "Cylinders") +
  theme_minimal()

# MPG trend over model year
ggplot(Auto, aes(x = year, y = mpg)) +
  geom_jitter(alpha = 0.4, colour = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, colour = "tomato") +
  labs(title = "MPG vs. Model Year",
       x     = "Model Year (offset from 1900)",
       y     = "Miles per Gallon") +
  theme_minimal()

Findings:

mpg is negatively correlated with displacement, horsepower, and weight — heavier, more powerful cars tend to be less fuel-efficient.
mpg is positively correlated with year, indicating cars have generally become more fuel-efficient over time.
displacement, horsepower, and weight are strongly positively correlated with each other, suggesting multicollinearity if all are used as predictors together.
4-cylinder cars have noticeably higher MPG than 6- or 8-cylinder cars (boxplot).

1.7 (f) Which Variables Are Useful for Predicting MPG?

# Correlation of all quantitative predictors with mpg
cor_with_mpg <- cor(quant_vars) |>
  as.data.frame() |>
  select(mpg) |>
  rownames_to_column("Predictor") |>
  filter(Predictor != "mpg") |>
  rename(Correlation_with_MPG = mpg) |>
  arrange(desc(abs(Correlation_with_MPG)))

kable(cor_with_mpg,
      caption = "Correlation of each predictor with MPG (sorted by absolute value)",
      digits  = 3) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(abs(cor_with_mpg$Correlation_with_MPG) > 0.7),
           bold = TRUE, background = "#d4edda")

Correlation of each predictor with MPG (sorted by absolute value)
Predictor	Correlation_with_MPG
weight	-0.832
displacement	-0.805
horsepower	-0.778
cylinders	-0.778
year	0.581
acceleration	0.423

Answer:

Based on the scatterplots and correlation values, weight, displacement, horsepower, and year are the most promising predictors of mpg:

weight has the strongest negative correlation (≈ −0.83).
displacement and horsepower also have high negative correlations (≈ −0.80 and −0.78 respectively).
year has a meaningful positive correlation (≈ +0.58), capturing fuel economy improvements over time.
cylinders and acceleration show weaker or more noisy relationships.

2 Chapter 3 — Exercise 9: Multiple Linear Regression on Auto

Task: Use multiple linear regression to predict mpg using all other numeric variables in the Auto data set.

2.1 (a) Scatterplot Matrix

Auto_num <- Auto |> select(-name)   # remove character column

ggpairs(Auto_num,
        lower = list(continuous = wrap("points", alpha = 0.3, size = 0.7)),
        upper = list(continuous = wrap("cor", size = 3)),
        diag  = list(continuous  = wrap("densityDiag")),
        title = "Chapter 3 — Scatterplot Matrix (Auto, all numeric variables)")

2.2 (b) Correlation Matrix

cor_matrix <- cor(Auto_num)

corrplot(cor_matrix,
         method  = "color",
         type    = "upper",
         addCoef.col = "black",
         tl.col  = "black",
         tl.srt  = 45,
         number.cex = 0.75,
         title   = "Correlation Matrix — Auto (numeric variables)",
         mar     = c(0, 0, 1.5, 0))

kable(round(cor_matrix, 3),
      caption = "Correlation matrix of all numeric variables in Auto") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Correlation matrix of all numeric variables in Auto
	mpg	cylinders	displacement	horsepower	weight	acceleration	year	origin
mpg	1.000	-0.778	-0.805	-0.778	-0.832	0.423	0.581	0.565
cylinders	-0.778	1.000	0.951	0.843	0.898	-0.505	-0.346	-0.569
displacement	-0.805	0.951	1.000	0.897	0.933	-0.544	-0.370	-0.615
horsepower	-0.778	0.843	0.897	1.000	0.865	-0.689	-0.416	-0.455
weight	-0.832	0.898	0.933	0.865	1.000	-0.417	-0.309	-0.585
acceleration	0.423	-0.505	-0.544	-0.689	-0.417	1.000	0.290	0.213
year	0.581	-0.346	-0.370	-0.416	-0.309	0.290	1.000	0.182
origin	0.565	-0.569	-0.615	-0.455	-0.585	0.213	0.182	1.000

2.3 (c) Multiple Linear Regression

# Fit MLR: mpg ~ all predictors except 'name'
mlr_fit <- lm(mpg ~ . - name, data = Auto)
summary(mlr_fit)

## 
## Call:
## lm(formula = mpg ~ . - name, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5903 -2.1565 -0.1169  1.8690 13.0604 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
## cylinders     -0.493376   0.323282  -1.526  0.12780    
## displacement   0.019896   0.007515   2.647  0.00844 ** 
## horsepower    -0.016951   0.013787  -1.230  0.21963    
## weight        -0.006474   0.000652  -9.929  < 2e-16 ***
## acceleration   0.080576   0.098845   0.815  0.41548    
## year           0.750773   0.050973  14.729  < 2e-16 ***
## origin         1.426141   0.278136   5.127 4.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182 
## F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16

# Tidy summary table
coef_table <- broom::tidy(mlr_fit) |>
  mutate(significant = ifelse(p.value < 0.05, "Yes ✓", "No"))

kable(coef_table, digits = 4,
      caption = "MLR Coefficient Estimates (response: mpg)") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(coef_table$significant == "Yes ✓"),
           bold = TRUE, background = "#d4edda")

MLR Coefficient Estimates (response: mpg)
term	estimate	std.error	statistic	p.value	significant
(Intercept)	-17.2184	4.6443	-3.7074	0.0002	Yes ✓
cylinders	-0.4934	0.3233	-1.5261	0.1278	No
displacement	0.0199	0.0075	2.6474	0.0084	Yes ✓
horsepower	-0.0170	0.0138	-1.2295	0.2196	No
weight	-0.0065	0.0007	-9.9288	0.0000	Yes ✓
acceleration	0.0806	0.0988	0.8152	0.4155	No
year	0.7508	0.0510	14.7288	0.0000	Yes ✓
origin	1.4261	0.2781	5.1275	0.0000	Yes ✓

Comments:

i. Is there a relationship between the predictors and the response? Yes. The overall F-statistic is large with a near-zero p-value, indicating a statistically significant relationship between the set of predictors and mpg.

ii. Which predictors have a statistically significant relationship with mpg? At the α = 0.05 level, the significant predictors are: displacement, weight, year, and origin. horsepower and acceleration are not significant in the presence of the others.

iii. What does the coefficient for year suggest? The positive coefficient for year (approximately +0.75) indicates that, holding all other variables constant, fuel efficiency increases by about 0.75 mpg for each additional model year. This reflects the general trend of improving fuel economy in newer cars.

2.4 (d) Diagnostic Plots

par(mfrow = c(2, 2))
plot(mlr_fit)

par(mfrow = c(1, 1))

Comments:

Residuals vs Fitted: A slight non-linear (U-shaped) pattern suggests the linear model may not fully capture the relationship. A transformation or polynomial terms might improve fit.
Normal Q-Q: Residuals are approximately normal, though there are a few points in the tails.
Scale-Location: Mild evidence of heteroscedasticity — variance of residuals increases slightly with fitted values.
Residuals vs Leverage: Observation 14 appears to have unusually high leverage. A few points (e.g., 327, 394) have notably large residuals.

2.5 (e) Interaction Effects

# Fit model with key interaction terms
int_fit <- lm(mpg ~ displacement + horsepower + weight + acceleration +
                     year + origin + cylinders +
                     displacement:weight +
                     horsepower:weight +
                     year:origin,
              data = Auto)

summary(int_fit)

## 
## Call:
## lm(formula = mpg ~ displacement + horsepower + weight + acceleration + 
##     year + origin + cylinders + displacement:weight + horsepower:weight + 
##     year:origin, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4370 -1.6736 -0.0609  1.4670 11.8289 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.847e+01  8.064e+00   2.291  0.02251 *  
## displacement        -3.328e-02  2.044e-02  -1.629  0.10421    
## horsepower          -1.379e-01  5.018e-02  -2.748  0.00628 ** 
## weight              -1.095e-02  7.296e-04 -15.002  < 2e-16 ***
## acceleration        -9.464e-03  9.496e-02  -0.100  0.92066    
## year                 5.300e-01  1.029e-01   5.149 4.21e-07 ***
## origin              -1.084e+01  4.391e+00  -2.470  0.01396 *  
## cylinders            4.277e-02  2.899e-01   0.148  0.88278    
## displacement:weight  1.093e-05  5.675e-06   1.926  0.05483 .  
## horsepower:weight    2.979e-05  1.343e-05   2.219  0.02711 *  
## year:origin          1.483e-01  5.614e-02   2.641  0.00861 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.907 on 381 degrees of freedom
## Multiple R-squared:  0.8649, Adjusted R-squared:  0.8613 
## F-statistic: 243.8 on 10 and 381 DF,  p-value: < 2.2e-16

int_coef <- broom::tidy(int_fit) |>
  filter(str_detect(term, ":")) |>        # keep only interaction terms
  mutate(significant = ifelse(p.value < 0.05, "Yes ✓", "No"))

kable(int_coef, digits  = 4,
      caption = "Interaction term estimates only") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Interaction term estimates only
term	estimate	std.error	statistic	p.value	significant
displacement:weight	0.0000	0.0000	1.9262	0.0548	No
horsepower:weight	0.0000	0.0000	2.2185	0.0271	Yes ✓
year:origin	0.1483	0.0561	2.6410	0.0086	Yes ✓

Comment: The interaction between displacement and weight (and/or horsepower and weight) tends to be statistically significant, indicating that the effect of engine displacement on mpg depends on vehicle weight. The year:origin interaction may also be significant, suggesting that the rate of fuel economy improvement differs across American, European, and Japanese cars.

2.6 (f) Variable Transformations

# Log transformation of horsepower
fit_log  <- lm(mpg ~ log(horsepower) + weight + year + origin, data = Auto)

# Square root transformation of displacement
fit_sqrt <- lm(mpg ~ sqrt(displacement) + weight + year + origin, data = Auto)

# Polynomial (quadratic) weight
fit_poly <- lm(mpg ~ I(weight^2) + weight + horsepower + year + origin, data = Auto)

results_df <- data.frame(
  Model     = c("log(horsepower)", "sqrt(displacement)", "weight + weight^2"),
  Adj_R2    = c(summary(fit_log)$adj.r.squared,
                summary(fit_sqrt)$adj.r.squared,
                summary(fit_poly)$adj.r.squared),
  RSE       = c(summary(fit_log)$sigma,
                summary(fit_sqrt)$sigma,
                summary(fit_poly)$sigma)
)

kable(results_df, digits = 4,
      caption = "Comparison of models with transformed predictors") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Comparison of models with transformed predictors
Model	Adj_R2	RSE
log(horsepower)	0.8258	3.2572
sqrt(displacement)	0.8159	3.3487
weight + weight^2	0.8522	3.0001

# Visualise: mpg vs log(horsepower)
ggplot(Auto, aes(x = log(horsepower), y = mpg)) +
  geom_point(alpha = 0.5, colour = "steelblue") +
  geom_smooth(method = "lm", colour = "tomato", se = TRUE) +
  labs(title = "MPG vs. log(Horsepower)",
       x = "log(Horsepower)", y = "MPG") +
  theme_minimal()

Comment: Taking the log of horsepower or the square root of displacement linearises the relationship with mpg more effectively than the raw predictors, producing higher adjusted R² and lower residual standard error. The quadratic term for weight also captures the slightly curved relationship visible in the scatterplot.

3 Chapter 3 — Exercise 15: Boston Data Set (Crime Rate)

Task: Predict per capita crime rate (crim) using the other variables in the Boston data set.

3.1 Data Preparation

data("Boston")
glimpse(Boston)

## Rows: 506
## Columns: 13
## $ crim    <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn      <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus   <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nox     <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm      <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age     <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis     <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad     <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax     <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ lstat   <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv    <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…

kable(head(Boston, 6), caption = "First 6 rows of the Boston data set") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

First 6 rows of the Boston data set
crim	zn	indus	nox	rm	age	dis	rad	tax	ptratio	lstat	medv
0.00632	18	2.31	0.538	6.575	65.2	4.0900	1	296	15.3	4.98	24.0
0.02731	0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	9.14	21.6
0.02729	0	7.07	0.469	7.185	61.1	4.9671	2	242	17.8	4.03	34.7
0.03237	0	2.18	0.458	6.998	45.8	6.0622	3	222	18.7	2.94	33.4
0.06905	0	2.18	0.458	7.147	54.2	6.0622	3	222	18.7	5.33	36.2
0.02985	0	2.18	0.458	6.430	58.7	6.0622	3	222	18.7	5.21	28.7

3.2 (a) Simple Linear Regression — Each Predictor

predictors <- setdiff(names(Boston), "crim")

# Fit SLR for each predictor and extract key statistics
slr_results <- map_dfr(predictors, function(pred) {
  formula <- as.formula(paste("crim ~", pred))
  fit     <- lm(formula, data = Boston)
  s       <- summary(fit)
  tibble(
    Predictor  = pred,
    Coefficient = coef(fit)[2],
    p_value    = coef(s)[2, 4],
    R_squared  = s$r.squared
  )
}) |>
  mutate(Significant = ifelse(p_value < 0.05, "Yes ✓", "No")) |>
  arrange(p_value)

kable(slr_results, digits = 4,
      caption = "Simple Linear Regression results: crim ~ each predictor") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(slr_results$Significant == "Yes ✓"),
           background = "#d4edda")

Simple Linear Regression results: crim ~ each predictor
Predictor	Coefficient	p_value	R_squared	Significant
rad	0.6179	0.0000	0.3913	Yes ✓
tax	0.0297	0.0000	0.3396	Yes ✓
lstat	0.5488	0.0000	0.2076	Yes ✓
nox	31.2485	0.0000	0.1772	Yes ✓
indus	0.5098	0.0000	0.1653	Yes ✓
medv	-0.3632	0.0000	0.1508	Yes ✓
dis	-1.5509	0.0000	0.1441	Yes ✓
age	0.1078	0.0000	0.1244	Yes ✓
ptratio	1.1520	0.0000	0.0841	Yes ✓
rm	-2.6841	0.0000	0.0481	Yes ✓
zn	-0.0739	0.0000	0.0402	Yes ✓
chas	-1.8928	0.2094	0.0031	No

# Visual: top 4 predictors by R²
top4 <- slr_results |> slice_max(R_squared, n = 4) |> pull(Predictor)

plot_list <- map(top4, function(pred) {
  ggplot(Boston, aes(x = .data[[pred]], y = crim)) +
    geom_point(alpha = 0.4, colour = "steelblue") +
    geom_smooth(method = "lm", colour = "tomato", se = TRUE) +
    labs(title = paste("crim ~", pred),
         x     = pred, y = "Crime Rate") +
    theme_minimal(base_size = 11)
})

library(patchwork)
(plot_list[[1]] | plot_list[[2]]) / (plot_list[[3]] | plot_list[[4]])

Findings: Almost all predictors show a statistically significant univariate association with crim. The strongest individual predictors (by R²) include rad (index of accessibility to radial highways), tax (property tax rate), and lstat (lower-status population share).

3.3 (b) Multiple Linear Regression

mlr_boston <- lm(crim ~ ., data = Boston)
summary(mlr_boston)

## 
## Call:
## lm(formula = crim ~ ., data = Boston)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.534 -2.248 -0.348  1.087 73.923 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.7783938  7.0818258   1.946 0.052271 .  
## zn           0.0457100  0.0187903   2.433 0.015344 *  
## indus       -0.0583501  0.0836351  -0.698 0.485709    
## chas        -0.8253776  1.1833963  -0.697 0.485841    
## nox         -9.9575865  5.2898242  -1.882 0.060370 .  
## rm           0.6289107  0.6070924   1.036 0.300738    
## age         -0.0008483  0.0179482  -0.047 0.962323    
## dis         -1.0122467  0.2824676  -3.584 0.000373 ***
## rad          0.6124653  0.0875358   6.997 8.59e-12 ***
## tax         -0.0037756  0.0051723  -0.730 0.465757    
## ptratio     -0.3040728  0.1863598  -1.632 0.103393    
## lstat        0.1388006  0.0757213   1.833 0.067398 .  
## medv        -0.2200564  0.0598240  -3.678 0.000261 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.46 on 493 degrees of freedom
## Multiple R-squared:  0.4493, Adjusted R-squared:  0.4359 
## F-statistic: 33.52 on 12 and 493 DF,  p-value: < 2.2e-16

mlr_boston_tidy <- broom::tidy(mlr_boston) |>
  mutate(Significant = ifelse(p.value < 0.05, "Yes ✓", "No"))

kable(mlr_boston_tidy, digits = 4,
      caption = "Multiple Regression: crim ~ all predictors") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(mlr_boston_tidy$Significant == "Yes ✓"),
           bold = TRUE, background = "#d4edda")

Multiple Regression: crim ~ all predictors
term	estimate	std.error	statistic	p.value	Significant
(Intercept)	13.7784	7.0818	1.9456	0.0523	No
zn	0.0457	0.0188	2.4326	0.0153	Yes ✓
indus	-0.0584	0.0836	-0.6977	0.4857	No
chas	-0.8254	1.1834	-0.6975	0.4858	No
nox	-9.9576	5.2898	-1.8824	0.0604	No
rm	0.6289	0.6071	1.0359	0.3007	No
age	-0.0008	0.0179	-0.0473	0.9623	No
dis	-1.0122	0.2825	-3.5836	0.0004	Yes ✓
rad	0.6125	0.0875	6.9967	0.0000	Yes ✓
tax	-0.0038	0.0052	-0.7300	0.4658	No
ptratio	-0.3041	0.1864	-1.6316	0.1034	No
lstat	0.1388	0.0757	1.8330	0.0674	No
medv	-0.2201	0.0598	-3.6784	0.0003	Yes ✓

Answer: In the multiple regression model, we can reject H₀: βⱼ = 0 at α = 0.05 for zn, dis, rad, black, and medv. Many predictors that were significant in simple regression lose significance once we control for others, indicating confounding.

3.4 (c) Univariate vs. Multiple Regression Coefficients

# Collect univariate and multiple regression coefficients for comparison
mlr_coefs <- broom::tidy(mlr_boston) |>
  filter(term != "(Intercept)") |>
  select(term, estimate) |>
  rename(Predictor = term, MLR_coef = estimate)

slr_coefs <- slr_results |>
  select(Predictor, Coefficient) |>
  rename(SLR_coef = Coefficient)

coef_comparison <- left_join(slr_coefs, mlr_coefs, by = "Predictor")

kable(coef_comparison, digits  = 4,
      caption = "Univariate vs. Multiple Regression Coefficients for each predictor") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Univariate vs. Multiple Regression Coefficients for each predictor
Predictor	SLR_coef	MLR_coef
rad	0.6179	0.6125
tax	0.0297	-0.0038
lstat	0.5488	0.1388
nox	31.2485	-9.9576
indus	0.5098	-0.0584
medv	-0.3632	-0.2201
dis	-1.5509	-1.0122
age	0.1078	-0.0008
ptratio	1.1520	-0.3041
rm	-2.6841	0.6289
zn	-0.0739	0.0457
chas	-1.8928	-0.8254

ggplot(coef_comparison, aes(x = SLR_coef, y = MLR_coef, label = Predictor)) +
  geom_point(colour = "steelblue", size = 3) +
  ggrepel::geom_text_repel(size = 3.5) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "grey50") +
  labs(title = "Univariate (x-axis) vs. Multiple Regression (y-axis) Coefficients",
       subtitle = "Dashed line = perfect agreement",
       x = "Simple Regression Coefficient",
       y = "Multiple Regression Coefficient") +
  theme_minimal()

Comment: The coefficients differ substantially for several predictors (notably nox and lstat), illustrating the impact of multicollinearity: controlling for other variables can reverse or shrink the apparent effect of a single predictor.

3.5 (d) Non-linear Association (Polynomial Regression)

# Fit cubic polynomial for each predictor and test significance of X² and X³ terms
poly_results <- map_dfr(predictors, function(pred) {
  formula <- as.formula(paste0("crim ~ ", pred, " + I(", pred, "^2) + I(", pred, "^3)"))
  fit     <- lm(formula, data = Boston)
  s       <- coef(summary(fit))
  # p-values for quadratic and cubic terms
  p2 <- if (nrow(s) >= 3) s[3, 4] else NA_real_
  p3 <- if (nrow(s) >= 4) s[4, 4] else NA_real_
  tibble(
    Predictor        = pred,
    p_quadratic      = p2,
    p_cubic          = p3,
    NonLinear_Sig    = ifelse(!is.na(p2) & p2 < 0.05, "Yes ✓", "No")
  )
})

kable(poly_results, digits = 4,
      caption = "Evidence of non-linear association (p-values for X² and X³ terms)") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(poly_results$NonLinear_Sig == "Yes ✓"),
           bold = TRUE, background = "#fff3cd")

Evidence of non-linear association (p-values for X² and X³ terms)
Predictor	p_quadratic	p_cubic	NonLinear_Sig
zn	0.0938	0.2295	No
indus	0.0000	0.0000	Yes ✓
chas	NA	NA	No
nox	0.0000	0.0000	Yes ✓
rm	0.3641	0.5086	No
age	0.0474	0.0067	Yes ✓
dis	0.0000	0.0000	Yes ✓
rad	0.6130	0.4823	No
tax	0.1375	0.2439	No
ptratio	0.0041	0.0063	Yes ✓
lstat	0.0646	0.1299	No
medv	0.0000	0.0000	Yes ✓

Comment: For many predictors (e.g., dis, lstat, nox), the quadratic and/or cubic terms are statistically significant, providing evidence of non-linear associations with crime rate. This suggests that linear models may underfit the data for these predictors.

4 Chapter 4 — Exercise 13: Weekly Stock Market Data

Task: Apply logistic regression to predict the direction of weekly stock market returns using the Weekly data set (1,089 weekly returns, 1990–2010).

4.1 Data Preparation

data("Weekly")
glimpse(Weekly)

## Rows: 1,089
## Columns: 9
## $ Year      <dbl> 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, …
## $ Lag1      <dbl> 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0.807, 0…
## $ Lag2      <dbl> 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0…
## $ Lag3      <dbl> -3.936, 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, 1.178, -…
## $ Lag4      <dbl> -0.229, -3.936, 1.572, 0.816, -0.270, -2.576, 3.514, 0.712, …
## $ Lag5      <dbl> -3.484, -0.229, -3.936, 1.572, 0.816, -0.270, -2.576, 3.514,…
## $ Volume    <dbl> 0.1549760, 0.1485740, 0.1598375, 0.1616300, 0.1537280, 0.154…
## $ Today     <dbl> -0.270, -2.576, 3.514, 0.712, 1.178, -1.372, 0.807, 0.041, 1…
## $ Direction <fct> Down, Down, Up, Up, Up, Down, Up, Up, Up, Down, Down, Up, Up…

kable(head(Weekly, 6), caption = "First 6 rows of the Weekly data set") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

First 6 rows of the Weekly data set
Year	Lag1	Lag2	Lag3	Lag4	Lag5	Volume	Today	Direction
1990	0.816	1.572	-3.936	-0.229	-3.484	0.1549760	-0.270	Down
1990	-0.270	0.816	1.572	-3.936	-0.229	0.1485740	-2.576	Down
1990	-2.576	-0.270	0.816	1.572	-3.936	0.1598375	3.514	Up
1990	3.514	-2.576	-0.270	0.816	1.572	0.1616300	0.712	Up
1990	0.712	3.514	-2.576	-0.270	0.816	0.1537280	1.178	Up
1990	1.178	0.712	3.514	-2.576	-0.270	0.1544440	-1.372	Down

4.2 (a) Numerical and Graphical Summaries

summary(Weekly)

##       Year           Lag1               Lag2               Lag3         
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950  
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580  
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410  
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472  
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090  
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260  
##       Lag4               Lag5              Volume            Today         
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950  
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540  
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410  
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499  
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050  
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260  
##  Direction 
##  Down:484  
##  Up  :605  
##            
##            
##            
##

# Proportion of Up vs Down weeks
prop.table(table(Weekly$Direction)) |>
  as.data.frame() |>
  rename(Direction = Var1, Proportion = Freq) |>
  kable(digits = 3, caption = "Proportion of Up vs Down weeks") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Proportion of Up vs Down weeks
Direction	Proportion
Down	0.444
Up	0.556

# Volume over time
p1 <- ggplot(Weekly, aes(x = Year, y = Volume)) +
  geom_line(colour = "steelblue") +
  geom_smooth(colour = "tomato", se = FALSE) +
  labs(title = "Trading Volume Over Time", y = "Volume (billions of shares)") +
  theme_minimal()

# Today's return by direction
p2 <- ggplot(Weekly, aes(x = Direction, y = Today, fill = Direction)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
  labs(title = "Weekly Return (Today) by Market Direction",
       x = "Direction", y = "Return (%)") +
  theme_minimal()

# Lag1 vs Lag2 coloured by Direction
p3 <- ggplot(Weekly, aes(x = Lag1, y = Lag2, colour = Direction)) +
  geom_point(alpha = 0.4) +
  scale_colour_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
  labs(title = "Lag1 vs Lag2 Returns by Direction") +
  theme_minimal()

# Volume distribution by direction
p4 <- ggplot(Weekly, aes(x = Volume, fill = Direction)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = c("Down" = "#e74c3c", "Up" = "#2ecc71")) +
  labs(title = "Volume Distribution by Market Direction") +
  theme_minimal()

(p1 | p2) / (p3 | p4)

Patterns observed:

Trading volume has grown substantially over the 21-year period (exponential-like growth from 1990 to 2010).
The market went Up in roughly 55.6% of weeks, indicating a mild upward bias.
Today returns are centred near 0 for both directions, with the “Up” group having higher median returns as expected.
Lag variables show little obvious visual separation between Up and Down weeks, suggesting they may be weak individual predictors.

4.3 (b) Logistic Regression — Full Data Set

# Fit logistic regression: Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume
logit_full <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
                  data   = Weekly,
                  family = binomial)

summary(logit_full)

## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Weekly)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181   
## Lag2         0.05844    0.02686   2.175   0.0296 * 
## Lag3        -0.01606    0.02666  -0.602   0.5469   
## Lag4        -0.02779    0.02646  -1.050   0.2937   
## Lag5        -0.01447    0.02638  -0.549   0.5833   
## Volume      -0.02274    0.03690  -0.616   0.5377   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
## 
## Number of Fisher Scoring iterations: 4

logit_tidy <- broom::tidy(logit_full) |>
  mutate(Odds_Ratio = exp(estimate),
         Significant = ifelse(p.value < 0.05, "Yes ✓", "No"))

kable(logit_tidy, digits = 4,
      caption = "Logistic Regression Coefficients — Full Weekly Data") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) |>
  row_spec(which(logit_tidy$Significant == "Yes ✓"),
           bold = TRUE, background = "#d4edda")

Logistic Regression Coefficients — Full Weekly Data
term	estimate	std.error	statistic	p.value	Odds_Ratio	Significant
(Intercept)	0.2669	0.0859	3.1056	0.0019	1.3059	Yes ✓
Lag1	-0.0413	0.0264	-1.5626	0.1181	0.9596	No
Lag2	0.0584	0.0269	2.1754	0.0296	1.0602	Yes ✓
Lag3	-0.0161	0.0267	-0.6024	0.5469	0.9841	No
Lag4	-0.0278	0.0265	-1.0501	0.2937	0.9726	No
Lag5	-0.0145	0.0264	-0.5485	0.5833	0.9856	No
Volume	-0.0227	0.0369	-0.6163	0.5377	0.9775	No

Answer: Only Lag2 has a statistically significant p-value (< 0.05) in the full logistic regression. Its positive coefficient indicates that a higher return in the previous-previous week is associated with an increased probability of an “Up” week.

4.4 (c) Confusion Matrix — Full Data

# Predicted probabilities and classes (threshold = 0.5)
probs_full <- predict(logit_full, type = "response")
preds_full <- ifelse(probs_full > 0.5, "Up", "Down")

# Confusion matrix
conf_full <- table(Predicted = preds_full, Actual = Weekly$Direction)

kable(conf_full, caption = "Confusion Matrix — Full Logistic Regression") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Confusion Matrix — Full Logistic Regression
	Down	Up
Down	54	48
Up	430	557

# Overall accuracy
acc_full <- mean(preds_full == Weekly$Direction)
cat(sprintf("Overall fraction of correct predictions: %.4f (%.1f%%)\n",
            acc_full, acc_full * 100))

## Overall fraction of correct predictions: 0.5611 (56.1%)

Interpretation of the confusion matrix:

The model predicts “Up” very often (it essentially defaults to “Up” for most observations).
True Positives (Up → Up): The model correctly identifies many “Up” weeks.
False Positives (Predicted Up, Actual Down): Many “Down” weeks are misclassified as “Up.”
False Negatives (Predicted Down, Actual Up): The model rarely predicts “Down,” so it misses many of these correctly but has poor recall for the “Down” class.
The model has limited discriminatory power — its overall accuracy (≈56%) is only slightly better than the naive classifier that always predicts “Up.”

4.5 (d) Logistic Regression — Training/Test Split with Lag2 Only

# Training period: 1990–2008; Test period: 2009–2010
train <- Weekly$Year <= 2008
test  <- !train

Weekly_train <- Weekly[ train, ]
Weekly_test  <- Weekly[ test,  ]

cat("Training observations:", nrow(Weekly_train), "\n")

## Training observations: 985

cat("Test observations:    ", nrow(Weekly_test),  "\n")

## Test observations:     104

# Fit logistic regression on training data with Lag2 only
logit_lag2 <- glm(Direction ~ Lag2,
                  data   = Weekly_train,
                  family = binomial)

summary(logit_lag2)

## 
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = Weekly_train)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.20326    0.06428   3.162  0.00157 **
## Lag2         0.05810    0.02870   2.024  0.04298 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1354.7  on 984  degrees of freedom
## Residual deviance: 1350.5  on 983  degrees of freedom
## AIC: 1354.5
## 
## Number of Fisher Scoring iterations: 4

# Predict on held-out test data
probs_test <- predict(logit_lag2, newdata = Weekly_test, type = "response")
preds_test <- ifelse(probs_test > 0.5, "Up", "Down")
preds_test <- factor(preds_test, levels = c("Down", "Up"))

# Confusion matrix on test data
conf_test <- table(Predicted = preds_test,
                   Actual    = Weekly_test$Direction)

kable(conf_test,
      caption = "Confusion Matrix — Test Data (2009–2010), Lag2-only model") |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Confusion Matrix — Test Data (2009–2010), Lag2-only model
	Down	Up
Down	9	5
Up	34	56

acc_test <- mean(preds_test == Weekly_test$Direction)
cat(sprintf("Test accuracy: %.4f (%.1f%%)\n", acc_test, acc_test * 100))

## Test accuracy: 0.6250 (62.5%)

# Sensitivity and Specificity
tp <- conf_test["Up",   "Up"]
tn <- conf_test["Down", "Down"]
fp <- conf_test["Up",   "Down"]
fn <- conf_test["Down", "Up"]

sensitivity <- tp / (tp + fn)   # True Positive Rate
specificity  <- tn / (tn + fp)  # True Negative Rate

cat(sprintf("Sensitivity (Up correctly identified): %.3f\n", sensitivity))

## Sensitivity (Up correctly identified): 0.918

cat(sprintf("Specificity (Down correctly identified): %.3f\n", specificity))

## Specificity (Down correctly identified): 0.209

Comment:

The Lag2-only model achieves a test accuracy of approximately 62.5%, which is higher than the full model’s training accuracy. This suggests that Lag2 captures some genuine predictive signal, and that the simpler model generalises better to held-out data. However, the model still tends to over-predict “Up” weeks, as reflected in the relatively low specificity.

5 Session Information

sessionInfo()

## R version 4.5.3 (2026-03-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Asia/Ulaanbaatar
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.2  kableExtra_1.4.0 knitr_1.51       caret_7.0-1     
##  [5] lattice_0.22-9   car_3.1-5        carData_3.0-6    corrplot_0.95   
##  [9] GGally_2.4.0     lubridate_1.9.5  forcats_1.0.1    stringr_1.6.0   
## [13] dplyr_1.2.0      purrr_1.2.1      readr_2.2.0      tidyr_1.3.2     
## [17] tibble_3.3.1     ggplot2_4.0.2    tidyverse_2.0.0  ISLR2_1.3-2     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1     viridisLite_0.4.3    timeDate_4052.112   
##  [4] farver_2.1.2         S7_0.2.1             fastmap_1.2.0       
##  [7] pROC_1.19.0.1        digest_0.6.39        rpart_4.1.24        
## [10] timechange_0.4.0     lifecycle_1.0.5      survival_3.8-6      
## [13] magrittr_2.0.4       compiler_4.5.3       rlang_1.1.7         
## [16] sass_0.4.10          tools_4.5.3          yaml_2.3.12         
## [19] data.table_1.18.2.1  labeling_0.4.3       xml2_1.5.2          
## [22] plyr_1.8.9           RColorBrewer_1.1-3   abind_1.4-8         
## [25] withr_3.0.2          stats4_4.5.3         nnet_7.3-20         
## [28] grid_4.5.3           future_1.70.0        globals_0.19.1      
## [31] scales_1.4.0         iterators_1.0.14     MASS_7.3-65         
## [34] cli_3.6.5            rmarkdown_2.30       generics_0.1.4      
## [37] rstudioapi_0.18.0    future.apply_1.20.2  reshape2_1.4.5      
## [40] tzdb_0.5.0           cachem_1.1.0         splines_4.5.3       
## [43] parallel_4.5.3       vctrs_0.7.1          hardhat_1.4.2       
## [46] Matrix_1.7-4         jsonlite_2.0.0       hms_1.1.4           
## [49] ggrepel_0.9.8        Formula_1.2-5        listenv_0.10.1      
## [52] systemfonts_1.3.2    foreach_1.5.2        gower_1.0.2         
## [55] jquerylib_0.1.4      recipes_1.3.1        glue_1.8.0          
## [58] parallelly_1.46.1    ggstats_0.13.0       codetools_0.2-20    
## [61] stringi_1.8.7        gtable_0.3.6         pillar_1.11.1       
## [64] htmltools_0.5.9      ipred_0.9-15         lava_1.8.2          
## [67] R6_2.6.1             textshaping_1.0.5    evaluate_1.0.5      
## [70] backports_1.5.0      broom_1.0.12         bslib_0.10.0        
## [73] class_7.3-23         Rcpp_1.1.1           svglite_2.2.2       
## [76] nlme_3.1-168         prodlim_2026.03.11   mgcv_1.9-4          
## [79] xfun_0.56            pkgconfig_2.0.3      ModelMetrics_1.2.2.2

Mid-Term

Enkhmaa 114035125

2026-04-14