Exercise #8: Polynomial Regression

# Load dataset
data("Auto", package = "ISLR2")
Auto <- as.data.frame(Auto)

# Fit polynomial regression models
lm_fit1 <- lm(mpg ~ horsepower, data = Auto)
lm_fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto)
lm_fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto)

# Summary of models
summary(lm_fit1)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5710  -3.2592  -0.3435   2.7630  16.9240 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.935861   0.717499   55.66   <2e-16 ***
## horsepower  -0.157845   0.006446  -24.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared:  0.6059, Adjusted R-squared:  0.6049 
## F-statistic: 599.7 on 1 and 390 DF,  p-value: < 2.2e-16
summary(lm_fit2)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7135  -2.5943  -0.0859   2.2868  15.8961 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            23.4459     0.2209  106.13   <2e-16 ***
## poly(horsepower, 2)1 -120.1377     4.3739  -27.47   <2e-16 ***
## poly(horsepower, 2)2   44.0895     4.3739   10.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared:  0.6876, Adjusted R-squared:  0.686 
## F-statistic:   428 on 2 and 389 DF,  p-value: < 2.2e-16
summary(lm_fit3)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 3), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7039  -2.4491  -0.1519   2.2035  15.8159 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            23.446      0.221 106.105   <2e-16 ***
## poly(horsepower, 3)1 -120.138      4.375 -27.460   <2e-16 ***
## poly(horsepower, 3)2   44.090      4.375  10.078   <2e-16 ***
## poly(horsepower, 3)3   -3.949      4.375  -0.903    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.375 on 388 degrees of freedom
## Multiple R-squared:  0.6882, Adjusted R-squared:  0.6858 
## F-statistic: 285.5 on 3 and 388 DF,  p-value: < 2.2e-16
# Compare models using ANOVA
anova(lm_fit1, lm_fit2, lm_fit3)
## Analysis of Variance Table
## 
## Model 1: mpg ~ horsepower
## Model 2: mpg ~ poly(horsepower, 2)
## Model 3: mpg ~ poly(horsepower, 3)
##   Res.Df    RSS Df Sum of Sq        F Pr(>F)    
## 1    390 9385.9                                 
## 2    389 7442.0  1   1943.89 101.5599 <2e-16 ***
## 3    388 7426.4  1     15.59   0.8147 0.3673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
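The ANOVA confirms what the coefficient tables suggest: the quadratic term improves the fit substantially (F = 101.6, p < 2e-16), while the cubic term does not (p = 0.367), so the quadratic model is preferred. As an optional cross-check, a minimal LOOCV sketch (assuming the boot package is available; this is not part of the original exercise output):

# LOOCV estimate of test MSE for polynomial degrees 1-3
library(boot)
cv_errors <- sapply(1:3, function(d) {
  fit <- glm(mpg ~ poly(horsepower, d), data = Auto)
  cv.glm(Auto, fit)$delta[1]  # raw LOOCV error estimate
})
cv_errors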
# Visualization (ggplot2 must be loaded first)
library(ggplot2)
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), col = "blue") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 3), col = "red") +
  ggtitle("Polynomial Regression: MPG vs Horsepower")
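
For numeric predictions rather than the smoothed overlay, predict() on the fitted quadratic model returns pointwise fits and standard errors. A short sketch (the 100-point grid is illustrative):

# Predict mpg from the quadratic fit over a grid of horsepower values
hp_grid <- data.frame(horsepower = seq(min(Auto$horsepower),
                                       max(Auto$horsepower),
                                       length.out = 100))
quad_pred <- predict(lm_fit2, newdata = hp_grid, se.fit = TRUE)
head(cbind(hp_grid, fit = quad_pred$fit, se = quad_pred$se.fit))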


Exercise #10: Logistic Regression

# Load dataset
data("Default", package = "ISLR2")

# Fit logistic regression model
glm_fit <- glm(default ~ balance + income + student, data = Default, family = binomial)
summary(glm_fit)
## 
## Call:
## glm(formula = default ~ balance + income + student, family = binomial, 
##     data = Default)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.087e+01  4.923e-01 -22.080  < 2e-16 ***
## balance      5.737e-03  2.319e-04  24.738  < 2e-16 ***
## income       3.033e-06  8.203e-06   0.370  0.71152    
## studentYes  -6.468e-01  2.363e-01  -2.738  0.00619 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2920.6  on 9999  degrees of freedom
## Residual deviance: 1571.5  on 9996  degrees of freedom
## AIC: 1579.5
## 
## Number of Fisher Scoring iterations: 8
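Balance and student status are significant predictors of default, while income is not; note that the student coefficient is negative once balance is held fixed. Exponentiating the coefficients gives odds ratios, a quick interpretation aid (a sketch, not part of the original output):

# Odds ratios: each one-unit increase in a predictor multiplies
# the odds of default by the corresponding factor
exp(coef(glm_fit))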
# Predict probabilities for new data
new_data <- data.frame(balance = c(1000, 2000), income = c(40000, 50000), student = c("Yes", "No"))
pred_probs <- predict(glm_fit, newdata = new_data, type = "response")
pred_probs
##           1           2 
## 0.003477432 0.680405909
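
The high-balance non-student is far more likely to default than the moderate-balance student. To turn probabilities into class labels, here is a rough training-set confusion matrix at the usual 0.5 cutoff (a sketch; an honest evaluation would use a held-out set):

# Classify with a 0.5 threshold and cross-tabulate against the truth
train_probs <- predict(glm_fit, type = "response")
train_pred <- ifelse(train_probs > 0.5, "Yes", "No")
table(predicted = train_pred, actual = Default$default)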

Exercise #14: Linear Discriminant Analysis (LDA)

# Load dataset
data("Weekly", package = "ISLR2")

# Fit LDA model (lda() is provided by the MASS package)
library(MASS)
lda_fit <- lda(Direction ~ Lag1 + Lag2, data = Weekly)
lda_fit
## Call:
## lda(Direction ~ Lag1 + Lag2, data = Weekly)
## 
## Prior probabilities of groups:
##      Down        Up 
## 0.4444444 0.5555556 
## 
## Group means:
##            Lag1        Lag2
## Down 0.28229545 -0.04042355
## Up   0.04521653  0.30428099
## 
## Coefficients of linear discriminants:
##             LD1
## Lag1 -0.2235194
## Lag2  0.3458272
# Predictions
lda_pred <- predict(lda_fit, Weekly)
table(lda_pred$class, Weekly$Direction)
##       
##        Down  Up
##   Down   37  37
##   Up    447 568
# Accuracy
mean(lda_pred$class == Weekly$Direction)
## [1] 0.5555556
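
Fit and evaluated on the full dataset, LDA predicts Up for 1015 of the 1089 weeks, and its 55.6% accuracy matches the prior probability of Up, so it adds little over always predicting Up. A hedged out-of-sample sketch, assuming the conventional ISLR split (train on Year <= 2008, test on 2009-2010):

# Refit on the training years only, then score the held-out weeks
train <- Weekly$Year <= 2008
lda_fit2 <- lda(Direction ~ Lag1 + Lag2, data = Weekly, subset = train)
lda_pred2 <- predict(lda_fit2, Weekly[!train, ])
mean(lda_pred2$class == Weekly$Direction[!train])  # test accuracy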
