Exercise #8: Polynomial Regression
# Load dataset
data("Auto", package = "ISLR2")
Auto <- as.data.frame(Auto)
# Fit polynomial regression models
lm_fit1 <- lm(mpg ~ horsepower, data = Auto)
lm_fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto)
lm_fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto)
# Summary of models
summary(lm_fit1)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
summary(lm_fit2)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 2), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7135 -2.5943 -0.0859 2.2868 15.8961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459 0.2209 106.13 <2e-16 ***
## poly(horsepower, 2)1 -120.1377 4.3739 -27.47 <2e-16 ***
## poly(horsepower, 2)2 44.0895 4.3739 10.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared: 0.6876, Adjusted R-squared: 0.686
## F-statistic: 428 on 2 and 389 DF, p-value: < 2.2e-16
summary(lm_fit3)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 3), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7039 -2.4491 -0.1519 2.2035 15.8159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.446 0.221 106.105 <2e-16 ***
## poly(horsepower, 3)1 -120.138 4.375 -27.460 <2e-16 ***
## poly(horsepower, 3)2 44.090 4.375 10.078 <2e-16 ***
## poly(horsepower, 3)3 -3.949 4.375 -0.903 0.367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.375 on 388 degrees of freedom
## Multiple R-squared: 0.6882, Adjusted R-squared: 0.6858
## F-statistic: 285.5 on 3 and 388 DF, p-value: < 2.2e-16
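Note that the degree-1 and degree-2 coefficients are identical in lm_fit2 and lm_fit3 because poly() builds an orthogonal basis, so adding a higher-order term leaves the lower-order estimates unchanged. A minimal sketch showing the equivalent raw-polynomial fit (the object name lm_fit2_raw is ours, not part of the exercise):
# poly(..., raw = TRUE) gives the familiar b0 + b1*x + b2*x^2 parameterization;
# the coefficients differ from the orthogonal version but the fit is identical
lm_fit2_raw <- lm(mpg ~ poly(horsepower, 2, raw = TRUE), data = Auto)
coef(lm_fit2_raw)
all.equal(fitted(lm_fit2), fitted(lm_fit2_raw))  # TRUE: same fitted values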
# Compare models using ANOVA
anova(lm_fit1, lm_fit2, lm_fit3)
## Analysis of Variance Table
##
## Model 1: mpg ~ horsepower
## Model 2: mpg ~ poly(horsepower, 2)
## Model 3: mpg ~ poly(horsepower, 3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 390 9385.9
## 2 389 7442.0 1 1943.89 101.5599 <2e-16 ***
## 3 388 7426.4 1 15.59 0.8147 0.3673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Visualization
library(ggplot2)
ggplot(Auto, aes(x = horsepower, y = mpg)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), col = "blue") +
geom_smooth(method = "lm", formula = y ~ poly(x, 3), col = "red") +
ggtitle("Polynomial Regression: MPG vs Horsepower")
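The ANOVA above indicates the quadratic term is clearly worthwhile (p < 2e-16) while the cubic term adds nothing (p = 0.367). As a cross-check, a minimal sketch that picks the degree by 10-fold cross-validation with boot::cv.glm (the degree range 1-5 and the seed are our choices, not part of the exercise):
# Cross-validate the polynomial degree (sketch)
library(boot)
set.seed(1)  # for reproducible fold assignment
cv_errors <- sapply(1:5, function(d) {
  fit <- glm(mpg ~ poly(horsepower, d), data = Auto)  # gaussian glm == lm
  cv.glm(Auto, fit, K = 10)$delta[1]  # 10-fold CV estimate of test MSE
})
cv_errors  # expect a sharp drop from degree 1 to 2, then a plateau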

Exercise #10: Logistic Regression
# Load dataset
data("Default", package = "ISLR2")
# Fit logistic regression model
glm_fit <- glm(default ~ balance + income + student, data = Default, family = binomial)
summary(glm_fit)
##
## Call:
## glm(formula = default ~ balance + income + student, family = binomial,
## data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.087e+01 4.923e-01 -22.080 < 2e-16 ***
## balance 5.737e-03 2.319e-04 24.738 < 2e-16 ***
## income 3.033e-06 8.203e-06 0.370 0.71152
## studentYes -6.468e-01 2.363e-01 -2.738 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1571.5 on 9996 degrees of freedom
## AIC: 1579.5
##
## Number of Fisher Scoring iterations: 8
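The estimates above are on the log-odds scale; exponentiating puts them on the more interpretable odds scale. A quick sketch (not asked for by the exercise):
# Odds ratios with Wald 95% confidence intervals (sketch)
exp(coef(glm_fit))
exp(confint.default(glm_fit))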
# Predict probabilities for new data
new_data <- data.frame(balance = c(1000, 2000), income = c(40000, 50000), student = c("Yes", "No"))
pred_probs <- predict(glm_fit, newdata = new_data, type = "response")
pred_probs
## 1 2
## 0.003477432 0.680405909
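To gauge in-sample fit, a short sketch that classifies each observation with a 0.5 probability cutoff (the cutoff is an assumption; the exercise only asks for predicted probabilities):
# In-sample confusion matrix at a 0.5 cutoff (sketch)
fitted_probs <- predict(glm_fit, type = "response")
pred_class <- ifelse(fitted_probs > 0.5, "Yes", "No")
table(Predicted = pred_class, Actual = Default$default)
mean(pred_class == Default$default)  # overall accuracy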
Exercise #14: Linear Discriminant Analysis (LDA)
# Load dataset
data("Weekly", package = "ISLR2")
# Fit LDA model
library(MASS)
lda_fit <- lda(Direction ~ Lag1 + Lag2, data = Weekly)
lda_fit
## Call:
## lda(Direction ~ Lag1 + Lag2, data = Weekly)
##
## Prior probabilities of groups:
## Down Up
## 0.4444444 0.5555556
##
## Group means:
## Lag1 Lag2
## Down 0.28229545 -0.04042355
## Up 0.04521653 0.30428099
##
## Coefficients of linear discriminants:
## LD1
## Lag1 -0.2235194
## Lag2 0.3458272
# Predictions
lda_pred <- predict(lda_fit, Weekly)
table(lda_pred$class, Weekly$Direction)
##
## Down Up
## Down 37 37
## Up 447 568
# Accuracy
mean(lda_pred$class == Weekly$Direction)
## [1] 0.5555556
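The 55.6% accuracy above is computed on the same data used to fit the model, so it is optimistic. A fairer check holds out the most recent years; a sketch, assuming the conventional ISLR split of training on 1990-2008 and testing on 2009-2010:
# Out-of-sample check: train on 1990-2008, test on 2009-2010 (sketch)
train <- Weekly$Year <= 2008
lda_fit2 <- lda(Direction ~ Lag1 + Lag2, data = Weekly, subset = train)
lda_pred2 <- predict(lda_fit2, Weekly[!train, ])
table(lda_pred2$class, Weekly$Direction[!train])
mean(lda_pred2$class == Weekly$Direction[!train])  # test-set accuracy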