Packages

library(tidyverse)
library(ISLR)
library(splines)
library(MASS)

Question 8

# Working copy of the Auto data set; every Question 8 fit and plot reads it.
df.Auto <- Auto

Polynomial

# For each predictor, regress mpg on a degree-5 orthogonal polynomial and print
# the coefficient table; the Pr(>|t|) column shows which degrees matter.
coef(summary(lm(mpg ~ poly(displacement, 5), data = df.Auto))) # displacement: only degrees 1-2 are significant
##                            Estimate Std. Error     t value      Pr(>|t|)
## (Intercept)              23.4459184  0.2208595 106.1576041 9.704609e-288
## poly(displacement, 5)1 -124.2584881  4.3727956 -28.4162583  1.197947e-96
## poly(displacement, 5)2   31.0895299  4.3727956   7.1097607  5.667627e-12
## poly(displacement, 5)3   -4.4655059  4.3727956  -1.0212016  3.077985e-01
## poly(displacement, 5)4    0.7747124  4.3727956   0.1771664  8.594706e-01
## poly(displacement, 5)5    3.2991195  4.3727956   0.7544646  4.510303e-01
coef(summary(lm(mpg ~ poly(horsepower, 5), data = df.Auto))) # horsepower: degrees 1-2 dominate; degree 5 is also significant (p ~ 0.002)
##                         Estimate Std. Error     t value      Pr(>|t|)
## (Intercept)            23.445918  0.2184909 107.3084577 1.731769e-289
## poly(horsepower, 5)1 -120.137744  4.3258985 -27.7717434  4.617361e-94
## poly(horsepower, 5)2   44.089528  4.3258985  10.1919930  9.240203e-22
## poly(horsepower, 5)3   -3.948849  4.3258985  -0.9128389  3.618971e-01
## poly(horsepower, 5)4   -5.187810  4.3258985  -1.1992446  2.311685e-01
## poly(horsepower, 5)5   13.272187  4.3258985   3.0680763  2.306428e-03
coef(summary(lm(mpg ~ poly(weight, 5), data = df.Auto))) # weight: only degrees 1-2 are significant
##                     Estimate Std. Error      t value      Pr(>|t|)
## (Intercept)        23.445918  0.2114026 110.90646131 7.640371e-295
## poly(weight, 5)1 -128.443605  4.1855587 -30.68732650 1.363957e-105
## poly(weight, 5)2   23.158912  4.1855587   5.53305154  5.817391e-08
## poly(weight, 5)3    0.220417  4.1855587   0.05266131  9.580290e-01
## poly(weight, 5)4   -2.807895  4.1855587  -0.67085301  5.027154e-01
## poly(weight, 5)5    3.830108  4.1855587   0.91507697  3.607225e-01
coef(summary(lm(mpg ~ poly(year, 5), data = df.Auto))) # year: degrees 1-2 are significant; degree 4 is borderline (p ~ 0.048)
##                  Estimate Std. Error    t value      Pr(>|t|)
## (Intercept)     23.445918  0.3134606 74.7970210 8.365018e-232
## poly(year, 5)1  89.597228  6.2062031 14.4367220  4.419409e-38
## poly(year, 5)2  26.952408  6.2062031  4.3428177  1.799956e-05
## poly(year, 5)3  -5.320927  6.2062031 -0.8573563  3.917800e-01
## poly(year, 5)4 -12.298598  6.2062031 -1.9816622  4.822607e-02
## poly(year, 5)5  -4.217216  6.2062031 -0.6795163  4.972181e-01

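There seems to be a quadratic relationship between each of these variables and miles per gallon: in every fit the linear and quadratic terms are highly significant. (The degree-5 term for horsepower and the degree-4 term for year are also significant at the 5% level, but the quadratic term captures most of the non-linearity.)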

# Quadratic fit of mpg on displacement, drawn in red over the raw scatter.
lm.fit <- lm(mpg ~ poly(displacement, 2), data = df.Auto)
# Fitted values paired with their x positions so geom_line() can trace them.
y.pred <- data.frame(
  y.pred = predict(lm.fit, newdata = df.Auto),
  displacement = df.Auto[["displacement"]]
)
ggplot(df.Auto, aes(x = displacement, y = mpg)) +
  geom_point() +
  geom_line(aes(y = y.pred), data = y.pred, colour = "red")

# Quadratic fit of mpg on horsepower, drawn in red over the raw scatter.
lm.fit <- lm(mpg ~ poly(horsepower, 2), data = df.Auto)
# Fitted values paired with their x positions so geom_line() can trace them.
y.pred <- data.frame(
  y.pred = predict(lm.fit, newdata = df.Auto),
  horsepower = df.Auto[["horsepower"]]
)
ggplot(df.Auto, aes(x = horsepower, y = mpg)) +
  geom_point() +
  geom_line(aes(y = y.pred), data = y.pred, colour = "red")

# Quadratic fit of mpg on weight, drawn in red over the raw scatter.
lm.fit <- lm(mpg ~ poly(weight, 2), data = df.Auto)
# Fitted values paired with their x positions so geom_line() can trace them.
y.pred <- data.frame(
  y.pred = predict(lm.fit, newdata = df.Auto),
  weight = df.Auto[["weight"]]
)
ggplot(df.Auto, aes(x = weight, y = mpg)) +
  geom_point() +
  geom_line(aes(y = y.pred), data = y.pred, colour = "red")

Question 9

# Working copy of the Boston data set for Question 9.
df.9 <- Boston
# Cubic (degree-3) orthogonal-polynomial regression of nox on dis.
lm.9.fit <- lm(nox ~ poly(dis, 3), data = df.9)
# Print the coefficient table and overall fit statistics for the cubic model.
summary(lm.9.fit)
## 
## Call:
## lm(formula = nox ~ poly(dis, 3), data = df.9)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.121130 -0.040619 -0.009738  0.023385  0.194904 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.554695   0.002759 201.021  < 2e-16 ***
## poly(dis, 3)1 -2.003096   0.062071 -32.271  < 2e-16 ***
## poly(dis, 3)2  0.856330   0.062071  13.796  < 2e-16 ***
## poly(dis, 3)3 -0.318049   0.062071  -5.124 4.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared:  0.7148, Adjusted R-squared:  0.7131 
## F-statistic: 419.3 on 3 and 502 DF,  p-value: < 2.2e-16
# Fit nox ~ dis with polynomial degrees 1 through 10 and draw each fitted
# curve (red) over the scatter of the data, one plot per degree.
for (degree in seq_len(10)) {
  lm.9.fit <- lm(nox ~ poly(dis, degree), data = df.9)
  # Fitted values paired with their x positions so geom_line() can trace them.
  y.pred <- data.frame(
    y.pred = predict(lm.9.fit, newdata = df.9),
    dis = df.9[["dis"]]
  )
  gg <- ggplot(df.9, aes(x = dis, y = nox)) +
    geom_point() +
    geom_line(aes(y = y.pred), data = y.pred, colour = "red") +
    ggtitle(paste("Degrees ", degree))
  # ggplot objects are not auto-printed inside a for loop, so print explicitly.
  print(gg)
}