library(tidyverse)
library(ISLR)
library(splines)
library(MASS)
# Working copy of the Auto data (from the ISLR package attached above).
df.Auto <- Auto

# For each predictor, regress mpg on a degree-5 orthogonal polynomial and print
# the coefficient table to see which polynomial degrees carry signal.

# displacement: the linear and quadratic terms are significant; degrees 3-5 are
# not.
summary(lm(mpg ~ poly(displacement, 5), data = df.Auto))$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459184 0.2208595 106.1576041 9.704609e-288
## poly(displacement, 5)1 -124.2584881 4.3727956 -28.4162583 1.197947e-96
## poly(displacement, 5)2 31.0895299 4.3727956 7.1097607 5.667627e-12
## poly(displacement, 5)3 -4.4655059 4.3727956 -1.0212016 3.077985e-01
## poly(displacement, 5)4 0.7747124 4.3727956 0.1771664 8.594706e-01
## poly(displacement, 5)5 3.2991195 4.3727956 0.7544646 4.510303e-01

# horsepower: the linear and quadratic terms dominate; degrees 3-4 are not
# significant, although the degree-5 term also has a small p-value (~0.002).
summary(lm(mpg ~ poly(horsepower, 5), data = df.Auto))$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.445918 0.2184909 107.3084577 1.731769e-289
## poly(horsepower, 5)1 -120.137744 4.3258985 -27.7717434 4.617361e-94
## poly(horsepower, 5)2 44.089528 4.3258985 10.1919930 9.240203e-22
## poly(horsepower, 5)3 -3.948849 4.3258985 -0.9128389 3.618971e-01
## poly(horsepower, 5)4 -5.187810 4.3258985 -1.1992446 2.311685e-01
## poly(horsepower, 5)5 13.272187 4.3258985 3.0680763 2.306428e-03

# weight: the linear and quadratic terms are significant; degrees 3-5 are not.
summary(lm(mpg ~ poly(weight, 5), data = df.Auto))$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.445918 0.2114026 110.90646131 7.640371e-295
## poly(weight, 5)1 -128.443605 4.1855587 -30.68732650 1.363957e-105
## poly(weight, 5)2 23.158912 4.1855587 5.53305154 5.817391e-08
## poly(weight, 5)3 0.220417 4.1855587 0.05266131 9.580290e-01
## poly(weight, 5)4 -2.807895 4.1855587 -0.67085301 5.027154e-01
## poly(weight, 5)5 3.830108 4.1855587 0.91507697 3.607225e-01

# year: the linear and quadratic terms are significant; the degree-4 term is
# borderline (p ~ 0.048) and degrees 3 and 5 are not significant.
summary(lm(mpg ~ poly(year, 5), data = df.Auto))$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.445918 0.3134606 74.7970210 8.365018e-232
## poly(year, 5)1 89.597228 6.2062031 14.4367220 4.419409e-38
## poly(year, 5)2 26.952408 6.2062031 4.3428177 1.799956e-05
## poly(year, 5)3 -5.320927 6.2062031 -0.8573563 3.917800e-01
## poly(year, 5)4 -12.298598 6.2062031 -1.9816622 4.822607e-02
## poly(year, 5)5 -4.217216 6.2062031 -0.6795163 4.972181e-01
# There seems to be a quadratic relationship between each of these variables and miles per gallon.
# Overlay the fitted quadratic on a scatter plot of mpg against each of three
# predictors. One loop replaces three copy-pasted stanzas that differed only in
# the column name. `lm.fit` and `y.pred` are overwritten on every pass, so after
# the loop they hold the results for the last predictor (weight).
for (predictor in c("displacement", "horsepower", "weight")) {
  # Build the formula mpg ~ poly(<predictor>, 2) from the column name.
  quad.formula <- reformulate(
    sprintf("poly(%s, 2)", predictor),
    response = "mpg"
  )
  lm.fit <- lm(quad.formula, data = df.Auto)

  # Keep the fitted values next to the predictor column so the line layer can
  # inherit the x mapping from the main plot.
  y.pred <- data.frame(y.pred = predict(lm.fit, df.Auto))
  y.pred[[predictor]] <- df.Auto[[predictor]]

  # `.data[[predictor]]` selects the column by its string name; the x label is
  # set explicitly so the axis shows the column name.
  gg <- ggplot(
    data = df.Auto,
    mapping = aes(x = .data[[predictor]], y = mpg)
  ) +
    geom_point() +
    geom_line(data = y.pred, aes(y = y.pred), color = "red") +
    labs(x = predictor)

  # ggplot objects are not auto-printed inside a for loop.
  print(gg)
}
# Working copy of the Boston data set shipped with MASS.
df.9 <- MASS::Boston

# Cubic regression of nox on dis, using a degree-3 orthogonal polynomial basis.
lm.9.fit <- lm(nox ~ poly(dis, 3), data = df.9)

# Show the full model summary; the recorded output follows.
print(summary(lm.9.fit))
##
## Call:
## lm(formula = nox ~ poly(dis, 3), data = df.9)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.121130 -0.040619 -0.009738 0.023385 0.194904
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.554695 0.002759 201.021 < 2e-16 ***
## poly(dis, 3)1 -2.003096 0.062071 -32.271 < 2e-16 ***
## poly(dis, 3)2 0.856330 0.062071 13.796 < 2e-16 ***
## poly(dis, 3)3 -0.318049 0.062071 -5.124 4.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared: 0.7148, Adjusted R-squared: 0.7131
## F-statistic: 419.3 on 3 and 502 DF, p-value: < 2.2e-16
# Fit nox ~ poly(dis, i) for polynomial degrees 1 through 10 and plot each
# fitted curve over the data. `lm.9.fit` and `y.pred` are overwritten on every
# pass, so after the loop they hold the degree-10 results.
for (i in 1:10) {
  lm.9.fit <- lm(nox ~ poly(dis, i), data = df.9)
  y.pred <- data.frame(y.pred = predict(lm.9.fit, df.9), dis = df.9$dis)

  # paste() already separates its arguments with a space, so the label carries
  # no trailing space of its own (the old title rendered as "Degrees  1").
  gg <- ggplot(data = df.9, mapping = aes(x = dis, y = nox)) +
    ggtitle(paste("Degree", i)) +
    geom_point() +
    geom_line(data = y.pred, aes(y = y.pred), color = "red")

  # ggplot objects are not auto-printed inside a for loop.
  print(gg)
}