Packages

# Packages used by this analysis.
# ISLR is presumably the source of the Auto data set used below -- confirm.
library(ISLR)
# NOTE(review): nothing from boot is used in the visible part of this file.
library(boot)
# splines supplies ns(), the natural-spline basis used in the spline models.
library(splines)
library(ggplot2)
# NOTE(review): attach() puts the columns of Auto on the search path, which is
# what masks ggplot2's `mpg` (see the message that follows). Every model and
# plot visible here already passes Auto explicitly, so this call looks
# unnecessary -- confirm nothing later in the file relies on it before removing.
attach(Auto)
## The following object is masked from package:ggplot2:
## 
##     mpg

Polynomial MPG x Displacement

# Degree of the polynomial in displacement.
# Note: this variable shares its name with the function poly(); R resolves the
# call poly(...) to the function and the bare symbol `poly` to this number.
poly <- 10

# Gaussian GLM (the glm() default family) of mpg on an orthogonal polynomial
# basis of displacement.
poly.fit_disp <- glm(
  formula = mpg ~ poly(displacement, poly),
  data = Auto
)
summary(poly.fit_disp)
## 
## Call:
## glm(formula = mpg ~ poly(displacement, poly), data = Auto)
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  23.4459     0.2104 111.446  < 2e-16 ***
## poly(displacement, poly)1  -124.2585     4.1653 -29.832  < 2e-16 ***
## poly(displacement, poly)2    31.0895     4.1653   7.464 5.76e-13 ***
## poly(displacement, poly)3    -4.4655     4.1653  -1.072  0.28436    
## poly(displacement, poly)4     0.7747     4.1653   0.186  0.85255    
## poly(displacement, poly)5     3.2991     4.1653   0.792  0.42882    
## poly(displacement, poly)6   -10.4924     4.1653  -2.519  0.01218 *  
## poly(displacement, poly)7    13.4548     4.1653   3.230  0.00134 ** 
## poly(displacement, poly)8   -13.1269     4.1653  -3.151  0.00175 ** 
## poly(displacement, poly)9    13.4015     4.1653   3.217  0.00140 ** 
## poly(displacement, poly)10  -11.2965     4.1653  -2.712  0.00699 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 17.34958)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6610.2  on 381  degrees of freedom
## AIC: 2243.9
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against displacement with the polynomial fit overlaid.
# stat_smooth() refits the curve with lm(), which for a gaussian model gives
# the same least-squares fit as the glm() summary. The smoother uses the same
# orthogonal basis poly(x, poly) as that model, so the drawn curve is the
# reported fit. Raw powers (raw = TRUE) are avoided: at high degree they are
# badly conditioned and lm() may drop terms.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Weight

# Degree of the polynomial in weight.
# Note: this variable shares its name with the function poly(); R resolves the
# call poly(...) to the function and the bare symbol `poly` to this number.
poly <- 2

# Gaussian GLM (the glm() default family) of mpg on an orthogonal polynomial
# basis of weight.
poly.fit_weight <- glm(
  formula = mpg ~ poly(weight, poly),
  data = Auto
)
summary(poly.fit_weight)
## 
## Call:
## glm(formula = mpg ~ poly(weight, poly), data = Auto)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           23.4459     0.2109 111.151  < 2e-16 ***
## poly(weight, poly)1 -128.4436     4.1763 -30.755  < 2e-16 ***
## poly(weight, poly)2   23.1589     4.1763   5.545 5.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 17.4419)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6784.9  on 389  degrees of freedom
## AIC: 2238.1
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against weight with the polynomial fit overlaid.
# stat_smooth() refits the curve with lm(), which for a gaussian model gives
# the same least-squares fit as the glm() summary. The smoother uses the same
# orthogonal basis poly(x, poly) as that model, so the drawn curve is the
# reported fit. Raw powers (raw = TRUE) are avoided: at high degree they are
# badly conditioned and lm() may drop terms.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Horsepower

# Degree of the polynomial in horsepower.
# Note: this variable shares its name with the function poly(); R resolves the
# call poly(...) to the function and the bare symbol `poly` to this number.
poly <- 7

# Gaussian GLM (the glm() default family) of mpg on an orthogonal polynomial
# basis of horsepower.
poly.fit_hp <- glm(
  formula = mpg ~ poly(horsepower, poly),
  data = Auto
)
summary(poly.fit_hp)
## 
## Call:
## glm(formula = mpg ~ poly(horsepower, poly), data = Auto)
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               23.446      0.217 108.058  < 2e-16 ***
## poly(horsepower, poly)1 -120.138      4.296 -27.966  < 2e-16 ***
## poly(horsepower, poly)2   44.090      4.296  10.263  < 2e-16 ***
## poly(horsepower, poly)3   -3.949      4.296  -0.919  0.35856    
## poly(horsepower, poly)4   -5.188      4.296  -1.208  0.22794    
## poly(horsepower, poly)5   13.272      4.296   3.089  0.00215 ** 
## poly(horsepower, poly)6   -8.546      4.296  -1.989  0.04737 *  
## poly(horsepower, poly)7    7.981      4.296   1.858  0.06397 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 18.4548)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  7086.6  on 384  degrees of freedom
## AIC: 2265.2
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against horsepower with the polynomial fit overlaid.
# stat_smooth() refits the curve with lm(), which for a gaussian model gives
# the same least-squares fit as the glm() summary. The smoother uses the same
# orthogonal basis poly(x, poly) as that model, so the drawn curve is the
# reported fit. Raw powers (raw = TRUE) are avoided: at high degree they are
# badly conditioned and lm() may drop terms.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Acceleration

# Degree of the polynomial in acceleration.
# Note: this variable shares its name with the function poly(); R resolves the
# call poly(...) to the function and the bare symbol `poly` to this number.
poly <- 6

# Gaussian GLM (the glm() default family) of mpg on an orthogonal polynomial
# basis of acceleration.
poly.fit_acc <- glm(
  formula = mpg ~ poly(acceleration, poly),
  data = Auto
)
summary(poly.fit_acc)
## 
## Call:
## glm(formula = mpg ~ poly(acceleration, poly), data = Auto)
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                23.4459     0.3515  66.704  < 2e-16 ***
## poly(acceleration, poly)1  65.3340     6.9591   9.388  < 2e-16 ***
## poly(acceleration, poly)2 -18.7482     6.9591  -2.694  0.00737 ** 
## poly(acceleration, poly)3   6.0643     6.9591   0.871  0.38407    
## poly(acceleration, poly)4  20.7577     6.9591   2.983  0.00304 ** 
## poly(acceleration, poly)5  -5.3550     6.9591  -0.769  0.44207    
## poly(acceleration, poly)6  -7.5655     6.9591  -1.087  0.27766    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 48.42959)
## 
##     Null deviance: 23819  on 391  degrees of freedom
## Residual deviance: 18645  on 385  degrees of freedom
## AIC: 2642.4
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against acceleration with the polynomial fit overlaid.
# stat_smooth() refits the curve with lm(), which for a gaussian model gives
# the same least-squares fit as the glm() summary. The smoother uses the same
# orthogonal basis poly(x, poly) as that model, so the drawn curve is the
# reported fit. Raw powers (raw = TRUE) are avoided: at high degree they are
# badly conditioned and lm() may drop terms.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Displacement

# Number of equal-width bins that cut() splits the displacement range into.
cuts <- 9

# Step-function model: gaussian GLM of mpg on the displacement bin. The
# intercept is the mean mpg of the lowest bin; each coefficient is the offset
# of another bin from it.
piece.fit_disp <- glm(
  formula = mpg ~ cut(displacement, cuts),
  data = Auto
)
summary(piece.fit_disp)
## 
## Call:
## glm(formula = mpg ~ cut(displacement, cuts), data = Auto)
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       31.5181     0.3947  79.850  < 2e-16 ***
## cut(displacement, cuts)(111,154]  -5.5601     0.6010  -9.252  < 2e-16 ***
## cut(displacement, cuts)(154,197]  -8.2848     1.0770  -7.693 1.23e-13 ***
## cut(displacement, cuts)(197,240] -11.7022     0.7527 -15.547  < 2e-16 ***
## cut(displacement, cuts)(240,283] -12.9788     0.8951 -14.499  < 2e-16 ***
## cut(displacement, cuts)(283,326] -16.2276     0.7656 -21.197  < 2e-16 ***
## cut(displacement, cuts)(326,369] -16.7923     0.8595 -19.537  < 2e-16 ***
## cut(displacement, cuts)(369,412] -17.5494     1.1337 -15.479  < 2e-16 ***
## cut(displacement, cuts)(412,455] -18.2959     1.4710 -12.438  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 18.07284)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6921.9  on 383  degrees of freedom
## AIC: 2258
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against displacement with the step-function fit overlaid.
# stat_smooth() refits mpg on the binned x values using lm().
# cut() has no `raw` argument (that belongs to poly()), so only the bin count
# `cuts` is passed.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Weight

# Number of equal-width bins that cut() splits the weight range into.
cuts <- 12

# Step-function model: gaussian GLM of mpg on the weight bin. The intercept is
# the mean mpg of the lowest bin; each coefficient is the offset of another
# bin from it.
piece.fit_weight <- glm(
  formula = mpg ~ cut(weight, cuts),
  data = Auto
)
summary(piece.fit_weight)
## 
## Call:
## glm(formula = mpg ~ cut(weight, cuts), data = Auto)
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           33.4611     0.9801  34.141  < 2e-16 ***
## cut(weight, cuts)(1.91e+03,2.2e+03]   -1.0722     1.0958  -0.979    0.328    
## cut(weight, cuts)(2.2e+03,2.49e+03]   -6.4215     1.1344  -5.661 2.97e-08 ***
## cut(weight, cuts)(2.49e+03,2.79e+03]  -7.5468     1.1460  -6.585 1.52e-10 ***
## cut(weight, cuts)(2.79e+03,3.08e+03] -10.6536     1.1802  -9.027  < 2e-16 ***
## cut(weight, cuts)(3.08e+03,3.38e+03] -13.5763     1.2184 -11.143  < 2e-16 ***
## cut(weight, cuts)(3.38e+03,3.67e+03] -15.2640     1.2061 -12.656  < 2e-16 ***
## cut(weight, cuts)(3.67e+03,3.96e+03] -16.9315     1.2653 -13.382  < 2e-16 ***
## cut(weight, cuts)(3.96e+03,4.26e+03] -18.6694     1.2965 -14.400  < 2e-16 ***
## cut(weight, cuts)(4.26e+03,4.55e+03] -19.5051     1.2854 -15.175  < 2e-16 ***
## cut(weight, cuts)(4.55e+03,4.85e+03] -20.8611     1.6400 -12.720  < 2e-16 ***
## cut(weight, cuts)(4.85e+03,5.14e+03] -21.4611     1.9602 -10.949  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 17.2901)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6570.2  on 380  degrees of freedom
## AIC: 2243.5
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against weight with the step-function fit overlaid.
# stat_smooth() refits mpg on the binned x values using lm().
# cut() has no `raw` argument (that belongs to poly()), so only the bin count
# `cuts` is passed.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Horsepower

# Number of equal-width bins that cut() splits the horsepower range into.
cuts <- 16

# Step-function model: gaussian GLM of mpg on the horsepower bin. The
# intercept is the mean mpg of the lowest bin; each coefficient is the offset
# of another bin from it.
piece.fit_hp <- glm(
  formula = mpg ~ cut(horsepower, cuts),
  data = Auto
)
summary(piece.fit_hp)
## 
## Call:
## glm(formula = mpg ~ cut(horsepower, cuts), data = Auto)
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     33.6615     1.1968  28.125  < 2e-16 ***
## cut(horsepower, cuts)(57.5,69]   0.3167     1.3555   0.234 0.815373    
## cut(horsepower, cuts)(69,80.5]  -4.6882     1.3202  -3.551 0.000432 ***
## cut(horsepower, cuts)(80.5,92]  -8.3142     1.2952  -6.419 4.13e-10 ***
## cut(horsepower, cuts)(92,104]  -12.1901     1.3463  -9.055  < 2e-16 ***
## cut(horsepower, cuts)(104,115] -12.8884     1.3735  -9.383  < 2e-16 ***
## cut(horsepower, cuts)(115,126] -13.7838     1.8712  -7.366 1.12e-12 ***
## cut(horsepower, cuts)(126,138] -16.6449     1.7275  -9.635  < 2e-16 ***
## cut(horsepower, cuts)(138,150] -17.5352     1.5532 -11.289  < 2e-16 ***
## cut(horsepower, cuts)(150,161] -19.1649     1.4329 -13.375  < 2e-16 ***
## cut(horsepower, cuts)(161,172] -19.2415     1.8151 -10.601  < 2e-16 ***
## cut(horsepower, cuts)(172,184] -20.2115     1.8151 -11.135  < 2e-16 ***
## cut(horsepower, cuts)(184,196] -20.5365     2.4674  -8.323 1.60e-15 ***
## cut(horsepower, cuts)(196,207] -21.3282     2.7640  -7.716 1.09e-13 ***
## cut(horsepower, cuts)(207,218] -21.8615     2.2709  -9.627  < 2e-16 ***
## cut(horsepower, cuts)(218,230] -19.6615     2.2709  -8.658  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 18.62178)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  7001.8  on 376  degrees of freedom
## AIC: 2276.5
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against horsepower with the step-function fit overlaid.
# stat_smooth() refits mpg on the binned x values using lm().
# cut() has no `raw` argument (that belongs to poly()), so only the bin count
# `cuts` is passed.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Acceleration

# Number of equal-width bins that cut() splits the acceleration range into.
cuts <- 3

# Step-function model: gaussian GLM of mpg on the acceleration bin. The
# intercept is the mean mpg of the lowest bin; each coefficient is the offset
# of another bin from it.
piece.fit_acc <- glm(
  formula = mpg ~ cut(acceleration, cuts),
  data = Auto
)
summary(piece.fit_acc)
## 
## Call:
## glm(formula = mpg ~ cut(acceleration, cuts), data = Auto)
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         17.4146     0.7122  24.453  < 2e-16 ***
## cut(acceleration, cuts)(13.6,19.2]   7.6541     0.8325   9.194  < 2e-16 ***
## cut(acceleration, cuts)(19.2,24.8]  10.5560     1.3926   7.580 2.55e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 48.69093)
## 
##     Null deviance: 23819  on 391  degrees of freedom
## Residual deviance: 18941  on 389  degrees of freedom
## AIC: 2640.5
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against acceleration with the step-function fit overlaid.
# stat_smooth() refits mpg on the binned x values using lm().
# cut() has no `raw` argument (that belongs to poly()), so only the bin count
# `cuts` is passed.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Displacement

# Size of the natural-spline basis for displacement.
# Despite its name, `knots` is passed as the second positional argument of
# ns(), which is `df`: it sets the number of basis columns (one coefficient
# each in the summary), not the number or position of knots.
knots <- 11

# Gaussian GLM of mpg on a natural cubic spline basis of displacement.
spline.fit_disp <- glm(
  formula = mpg ~ ns(displacement, knots),
  data = Auto
)
summary(spline.fit_disp)
## 
## Call:
## glm(formula = mpg ~ ns(displacement, knots), data = Auto)
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 24.072      1.725  13.956  < 2e-16 ***
## ns(displacement, knots)1     4.014      1.791   2.241 0.025602 *  
## ns(displacement, knots)2     8.300      2.382   3.485 0.000549 ***
## ns(displacement, knots)3    -1.989      2.148  -0.926 0.355155    
## ns(displacement, knots)4     3.740      2.191   1.707 0.088692 .  
## ns(displacement, knots)5    -2.691      2.825  -0.953 0.341422    
## ns(displacement, knots)6    -4.750      2.392  -1.986 0.047757 *  
## ns(displacement, knots)7    -5.493      2.482  -2.213 0.027497 *  
## ns(displacement, knots)8    -9.443      2.112  -4.471 1.03e-05 ***
## ns(displacement, knots)9   -15.271      1.696  -9.002  < 2e-16 ***
## ns(displacement, knots)10    3.221      4.067   0.792 0.428812    
## ns(displacement, knots)11  -18.798      1.766 -10.642  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 16.77548)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6374.7  on 380  degrees of freedom
## AIC: 2231.7
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against displacement with the natural-spline fit overlaid.
# stat_smooth() refits the curve with lm() on the same ns() basis. The value
# in `knots` is the basis degrees of freedom, so it is passed by name as `df`
# to make that explicit.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Weight

# Size of the natural-spline basis for weight.
# Despite its name, `knots` is passed as the second positional argument of
# ns(), which is `df`: it sets the number of basis columns (one coefficient
# each in the summary), not the number or position of knots.
knots <- 10

# Gaussian GLM of mpg on a natural cubic spline basis of weight.
spline.fit_weight <- glm(
  formula = mpg ~ ns(weight, knots),
  data = Auto
)
summary(spline.fit_weight)
## 
## Call:
## glm(formula = mpg ~ ns(weight, knots), data = Auto)
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          33.4812     2.4526  13.651  < 2e-16 ***
## ns(weight, knots)1   -0.9416     2.3707  -0.397 0.691472    
## ns(weight, knots)2   -9.7994     3.0480  -3.215 0.001416 ** 
## ns(weight, knots)3   -5.2222     2.7460  -1.902 0.057955 .  
## ns(weight, knots)4  -10.8104     2.9050  -3.721 0.000228 ***
## ns(weight, knots)5  -12.2591     2.8677  -4.275 2.42e-05 ***
## ns(weight, knots)6  -15.2130     2.8385  -5.360 1.45e-07 ***
## ns(weight, knots)7  -17.2037     2.8052  -6.133 2.16e-09 ***
## ns(weight, knots)8  -19.8434     2.0280  -9.785  < 2e-16 ***
## ns(weight, knots)9  -21.3953     5.5916  -3.826 0.000152 ***
## ns(weight, knots)10 -21.8925     2.2987  -9.524  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 17.16657)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6540.5  on 381  degrees of freedom
## AIC: 2239.7
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against weight with the natural-spline fit overlaid.
# stat_smooth() refits the curve with lm() on the same ns() basis. The value
# in `knots` is the basis degrees of freedom, so it is passed by name as `df`
# to make that explicit.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Horsepower

# Size of the natural-spline basis for horsepower.
# Despite its name, `knots` is passed as the second positional argument of
# ns(), which is `df`: it sets the number of basis columns (one coefficient
# each in the summary), not the number or position of knots.
knots <- 11

# Gaussian GLM of mpg on a natural cubic spline basis of horsepower.
spline.fit_hp <- glm(
  formula = mpg ~ ns(horsepower, knots),
  data = Auto
)
summary(spline.fit_hp)
## 
## Call:
## glm(formula = mpg ~ ns(horsepower, knots), data = Auto)
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               32.495      1.752  18.550  < 2e-16 ***
## ns(horsepower, knots)1    -3.143      1.846  -1.703 0.089367 .  
## ns(horsepower, knots)2    -3.655      2.620  -1.395 0.163855    
## ns(horsepower, knots)3    -7.430      2.244  -3.311 0.001019 ** 
## ns(horsepower, knots)4    -7.157      2.208  -3.241 0.001296 ** 
## ns(horsepower, knots)5   -11.556      2.201  -5.250 2.54e-07 ***
## ns(horsepower, knots)6   -12.808      2.215  -5.781 1.55e-08 ***
## ns(horsepower, knots)7   -10.417      2.655  -3.924 0.000103 ***
## ns(horsepower, knots)8   -17.936      2.157  -8.316 1.64e-15 ***
## ns(horsepower, knots)9   -21.380      1.998 -10.699  < 2e-16 ***
## ns(horsepower, knots)10  -14.628      4.372  -3.346 0.000901 ***
## ns(horsepower, knots)11  -22.318      2.033 -10.980  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 18.23732)
## 
##     Null deviance: 23819.0  on 391  degrees of freedom
## Residual deviance:  6930.2  on 380  degrees of freedom
## AIC: 2264.4
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against horsepower with the natural-spline fit overlaid.
# stat_smooth() refits the curve with lm() on the same ns() basis. The value
# in `knots` is the basis degrees of freedom, so it is passed by name as `df`
# to make that explicit.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Acceleration

# Size of the natural-spline basis for acceleration.
# Despite its name, `knots` is passed as the second positional argument of
# ns(), which is `df`: it sets the number of basis columns (one coefficient
# each in the summary), not the number or position of knots.
knots <- 7

# Gaussian GLM of mpg on a natural cubic spline basis of acceleration.
spline.fit_acc <- glm(
  formula = mpg ~ ns(acceleration, knots),
  data = Auto
)
summary(spline.fit_acc)
## 
## Call:
## glm(formula = mpg ~ ns(acceleration, knots), data = Auto)
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                14.450      3.530   4.094 5.18e-05 ***
## ns(acceleration, knots)1    6.727      3.410   1.973  0.04923 *  
## ns(acceleration, knots)2   13.110      4.114   3.187  0.00156 ** 
## ns(acceleration, knots)3    7.781      3.793   2.051  0.04091 *  
## ns(acceleration, knots)4   13.760      3.900   3.528  0.00047 ***
## ns(acceleration, knots)5    7.985      3.033   2.633  0.00882 ** 
## ns(acceleration, knots)6   15.990      8.223   1.945  0.05254 .  
## ns(acceleration, knots)7   21.223      3.944   5.381 1.29e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 47.81463)
## 
##     Null deviance: 23819  on 391  degrees of freedom
## Residual deviance: 18361  on 384  degrees of freedom
## AIC: 2638.4
## 
## Number of Fisher Scoring iterations: 2
# Scatter plot of mpg against acceleration with the natural-spline fit overlaid.
# stat_smooth() refits the curve with lm() on the same ns() basis. The value
# in `knots` is the basis degrees of freedom, so it is passed by name as `df`
# to make that explicit.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    linetype = 1, color = "blue", se = FALSE
  ) +
  theme_bw()