Packages
# ISLR supplies the Auto data set used by the models in the visible part of
# this script.
library(ISLR)
# NOTE(review): no boot function is called in the visible part of this script.
library(boot)
# splines provides ns(), used for the natural-spline fits.
library(splines)
# ggplot2 draws the scatter plots and fitted curves.
library(ggplot2)
# Puts Auto's columns on the search path; this is what masks ggplot2's `mpg`
# (see the message printed below). NOTE(review): every visible model and plot
# call already passes the data explicitly, so this looks unnecessary --
# confirm no later code relies on bare column names before removing it.
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
Polynomial MPG x Displacement
# Degree of the polynomial in displacement.
# NOTE(review): this variable shares its name with stats::poly(). The call
# below still reaches the function because R skips non-function objects when
# resolving a call; the name is kept because the plot formula further down
# reads it.
poly <- 10
# Gaussian GLM of mpg on an orthogonal polynomial basis (poly() default).
poly.fit_disp <- glm(
  formula = mpg ~ poly(displacement, poly),
  data = Auto
)
summary(poly.fit_disp)
##
## Call:
## glm(formula = mpg ~ poly(displacement, poly), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459 0.2104 111.446 < 2e-16 ***
## poly(displacement, poly)1 -124.2585 4.1653 -29.832 < 2e-16 ***
## poly(displacement, poly)2 31.0895 4.1653 7.464 5.76e-13 ***
## poly(displacement, poly)3 -4.4655 4.1653 -1.072 0.28436
## poly(displacement, poly)4 0.7747 4.1653 0.186 0.85255
## poly(displacement, poly)5 3.2991 4.1653 0.792 0.42882
## poly(displacement, poly)6 -10.4924 4.1653 -2.519 0.01218 *
## poly(displacement, poly)7 13.4548 4.1653 3.230 0.00134 **
## poly(displacement, poly)8 -13.1269 4.1653 -3.151 0.00175 **
## poly(displacement, poly)9 13.4015 4.1653 3.217 0.00140 **
## poly(displacement, poly)10 -11.2965 4.1653 -2.712 0.00699 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17.34958)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6610.2 on 381 degrees of freedom
## AIC: 2243.9
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against displacement with the fitted polynomial overlaid.
# The smoother uses the same orthogonal basis as the glm() fit summarised
# above (poly() default, raw = FALSE). Raw powers of displacement up to the
# degree stored in `poly` are badly conditioned and can make lm() drop terms,
# so the curve would no longer match the reported model.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Weight
# Degree of the polynomial in weight.
# NOTE(review): this variable shares its name with stats::poly(); the call
# below still resolves to the function. The name is kept because the plot
# formula further down reads it.
poly <- 2
# Gaussian GLM of mpg on an orthogonal polynomial basis (poly() default).
poly.fit_weight <- glm(
  formula = mpg ~ poly(weight, poly),
  data = Auto
)
summary(poly.fit_weight)
##
## Call:
## glm(formula = mpg ~ poly(weight, poly), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459 0.2109 111.151 < 2e-16 ***
## poly(weight, poly)1 -128.4436 4.1763 -30.755 < 2e-16 ***
## poly(weight, poly)2 23.1589 4.1763 5.545 5.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17.4419)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6784.9 on 389 degrees of freedom
## AIC: 2238.1
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against weight with the fitted polynomial overlaid.
# The smoother uses the same orthogonal basis as the glm() fit summarised
# above (poly() default, raw = FALSE) so the curve matches the reported model.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Horsepower
# Degree of the polynomial in horsepower.
# NOTE(review): this variable shares its name with stats::poly(); the call
# below still resolves to the function. The name is kept because the plot
# formula further down reads it.
poly <- 7
# Gaussian GLM of mpg on an orthogonal polynomial basis (poly() default).
poly.fit_hp <- glm(
  formula = mpg ~ poly(horsepower, poly),
  data = Auto
)
summary(poly.fit_hp)
##
## Call:
## glm(formula = mpg ~ poly(horsepower, poly), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.446 0.217 108.058 < 2e-16 ***
## poly(horsepower, poly)1 -120.138 4.296 -27.966 < 2e-16 ***
## poly(horsepower, poly)2 44.090 4.296 10.263 < 2e-16 ***
## poly(horsepower, poly)3 -3.949 4.296 -0.919 0.35856
## poly(horsepower, poly)4 -5.188 4.296 -1.208 0.22794
## poly(horsepower, poly)5 13.272 4.296 3.089 0.00215 **
## poly(horsepower, poly)6 -8.546 4.296 -1.989 0.04737 *
## poly(horsepower, poly)7 7.981 4.296 1.858 0.06397 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 18.4548)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 7086.6 on 384 degrees of freedom
## AIC: 2265.2
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against horsepower with the fitted polynomial overlaid.
# The smoother uses the same orthogonal basis as the glm() fit summarised
# above (poly() default, raw = FALSE). Raw powers of horsepower up to the
# degree stored in `poly` are poorly conditioned, so the orthogonal basis
# keeps the curve consistent with the reported model.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Polynomial MPG x Acceleration
# Degree of the polynomial in acceleration.
# NOTE(review): this variable shares its name with stats::poly(); the call
# below still resolves to the function. The name is kept because the plot
# formula further down reads it.
poly <- 6
# Gaussian GLM of mpg on an orthogonal polynomial basis (poly() default).
poly.fit_acc <- glm(
  formula = mpg ~ poly(acceleration, poly),
  data = Auto
)
summary(poly.fit_acc)
##
## Call:
## glm(formula = mpg ~ poly(acceleration, poly), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.4459 0.3515 66.704 < 2e-16 ***
## poly(acceleration, poly)1 65.3340 6.9591 9.388 < 2e-16 ***
## poly(acceleration, poly)2 -18.7482 6.9591 -2.694 0.00737 **
## poly(acceleration, poly)3 6.0643 6.9591 0.871 0.38407
## poly(acceleration, poly)4 20.7577 6.9591 2.983 0.00304 **
## poly(acceleration, poly)5 -5.3550 6.9591 -0.769 0.44207
## poly(acceleration, poly)6 -7.5655 6.9591 -1.087 0.27766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 48.42959)
##
## Null deviance: 23819 on 391 degrees of freedom
## Residual deviance: 18645 on 385 degrees of freedom
## AIC: 2642.4
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against acceleration with the fitted polynomial overlaid.
# The smoother uses the same orthogonal basis as the glm() fit summarised
# above (poly() default, raw = FALSE) so the curve matches the reported model.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, poly),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Displacement
# Number of intervals for the step-function fit. Given a single number,
# cut() splits the range of displacement into that many equal-width bins.
cuts <- 9
# Gaussian GLM of mpg on the binned displacement; the first bin is the
# reference level carried by the intercept.
piece.fit_disp <- glm(
  formula = mpg ~ cut(displacement, cuts),
  data = Auto
)
summary(piece.fit_disp)
##
## Call:
## glm(formula = mpg ~ cut(displacement, cuts), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.5181 0.3947 79.850 < 2e-16 ***
## cut(displacement, cuts)(111,154] -5.5601 0.6010 -9.252 < 2e-16 ***
## cut(displacement, cuts)(154,197] -8.2848 1.0770 -7.693 1.23e-13 ***
## cut(displacement, cuts)(197,240] -11.7022 0.7527 -15.547 < 2e-16 ***
## cut(displacement, cuts)(240,283] -12.9788 0.8951 -14.499 < 2e-16 ***
## cut(displacement, cuts)(283,326] -16.2276 0.7656 -21.197 < 2e-16 ***
## cut(displacement, cuts)(326,369] -16.7923 0.8595 -19.537 < 2e-16 ***
## cut(displacement, cuts)(369,412] -17.5494 1.1337 -15.479 < 2e-16 ***
## cut(displacement, cuts)(412,455] -18.2959 1.4710 -12.438 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 18.07284)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6921.9 on 383 degrees of freedom
## AIC: 2258
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against displacement with the piecewise-constant fit overlaid.
# cut() has no `raw` argument (it was silently absorbed by `...`), so the
# stray copy-pasted `raw` flag is dropped; the fitted curve is unchanged.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Weight
# Number of intervals for the step-function fit. Given a single number,
# cut() splits the range of weight into that many equal-width bins.
cuts <- 12
# Gaussian GLM of mpg on the binned weight; the first bin is the reference
# level carried by the intercept.
piece.fit_weight <- glm(
  formula = mpg ~ cut(weight, cuts),
  data = Auto
)
summary(piece.fit_weight)
##
## Call:
## glm(formula = mpg ~ cut(weight, cuts), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.4611 0.9801 34.141 < 2e-16 ***
## cut(weight, cuts)(1.91e+03,2.2e+03] -1.0722 1.0958 -0.979 0.328
## cut(weight, cuts)(2.2e+03,2.49e+03] -6.4215 1.1344 -5.661 2.97e-08 ***
## cut(weight, cuts)(2.49e+03,2.79e+03] -7.5468 1.1460 -6.585 1.52e-10 ***
## cut(weight, cuts)(2.79e+03,3.08e+03] -10.6536 1.1802 -9.027 < 2e-16 ***
## cut(weight, cuts)(3.08e+03,3.38e+03] -13.5763 1.2184 -11.143 < 2e-16 ***
## cut(weight, cuts)(3.38e+03,3.67e+03] -15.2640 1.2061 -12.656 < 2e-16 ***
## cut(weight, cuts)(3.67e+03,3.96e+03] -16.9315 1.2653 -13.382 < 2e-16 ***
## cut(weight, cuts)(3.96e+03,4.26e+03] -18.6694 1.2965 -14.400 < 2e-16 ***
## cut(weight, cuts)(4.26e+03,4.55e+03] -19.5051 1.2854 -15.175 < 2e-16 ***
## cut(weight, cuts)(4.55e+03,4.85e+03] -20.8611 1.6400 -12.720 < 2e-16 ***
## cut(weight, cuts)(4.85e+03,5.14e+03] -21.4611 1.9602 -10.949 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17.2901)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6570.2 on 380 degrees of freedom
## AIC: 2243.5
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against weight with the piecewise-constant fit overlaid.
# cut() has no `raw` argument (it was silently absorbed by `...`), so the
# stray copy-pasted `raw` flag is dropped; the fitted curve is unchanged.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Horsepower
# Number of intervals for the step-function fit. Given a single number,
# cut() splits the range of horsepower into that many equal-width bins.
cuts <- 16
# Gaussian GLM of mpg on the binned horsepower; the first bin is the
# reference level carried by the intercept.
piece.fit_hp <- glm(
  formula = mpg ~ cut(horsepower, cuts),
  data = Auto
)
summary(piece.fit_hp)
##
## Call:
## glm(formula = mpg ~ cut(horsepower, cuts), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.6615 1.1968 28.125 < 2e-16 ***
## cut(horsepower, cuts)(57.5,69] 0.3167 1.3555 0.234 0.815373
## cut(horsepower, cuts)(69,80.5] -4.6882 1.3202 -3.551 0.000432 ***
## cut(horsepower, cuts)(80.5,92] -8.3142 1.2952 -6.419 4.13e-10 ***
## cut(horsepower, cuts)(92,104] -12.1901 1.3463 -9.055 < 2e-16 ***
## cut(horsepower, cuts)(104,115] -12.8884 1.3735 -9.383 < 2e-16 ***
## cut(horsepower, cuts)(115,126] -13.7838 1.8712 -7.366 1.12e-12 ***
## cut(horsepower, cuts)(126,138] -16.6449 1.7275 -9.635 < 2e-16 ***
## cut(horsepower, cuts)(138,150] -17.5352 1.5532 -11.289 < 2e-16 ***
## cut(horsepower, cuts)(150,161] -19.1649 1.4329 -13.375 < 2e-16 ***
## cut(horsepower, cuts)(161,172] -19.2415 1.8151 -10.601 < 2e-16 ***
## cut(horsepower, cuts)(172,184] -20.2115 1.8151 -11.135 < 2e-16 ***
## cut(horsepower, cuts)(184,196] -20.5365 2.4674 -8.323 1.60e-15 ***
## cut(horsepower, cuts)(196,207] -21.3282 2.7640 -7.716 1.09e-13 ***
## cut(horsepower, cuts)(207,218] -21.8615 2.2709 -9.627 < 2e-16 ***
## cut(horsepower, cuts)(218,230] -19.6615 2.2709 -8.658 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 18.62178)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 7001.8 on 376 degrees of freedom
## AIC: 2276.5
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against horsepower with the piecewise-constant fit overlaid.
# cut() has no `raw` argument (it was silently absorbed by `...`), so the
# stray copy-pasted `raw` flag is dropped; the fitted curve is unchanged.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Piecewise MPG x Acceleration
# Number of intervals for the step-function fit. Given a single number,
# cut() splits the range of acceleration into that many equal-width bins.
cuts <- 3
# Gaussian GLM of mpg on the binned acceleration; the first bin is the
# reference level carried by the intercept.
piece.fit_acc <- glm(
  formula = mpg ~ cut(acceleration, cuts),
  data = Auto
)
summary(piece.fit_acc)
##
## Call:
## glm(formula = mpg ~ cut(acceleration, cuts), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.4146 0.7122 24.453 < 2e-16 ***
## cut(acceleration, cuts)(13.6,19.2] 7.6541 0.8325 9.194 < 2e-16 ***
## cut(acceleration, cuts)(19.2,24.8] 10.5560 1.3926 7.580 2.55e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 48.69093)
##
## Null deviance: 23819 on 391 degrees of freedom
## Residual deviance: 18941 on 389 degrees of freedom
## AIC: 2640.5
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against acceleration with the piecewise-constant fit overlaid.
# cut() has no `raw` argument (it was silently absorbed by `...`), so the
# stray copy-pasted `raw` flag is dropped; the fitted curve is unchanged.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, cuts),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Displacement
# NOTE: despite its name, `knots` is matched positionally to the `df`
# argument of ns(), so it sets the number of spline basis functions (the
# summary below lists 11 spline coefficients), not the number of knots.
knots <- 11
# Gaussian GLM of mpg on a natural cubic spline basis in displacement.
spline.fit_disp <- glm(
  formula = mpg ~ ns(displacement, knots),
  data = Auto
)
summary(spline.fit_disp)
##
## Call:
## glm(formula = mpg ~ ns(displacement, knots), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.072 1.725 13.956 < 2e-16 ***
## ns(displacement, knots)1 4.014 1.791 2.241 0.025602 *
## ns(displacement, knots)2 8.300 2.382 3.485 0.000549 ***
## ns(displacement, knots)3 -1.989 2.148 -0.926 0.355155
## ns(displacement, knots)4 3.740 2.191 1.707 0.088692 .
## ns(displacement, knots)5 -2.691 2.825 -0.953 0.341422
## ns(displacement, knots)6 -4.750 2.392 -1.986 0.047757 *
## ns(displacement, knots)7 -5.493 2.482 -2.213 0.027497 *
## ns(displacement, knots)8 -9.443 2.112 -4.471 1.03e-05 ***
## ns(displacement, knots)9 -15.271 1.696 -9.002 < 2e-16 ***
## ns(displacement, knots)10 3.221 4.067 0.792 0.428812
## ns(displacement, knots)11 -18.798 1.766 -10.642 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 16.77548)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6374.7 on 380 degrees of freedom
## AIC: 2231.7
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against displacement with the natural-spline fit overlaid.
# The second positional argument of ns() is `df`, so it is named explicitly:
# the value stored in `knots` is the basis dimension, not a knot count.
ggplot(Auto, aes(x = displacement, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Weight
# NOTE: despite its name, `knots` is matched positionally to the `df`
# argument of ns(), so it sets the number of spline basis functions (the
# summary below lists 10 spline coefficients), not the number of knots.
knots <- 10
# Gaussian GLM of mpg on a natural cubic spline basis in weight.
spline.fit_weight <- glm(
  formula = mpg ~ ns(weight, knots),
  data = Auto
)
summary(spline.fit_weight)
##
## Call:
## glm(formula = mpg ~ ns(weight, knots), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.4812 2.4526 13.651 < 2e-16 ***
## ns(weight, knots)1 -0.9416 2.3707 -0.397 0.691472
## ns(weight, knots)2 -9.7994 3.0480 -3.215 0.001416 **
## ns(weight, knots)3 -5.2222 2.7460 -1.902 0.057955 .
## ns(weight, knots)4 -10.8104 2.9050 -3.721 0.000228 ***
## ns(weight, knots)5 -12.2591 2.8677 -4.275 2.42e-05 ***
## ns(weight, knots)6 -15.2130 2.8385 -5.360 1.45e-07 ***
## ns(weight, knots)7 -17.2037 2.8052 -6.133 2.16e-09 ***
## ns(weight, knots)8 -19.8434 2.0280 -9.785 < 2e-16 ***
## ns(weight, knots)9 -21.3953 5.5916 -3.826 0.000152 ***
## ns(weight, knots)10 -21.8925 2.2987 -9.524 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 17.16657)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6540.5 on 381 degrees of freedom
## AIC: 2239.7
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against weight with the natural-spline fit overlaid.
# The second positional argument of ns() is `df`, so it is named explicitly:
# the value stored in `knots` is the basis dimension, not a knot count.
ggplot(Auto, aes(x = weight, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Horsepower
# NOTE: despite its name, `knots` is matched positionally to the `df`
# argument of ns(), so it sets the number of spline basis functions (the
# summary below lists 11 spline coefficients), not the number of knots.
knots <- 11
# Gaussian GLM of mpg on a natural cubic spline basis in horsepower.
spline.fit_hp <- glm(
  formula = mpg ~ ns(horsepower, knots),
  data = Auto
)
summary(spline.fit_hp)
##
## Call:
## glm(formula = mpg ~ ns(horsepower, knots), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.495 1.752 18.550 < 2e-16 ***
## ns(horsepower, knots)1 -3.143 1.846 -1.703 0.089367 .
## ns(horsepower, knots)2 -3.655 2.620 -1.395 0.163855
## ns(horsepower, knots)3 -7.430 2.244 -3.311 0.001019 **
## ns(horsepower, knots)4 -7.157 2.208 -3.241 0.001296 **
## ns(horsepower, knots)5 -11.556 2.201 -5.250 2.54e-07 ***
## ns(horsepower, knots)6 -12.808 2.215 -5.781 1.55e-08 ***
## ns(horsepower, knots)7 -10.417 2.655 -3.924 0.000103 ***
## ns(horsepower, knots)8 -17.936 2.157 -8.316 1.64e-15 ***
## ns(horsepower, knots)9 -21.380 1.998 -10.699 < 2e-16 ***
## ns(horsepower, knots)10 -14.628 4.372 -3.346 0.000901 ***
## ns(horsepower, knots)11 -22.318 2.033 -10.980 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 18.23732)
##
## Null deviance: 23819.0 on 391 degrees of freedom
## Residual deviance: 6930.2 on 380 degrees of freedom
## AIC: 2264.4
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against horsepower with the natural-spline fit overlaid.
# The second positional argument of ns() is `df`, so it is named explicitly:
# the value stored in `knots` is the basis dimension, not a knot count.
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()

Natural Spline MPG x Acceleration
# NOTE: despite its name, `knots` is matched positionally to the `df`
# argument of ns(), so it sets the number of spline basis functions (the
# summary below lists 7 spline coefficients), not the number of knots.
knots <- 7
# Gaussian GLM of mpg on a natural cubic spline basis in acceleration.
spline.fit_acc <- glm(
  formula = mpg ~ ns(acceleration, knots),
  data = Auto
)
summary(spline.fit_acc)
##
## Call:
## glm(formula = mpg ~ ns(acceleration, knots), data = Auto)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.450 3.530 4.094 5.18e-05 ***
## ns(acceleration, knots)1 6.727 3.410 1.973 0.04923 *
## ns(acceleration, knots)2 13.110 4.114 3.187 0.00156 **
## ns(acceleration, knots)3 7.781 3.793 2.051 0.04091 *
## ns(acceleration, knots)4 13.760 3.900 3.528 0.00047 ***
## ns(acceleration, knots)5 7.985 3.033 2.633 0.00882 **
## ns(acceleration, knots)6 15.990 8.223 1.945 0.05254 .
## ns(acceleration, knots)7 21.223 3.944 5.381 1.29e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 47.81463)
##
## Null deviance: 23819 on 391 degrees of freedom
## Residual deviance: 18361 on 384 degrees of freedom
## AIC: 2638.4
##
## Number of Fisher Scoring iterations: 2
# Scatter of mpg against acceleration with the natural-spline fit overlaid.
# The second positional argument of ns() is `df`, so it is named explicitly:
# the value stored in `knots` is the basis dimension, not a knot count.
ggplot(Auto, aes(x = acceleration, y = mpg)) +
  geom_point(alpha = 0.55, color = "black") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, df = knots),
    lty = 1, col = "blue", se = FALSE
  ) +
  theme_bw()
