library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(splines)
library(ISLR)
attach(Auto)
## The following object is masked from package:ggplot2:
## 
##     mpg
summary(Auto)
##       mpg          cylinders      displacement     horsepower        weight    
##  Min.   : 9.00   Min.   :3.000   Min.   : 68.0   Min.   : 46.0   Min.   :1613  
##  1st Qu.:17.00   1st Qu.:4.000   1st Qu.:105.0   1st Qu.: 75.0   1st Qu.:2225  
##  Median :22.75   Median :4.000   Median :151.0   Median : 93.5   Median :2804  
##  Mean   :23.45   Mean   :5.472   Mean   :194.4   Mean   :104.5   Mean   :2978  
##  3rd Qu.:29.00   3rd Qu.:8.000   3rd Qu.:275.8   3rd Qu.:126.0   3rd Qu.:3615  
##  Max.   :46.60   Max.   :8.000   Max.   :455.0   Max.   :230.0   Max.   :5140  
##                                                                                
##   acceleration        year           origin                      name    
##  Min.   : 8.00   Min.   :70.00   Min.   :1.000   amc matador       :  5  
##  1st Qu.:13.78   1st Qu.:73.00   1st Qu.:1.000   ford pinto        :  5  
##  Median :15.50   Median :76.00   Median :1.000   toyota corolla    :  5  
##  Mean   :15.54   Mean   :75.98   Mean   :1.577   amc gremlin       :  4  
##  3rd Qu.:17.02   3rd Qu.:79.00   3rd Qu.:2.000   amc hornet        :  4  
##  Max.   :24.80   Max.   :82.00   Max.   :3.000   chevrolet chevette:  4  
##                                                  (Other)           :365

plot antara mpg dan horsepower =Auto

ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower")+
  geom_point(alpha=0.6, color="blue") + 
  theme_bw()

regresi linear =Auto

mod_linear = lm(mpg~horsepower,data=Auto)
summary(mod_linear)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5710  -3.2592  -0.3435   2.7630  16.9240 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.935861   0.717499   55.66   <2e-16 ***
## horsepower  -0.157845   0.006446  -24.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared:  0.6059, Adjusted R-squared:  0.6049 
## F-statistic: 599.7 on 1 and 390 DF,  p-value: < 2.2e-16

plot mpg vs horsepower dalam regresi linear

ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower Regresi Linear")+
  geom_point(alpha=0.55, color="blue") +
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1, col = "red",se = F)+
  theme_bw()

regresi polinomial

mod_polinomial2 = lm(mpg ~ poly(horsepower,2,raw = T),data=Auto)  #Polinomial berderajat 2
summary(mod_polinomial2)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7135  -2.5943  -0.0859   2.2868  15.8961 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   56.9000997  1.8004268   31.60   <2e-16 ***
## poly(horsepower, 2, raw = T)1 -0.4661896  0.0311246  -14.98   <2e-16 ***
## poly(horsepower, 2, raw = T)2  0.0012305  0.0001221   10.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared:  0.6876, Adjusted R-squared:  0.686 
## F-statistic:   428 on 2 and 389 DF,  p-value: < 2.2e-16
pplot.d2<-ggplot(Auto,aes(x=horsepower, y=mpg)) +   #plot polinomial derajat 2
  ggtitle("Plot mpg vs. horsepower Polinomial Derajat 2")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d2

mod_polinomial3 = lm(mpg ~ poly(horsepower,3,raw = T),data=Auto)  #polinomial berderajat 3
summary(mod_polinomial3)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7039  -2.4491  -0.1519   2.2035  15.8159 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    6.068e+01  4.563e+00  13.298  < 2e-16 ***
## poly(horsepower, 3, raw = T)1 -5.689e-01  1.179e-01  -4.824 2.03e-06 ***
## poly(horsepower, 3, raw = T)2  2.079e-03  9.479e-04   2.193   0.0289 *  
## poly(horsepower, 3, raw = T)3 -2.147e-06  2.378e-06  -0.903   0.3673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.375 on 388 degrees of freedom
## Multiple R-squared:  0.6882, Adjusted R-squared:  0.6858 
## F-statistic: 285.5 on 3 and 388 DF,  p-value: < 2.2e-16
pplot.d3<-ggplot(Auto,aes(x=horsepower, y=mpg)) +           #plot polinomial derajat 3
  ggtitle("Plot mpg vs. horsepower Polinomial Derajat 3
          ")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,3,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d3

mod_polinomial4 = lm(mpg ~ poly(horsepower,4,raw = T),data=Auto)  #polinomial berderajat 4
summary(mod_polinomial4)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.8820  -2.5802  -0.1682   2.2100  16.1434 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    4.757e+01  1.196e+01   3.977 8.32e-05 ***
## poly(horsepower, 4, raw = T)1 -7.667e-02  4.313e-01  -0.178    0.859    
## poly(horsepower, 4, raw = T)2 -4.345e-03  5.497e-03  -0.790    0.430    
## poly(horsepower, 4, raw = T)3  3.245e-05  2.926e-05   1.109    0.268    
## poly(horsepower, 4, raw = T)4 -6.530e-08  5.504e-08  -1.186    0.236    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.373 on 387 degrees of freedom
## Multiple R-squared:  0.6893, Adjusted R-squared:  0.6861 
## F-statistic: 214.7 on 4 and 387 DF,  p-value: < 2.2e-16
pplot.d4<-ggplot(Auto,aes(x=horsepower, y=mpg)) +                     #plot polinomial derajat 4
  ggtitle("Plot mpg vs. horsepower Polinomial Derajat 4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,4,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d4

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(Linear=MSE(predict(mod_linear),Auto$mpg),
           Poly2=MSE(predict(mod_polinomial2),Auto$mpg),
           Poly3=MSE(predict(mod_polinomial3),Auto$mpg),
           Poly4=MSE(predict(mod_polinomial4),Auto$mpg)
)
##     Linear    Poly2    Poly3    Poly4
## 1 23.94366 18.98477 18.94499 18.87633

regresi fungsi tangga

mod_tangga4 = lm(mpg ~ cut(horsepower,4),data=Auto)
summary(mod_tangga4)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0533  -2.7826  -0.2927   2.8593  17.5467 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  29.0533     0.3581   81.12   <2e-16 ***
## cut(horsepower, 4)(92,138]   -8.4506     0.5946  -14.21   <2e-16 ***
## cut(horsepower, 4)(138,184] -14.2707     0.7005  -20.37   <2e-16 ***
## cut(horsepower, 4)(184,230] -16.2004     1.2647  -12.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.001 on 388 degrees of freedom
## Multiple R-squared:  0.5926, Adjusted R-squared:  0.5894 
## F-statistic: 188.1 on 3 and 388 DF,  p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=4")+
  geom_point(alpha=0.55, color="azure4") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,4), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga5 = lm(mpg ~ cut(horsepower,5),data=Auto)
summary(mod_tangga5)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.3033  -3.0220  -0.5413   2.4074  16.6394 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   31.3033     0.4272   73.27   <2e-16 ***
## cut(horsepower, 5)(82.8,120]  -8.2813     0.5642  -14.68   <2e-16 ***
## cut(horsepower, 5)(120,156]  -15.2427     0.7210  -21.14   <2e-16 ***
## cut(horsepower, 5)(156,193]  -17.5922     1.0036  -17.53   <2e-16 ***
## cut(horsepower, 5)(193,230]  -18.5340     1.3767  -13.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.719 on 387 degrees of freedom
## Multiple R-squared:  0.6382, Adjusted R-squared:  0.6345 
## F-statistic: 170.7 on 4 and 387 DF,  p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=5")+
  geom_point(alpha=0.55, color="azure4") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,5), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga6= lm(mpg ~ cut(horsepower,6),data=Auto)
summary(mod_tangga6)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.0252  -2.9344  -0.1298   2.5172  14.5748 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   32.0252     0.4604   69.56   <2e-16 ***
## cut(horsepower, 6)(76.7,107]  -8.0908     0.5947  -13.60   <2e-16 ***
## cut(horsepower, 6)(107,138]  -12.2742     0.8109  -15.14   <2e-16 ***
## cut(horsepower, 6)(138,169]  -16.9697     0.7850  -21.62   <2e-16 ***
## cut(horsepower, 6)(169,199]  -18.3824     1.1187  -16.43   <2e-16 ***
## cut(horsepower, 6)(199,230]  -19.3889     1.4821  -13.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.672 on 386 degrees of freedom
## Multiple R-squared:  0.6462, Adjusted R-squared:  0.6416 
## F-statistic:   141 on 5 and 386 DF,  p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=6
          ")+
  geom_point(alpha=0.55, color="azure4") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,6), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(Tangga4=MSE(predict(mod_tangga4),Auto$mpg),
           Tangga5=MSE(predict(mod_tangga5),Auto$mpg),
           Tangga6=MSE(predict(mod_tangga6),Auto$mpg)
)
##    Tangga4  Tangga5  Tangga6
## 1 24.75519 21.98227 21.49764

Natural cubic spline

mod_spline4ns = lm(mpg ~ ns(horsepower, knots = c(80,120,160,200)),data=Auto)   #knot=4
summary(mod_spline4ns)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)), 
##     data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.9354  -2.5877  -0.2446   2.2605  15.9892 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                     37.975      1.128  33.677
## ns(horsepower, knots = c(80, 120, 160, 200))1  -21.281      1.254 -16.969
## ns(horsepower, knots = c(80, 120, 160, 200))2  -22.882      2.010 -11.386
## ns(horsepower, knots = c(80, 120, 160, 200))3  -24.045      2.238 -10.743
## ns(horsepower, knots = c(80, 120, 160, 200))4  -34.239      2.950 -11.607
## ns(horsepower, knots = c(80, 120, 160, 200))5  -17.341      2.189  -7.921
##                                               Pr(>|t|)    
## (Intercept)                                    < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))1  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))2  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))3  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))4  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))5 2.54e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.369 on 386 degrees of freedom
## Multiple R-squared:  0.6907, Adjusted R-squared:  0.6867 
## F-statistic: 172.4 on 5 and 386 DF,  p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Natural Cubic Spline k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(80,120,160,200)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(80,120,160,200)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

mod_spline5ns = lm(mpg ~ ns(horsepower, knots = c(80,110,140,170,200)),data=Auto)   #knot=5
summary(mod_spline5ns)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 110, 140, 170, 
##     200)), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.1552  -2.6065  -0.2325   2.2470  15.4384 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                          37.302      1.193  31.263
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1  -19.217      1.220 -15.747
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2  -20.006      1.982 -10.092
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3  -24.965      2.137 -11.684
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4  -22.110      2.766  -7.995
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5  -31.488      3.085 -10.207
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6  -18.837      2.384  -7.903
##                                                    Pr(>|t|)    
## (Intercept)                                         < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1  < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2  < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3  < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 1.53e-14 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5  < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 2.90e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.36 on 385 degrees of freedom
## Multiple R-squared:  0.6927, Adjusted R-squared:  0.6879 
## F-statistic: 144.7 on 6 and 385 DF,  p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
   ggtitle("mpg vs. horsepower Natural Cubic Spline k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(80,110,140,170,200)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(80,110,140,170,200)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(NSpline4=MSE(predict(mod_spline4ns),Auto$mpg),
           NSpline5=MSE(predict(mod_spline5ns),Auto$mpg)
)
##   NSpline4 NSpline5
## 1 18.79342 18.67029
data.frame(Linear=MSE(predict(mod_linear),Auto$mpg),
           Poly2=MSE(predict(mod_polinomial2),Auto$mpg),
           Poly3=MSE(predict(mod_polinomial3),Auto$mpg),
           Poly4=MSE(predict(mod_polinomial4),Auto$mpg),
           Tangga4=MSE(predict(mod_tangga4),Auto$mpg),
           Tangga5=MSE(predict(mod_tangga5),Auto$mpg),
           Tangga6=MSE(predict(mod_tangga6),Auto$mpg),
           NSpline4=MSE(predict(mod_spline4ns),Auto$mpg),
           NSpline5=MSE(predict(mod_spline5ns),Auto$mpg)
)
##     Linear    Poly2    Poly3    Poly4  Tangga4  Tangga5  Tangga6 NSpline4
## 1 23.94366 18.98477 18.94499 18.87633 24.75519 21.98227 21.49764 18.79342
##   NSpline5
## 1 18.67029

Data AMERIKA n=245

Amerika<-Auto%>% 
  select(mpg, horsepower, origin)%>%
  filter(origin == 1)

plot antara mpg dan horsepower

ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower")+
  geom_point(alpha=0.6, color="navy") + 
  theme_bw()

regresi linear

mod_linearA = lm(mpg~horsepower,data=Amerika)
summary(mod_linearA)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7415  -3.1643  -0.6389   2.4849  13.8357 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.476496   0.857484   40.21   <2e-16 ***
## horsepower  -0.121320   0.006831  -17.76   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.257 on 243 degrees of freedom
## Multiple R-squared:  0.5649, Adjusted R-squared:  0.5631 
## F-statistic: 315.4 on 1 and 243 DF,  p-value: < 2.2e-16

plot mpg vs horsepower dalam regresi linear

ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Linear")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1, col = "blue",se = F)+
  theme_bw()

regresi polinomial Amerika

mod_polinomial2A = lm(mpg ~ poly(horsepower,2,raw = T),data=Amerika)  #Polinomial berderajat 2
summary(mod_polinomial2A)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9497  -2.5745  -0.0895   2.1583  13.1866 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   51.9150173  2.3893530   21.73  < 2e-16 ***
## poly(horsepower, 2, raw = T)1 -0.4104369  0.0379990  -10.80  < 2e-16 ***
## poly(horsepower, 2, raw = T)2  0.0010776  0.0001398    7.71 3.26e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.822 on 242 degrees of freedom
## Multiple R-squared:  0.6507, Adjusted R-squared:  0.6478 
## F-statistic: 225.4 on 2 and 242 DF,  p-value: < 2.2e-16
pplot.d2A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) +   #polinomial derajat 2
   ggtitle("mpg vs. horsepower Polinomial Derajat 2")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d2A

mod_polinomial3A = lm(mpg ~ poly(horsepower,3,raw = T),data=Amerika)  #polinomial berderajat 3
summary(mod_polinomial3A)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3092  -2.3199  -0.2081   2.0794  13.2928 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    6.224e+01  7.181e+00   8.667 6.56e-16 ***
## poly(horsepower, 3, raw = T)1 -6.634e-01  1.703e-01  -3.896 0.000127 ***
## poly(horsepower, 3, raw = T)2  2.995e-03  1.266e-03   2.366 0.018796 *  
## poly(horsepower, 3, raw = T)3 -4.531e-06  2.974e-06  -1.524 0.128867    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.812 on 241 degrees of freedom
## Multiple R-squared:  0.654,  Adjusted R-squared:  0.6497 
## F-statistic: 151.8 on 3 and 241 DF,  p-value: < 2.2e-16
pplot.d3A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) + 
  ggtitle("mpg vs. horsepower Polinomial Derajat 3")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,3,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d3A

mod_polinomial4A = lm(mpg ~ poly(horsepower,4,raw = T),data=Amerika)  #polinomial berderajat 4
summary(mod_polinomial4A)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3162  -2.3080  -0.2287   2.0895  13.3711 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    7.080e+01  1.953e+01   3.625 0.000352 ***
## poly(horsepower, 4, raw = T)1 -9.562e-01  6.440e-01  -1.485 0.138931    
## poly(horsepower, 4, raw = T)2  6.540e-03  7.623e-03   0.858 0.391827    
## poly(horsepower, 4, raw = T)3 -2.252e-05  3.827e-05  -0.588 0.556777    
## poly(horsepower, 4, raw = T)4  3.244e-08  6.879e-08   0.471 0.637715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.818 on 240 degrees of freedom
## Multiple R-squared:  0.6543, Adjusted R-squared:  0.6486 
## F-statistic: 113.6 on 4 and 240 DF,  p-value: < 2.2e-16
pplot.d4A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) + 
  ggtitle("mpg vs. horsepower Polinomial Derajat 4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,4,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d4A

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(LinearA=MSE(predict(mod_linearA),Amerika$mpg),
           Poly2A=MSE(predict(mod_polinomial2A),Amerika$mpg),
           Poly3A=MSE(predict(mod_polinomial3A),Amerika$mpg),
           Poly4A=MSE(predict(mod_polinomial4A),Amerika$mpg)
)
##    LinearA   Poly2A   Poly3A   Poly4A
## 1 17.97539 14.43097 14.29326 14.28003

regresi fungsi tangga Amerika

mod_tangga4A = lm(mpg ~ cut(horsepower,4),data=Amerika)
summary(mod_tangga4A)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Amerika)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.805  -2.005  -0.455   1.821  13.195 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   25.8053     0.4230   61.00   <2e-16 ***
## cut(horsepower, 4)(96.5,141]  -7.0258     0.6418  -10.95   <2e-16 ***
## cut(horsepower, 4)(141,186]  -11.3503     0.6800  -16.69   <2e-16 ***
## cut(horsepower, 4)(186,230]  -12.9523     1.0859  -11.93   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.123 on 241 degrees of freedom
## Multiple R-squared:  0.5951, Adjusted R-squared:  0.5901 
## F-statistic: 118.1 on 3 and 241 DF,  p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Fungsi Tangga k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,4), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga5A = lm(mpg ~ cut(horsepower,5),data=Amerika)
summary(mod_tangga5A)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.4268  -2.4712  -0.4712   2.2615  12.7220 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   27.4268     0.5438  50.436   <2e-16 ***
## cut(horsepower, 5)(87.6,123]  -6.1488     0.6912  -8.896   <2e-16 ***
## cut(horsepower, 5)(123,159]  -11.9556     0.7592 -15.747   <2e-16 ***
## cut(horsepower, 5)(159,194]  -13.6883     0.9657 -14.174   <2e-16 ***
## cut(horsepower, 5)(194,230]  -14.6576     1.2528 -11.700   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.069 on 240 degrees of freedom
## Multiple R-squared:  0.6073, Adjusted R-squared:  0.6008 
## F-statistic: 92.79 on 4 and 240 DF,  p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
   ggtitle("mpg vs. horsepower Fungsi Tangga k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,5), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga6A = lm(mpg ~ cut(horsepower,6),data=Amerika)
summary(mod_tangga6A)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3353  -2.6560  -0.3852   1.8235  15.7935 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   28.3353     0.7353  38.535  < 2e-16 ***
## cut(horsepower, 6)(81.7,111]  -6.1288     0.8441  -7.261 5.38e-12 ***
## cut(horsepower, 6)(111,141]  -10.4501     1.1052  -9.455  < 2e-16 ***
## cut(horsepower, 6)(141,171]  -13.6793     0.9531 -14.353  < 2e-16 ***
## cut(horsepower, 6)(171,200]  -15.1588     1.2736 -11.902  < 2e-16 ***
## cut(horsepower, 6)(200,230]  -15.4353     1.5424 -10.007  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.288 on 239 degrees of freedom
## Multiple R-squared:  0.5659, Adjusted R-squared:  0.5568 
## F-statistic: 62.31 on 5 and 239 DF,  p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Fungsi Tangga k=6")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,6), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(Tangga4A=MSE(predict(mod_tangga4),Amerika$mpg),
           Tangga5A=MSE(predict(mod_tangga5),Amerika$mpg),
           TanggaA=MSE(predict(mod_tangga6),Amerika$mpg)
)
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length

## Warning in pred - actual: longer object length is not a multiple of shorter
## object length

## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
##   Tangga4A Tangga5A TanggaA
## 1 93.45857  93.3432 96.5047

Natural cubic spline Amerika

mod_spline4nsA = lm(mpg ~ ns(horsepower, knots = c(80,120,160,200)),data=Amerika)   #knot=4
summary(mod_spline4nsA)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)), 
##     data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4671  -2.1836  -0.3698   2.0508  13.2883 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                     35.039      1.813  19.322
## ns(horsepower, knots = c(80, 120, 160, 200))1  -18.270      1.748 -10.454
## ns(horsepower, knots = c(80, 120, 160, 200))2  -20.386      2.556  -7.975
## ns(horsepower, knots = c(80, 120, 160, 200))3  -20.800      2.173  -9.574
## ns(horsepower, knots = c(80, 120, 160, 200))4  -30.835      4.178  -7.380
## ns(horsepower, knots = c(80, 120, 160, 200))5  -14.921      1.967  -7.586
##                                               Pr(>|t|)    
## (Intercept)                                    < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))1  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))2 6.29e-14 ***
## ns(horsepower, knots = c(80, 120, 160, 200))3  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))4 2.61e-12 ***
## ns(horsepower, knots = c(80, 120, 160, 200))5 7.33e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.819 on 239 degrees of freedom
## Multiple R-squared:  0.6557, Adjusted R-squared:  0.6485 
## F-statistic: 91.02 on 5 and 239 DF,  p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Natural Cubic Spline k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(80,120,160,200)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(80,120,160,200)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

mod_spline5nsA = lm(mpg ~ ns(horsepower, knots = c(80,110,140,170,200)),data=Amerika)   #knot=5
summary(mod_spline5nsA)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 110, 140, 170, 
##     200)), data = Amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5219  -2.1716  -0.3143   2.0250  13.1723 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                          34.495      1.961  17.589
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1  -16.343      1.841  -8.880
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2  -18.485      2.534  -7.296
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3  -20.965      2.471  -8.486
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4  -20.274      2.681  -7.562
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5  -28.611      4.475  -6.393
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6  -15.750      2.164  -7.279
##                                                    Pr(>|t|)    
## (Intercept)                                         < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1  < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2 4.39e-12 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3 2.31e-15 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 8.58e-13 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5 8.51e-10 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 4.86e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.823 on 238 degrees of freedom
## Multiple R-squared:  0.6564, Adjusted R-squared:  0.6477 
## F-statistic: 75.78 on 6 and 238 DF,  p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Natural Cubic Spline k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(80,110,140,170,200)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(80,110,140,170,200)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(NSpline4A=MSE(predict(mod_spline4nsA),Amerika$mpg),
           NSpline5A=MSE(predict(mod_spline5nsA),Amerika$mpg)
)
##   NSpline4A NSpline5A
## 1   14.2242  14.19415

Membandingkan model

data.frame(LinearA=MSE(predict(mod_linearA),Amerika$mpg),
           Poly2A=MSE(predict(mod_polinomial2A),Amerika$mpg),
           Poly3A=MSE(predict(mod_polinomial3A),Amerika$mpg),
           Poly4A=MSE(predict(mod_polinomial4A),Amerika$mpg),
           Tangga4A=MSE(predict(mod_tangga4A),Amerika$mpg),
           Tangga5A=MSE(predict(mod_tangga5A),Amerika$mpg),
           Tangga6A=MSE(predict(mod_tangga6A),Amerika$mpg),
           NSpline4A=MSE(predict(mod_spline4nsA),Amerika$mpg),
           NSpline5A=MSE(predict(mod_spline5nsA),Amerika$mpg)
)
##    LinearA   Poly2A   Poly3A   Poly4A Tangga4A Tangga5A Tangga6A NSpline4A
## 1 17.97539 14.43097 14.29326 14.28003 16.72448 16.22219 17.93286   14.2242
##   NSpline5A
## 1  14.19415

EROPA

Eropa<-Auto%>% 
  select(mpg, horsepower, origin)%>%
  filter(origin == 2)

plot antara mpg dan horsepower

ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower")+
  geom_point(alpha=0.6, color="black") + 
  theme_bw()

regresi linear

mod_linearE = lm(mpg~horsepower,data=Eropa)
summary(mod_linearE)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4946  -2.8387  -0.4171   2.2148  12.8858 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 45.47373    2.44679  18.585  < 2e-16 ***
## horsepower  -0.22184    0.02948  -7.526 1.87e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.864 on 66 degrees of freedom
## Multiple R-squared:  0.4618, Adjusted R-squared:  0.4537 
## F-statistic: 56.64 on 1 and 66 DF,  p-value: 1.869e-10

plot mpg vs horsepower dalam regresi linear

ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Linear")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1, col = "blue",se = F)+
  theme_bw()

regresi polinomial Eropa

mod_polinomial2E = lm(mpg ~ poly(horsepower,2,raw = T),data=Eropa)  #Polinomial berderajat 2
summary(mod_polinomial2E)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6282  -2.7724  -0.4052   2.2773  12.9676 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   47.1311209  8.2852287   5.689  3.3e-07 ***
## poly(horsepower, 2, raw = T)1 -0.2631496  0.1994015  -1.320    0.192    
## poly(horsepower, 2, raw = T)2  0.0002425  0.0011574   0.210    0.835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.899 on 65 degrees of freedom
## Multiple R-squared:  0.4622, Adjusted R-squared:  0.4456 
## F-statistic: 27.93 on 2 and 65 DF,  p-value: 1.76e-09
pplot.d2E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) +   #polinomial derajat 2
  ggtitle("mpg vs. horsepower Polinomial Derajat 2")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d2E

mod_polinomial3E = lm(mpg ~ poly(horsepower,3,raw = T),data=Eropa)  #polinomial berderajat 3
summary(mod_polinomial3E)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6346  -2.6497  -0.8141   2.1774  12.8935 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                    4.060e+01  2.781e+01   1.460    0.149
## poly(horsepower, 3, raw = T)1 -7.245e-03  1.058e+00  -0.007    0.995
## poly(horsepower, 3, raw = T)2 -2.924e-03  1.291e-02  -0.226    0.822
## poly(horsepower, 3, raw = T)3  1.241e-05  5.039e-05   0.246    0.806
## 
## Residual standard error: 4.935 on 64 degrees of freedom
## Multiple R-squared:  0.4627, Adjusted R-squared:  0.4375 
## F-statistic: 18.37 on 3 and 64 DF,  p-value: 1.041e-08
pplot.d3E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) + 
  ggtitle("mpg vs. horsepower Polinomial Derajat 3")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,3,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d3E

mod_polinomial4E = lm(mpg ~ poly(horsepower,4,raw = T),data=Eropa)  #polinomial berderajat 4
summary(mod_polinomial4E)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0736  -2.8215  -0.5424   2.4147  12.9708 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                   -2.497e+01  1.109e+02  -0.225    0.823
## poly(horsepower, 4, raw = T)1  3.362e+00  5.617e+00   0.599    0.552
## poly(horsepower, 4, raw = T)2 -6.496e-02  1.024e-01  -0.635    0.528
## poly(horsepower, 4, raw = T)3  4.998e-04  7.994e-04   0.625    0.534
## poly(horsepower, 4, raw = T)4 -1.384e-06  2.266e-06  -0.611    0.543
## 
## Residual standard error: 4.959 on 63 degrees of freedom
## Multiple R-squared:  0.4659, Adjusted R-squared:  0.4319 
## F-statistic: 13.74 on 4 and 63 DF,  p-value: 4.134e-08
pplot.d4E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) + 
  ggtitle("mpg vs. horsepower Polinomial Derajat 4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,4,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d4E

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(LinearE=MSE(predict(mod_linearE),Eropa$mpg),
           Poly2E=MSE(predict(mod_polinomial2E),Eropa$mpg),
           Poly3E=MSE(predict(mod_polinomial3E),Eropa$mpg),
           Poly4E=MSE(predict(mod_polinomial4E),Eropa$mpg)
)
##    LinearE   Poly2E   Poly3E   Poly4E
## 1 22.95978 22.94428 22.92256 22.78757

regresi fungsi tangga Eropa

mod_tangga4E = lm(mpg ~ cut(horsepower,4),data=Eropa)
summary(mod_tangga4E)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0000  -3.2531  -0.7048   2.9455  13.0029 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     33.000      1.346  24.526  < 2e-16 ***
## cut(horsepower, 4)(67.8,89.5]   -4.503      1.615  -2.788  0.00698 ** 
## cut(horsepower, 4)(89.5,111]    -9.927      2.069  -4.799 9.94e-06 ***
## cut(horsepower, 4)(111,133]    -13.087      2.281  -5.736 2.84e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.211 on 64 degrees of freedom
## Multiple R-squared:  0.4009, Adjusted R-squared:  0.3728 
## F-statistic: 14.28 on 3 and 64 DF,  p-value: 3.171e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Fungsi Tangga k=
          4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,4), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga5E = lm(mpg ~ cut(horsepower,5),data=Eropa)
summary(mod_tangga5E)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7818  -3.6655  -0.1383   2.2481  11.8345 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     33.782      1.506  22.434  < 2e-16 ***
## cut(horsepower, 5)(63.4,80.8]   -4.116      1.769  -2.328   0.0232 *  
## cut(horsepower, 5)(80.8,98.2]   -8.782      1.956  -4.489 3.11e-05 ***
## cut(horsepower, 5)(98.2,116]   -12.071      2.245  -5.377 1.18e-06 ***
## cut(horsepower, 5)(116,133]    -17.215      3.253  -5.292 1.63e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.994 on 63 degrees of freedom
## Multiple R-squared:  0.4583, Adjusted R-squared:  0.4239 
## F-statistic: 13.33 on 4 and 63 DF,  p-value: 6.335e-08
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Fungsi Tangga k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,5), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga6E = lm(mpg ~ cut(horsepower,6),data=Eropa)
summary(mod_tangga6E)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1800  -3.1476  -0.0983   2.3604  13.6316 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    34.180      1.623  21.061  < 2e-16 ***
## cut(horsepower, 6)(60.5,75]    -4.550      1.988  -2.289  0.02549 *  
## cut(horsepower, 6)(75,89.5]    -6.312      2.005  -3.148  0.00253 ** 
## cut(horsepower, 6)(89.5,104]  -11.036      2.358  -4.680 1.60e-05 ***
## cut(horsepower, 6)(104,118]   -12.023      2.529  -4.754 1.23e-05 ***
## cut(horsepower, 6)(118,133]   -17.613      3.378  -5.214 2.26e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.132 on 62 degrees of freedom
## Multiple R-squared:  0.4371, Adjusted R-squared:  0.3917 
## F-statistic: 9.628 on 5 and 62 DF,  p-value: 7.756e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("mpg vs. horsepower Fungsi Tangga k=6")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,6), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(Tangga4E=MSE(predict(mod_tangga4E),Eropa$mpg),
           Tangga5E=MSE(predict(mod_tangga5E),Eropa$mpg),
           Tangga6E=MSE(predict(mod_tangga6E),Eropa$mpg)
)
##   Tangga4E Tangga5E Tangga6E
## 1 25.55912 23.10967 24.01507

Natural cubic spline Eropa

mod_spline4nsE = lm(mpg ~ ns(horsepower, knots = c(60,80,100,120)),data=Eropa)   #knot=4
summary(mod_spline4nsE)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 80, 100, 120)), 
##     data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1994  -3.2453  -0.1622   1.9845  12.4669 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                    35.567      2.211  16.090
## ns(horsepower, knots = c(60, 80, 100, 120))1   -6.052      3.219  -1.880
## ns(horsepower, knots = c(60, 80, 100, 120))2  -16.032      4.813  -3.331
## ns(horsepower, knots = c(60, 80, 100, 120))3  -11.640      4.156  -2.801
## ns(horsepower, knots = c(60, 80, 100, 120))4  -22.488      6.278  -3.582
## ns(horsepower, knots = c(60, 80, 100, 120))5  -17.760      4.793  -3.705
##                                              Pr(>|t|)    
## (Intercept)                                   < 2e-16 ***
## ns(horsepower, knots = c(60, 80, 100, 120))1 0.064829 .  
## ns(horsepower, knots = c(60, 80, 100, 120))2 0.001462 ** 
## ns(horsepower, knots = c(60, 80, 100, 120))3 0.006789 ** 
## ns(horsepower, knots = c(60, 80, 100, 120))4 0.000670 ***
## ns(horsepower, knots = c(60, 80, 100, 120))5 0.000452 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.975 on 62 degrees of freedom
## Multiple R-squared:  0.4711, Adjusted R-squared:  0.4285 
## F-statistic: 11.05 on 5 and 62 DF,  p-value: 1.246e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("Natural Cubic Spline k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(60,80,100,120)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(60,80,100,120)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

mod_spline5nsE = lm(mpg ~ ns(horsepower, knots = c(60,75,90,105,120)),data=Eropa)   #knot=5
summary(mod_spline5nsE)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 75, 90, 105, 
##     120)), data = Eropa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0509  -3.2520  -0.3632   2.0788  12.4257 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                        35.654      2.305  15.466
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1   -5.394      3.004  -1.796
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2  -11.603      4.074  -2.848
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3  -14.717      5.232  -2.813
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4  -12.711      5.096  -2.494
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5  -22.634      6.906  -3.277
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6  -17.803      5.175  -3.440
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1  0.07749 .  
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2  0.00598 ** 
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3  0.00659 ** 
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4  0.01534 *  
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5  0.00173 ** 
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6  0.00105 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.019 on 61 degrees of freedom
## Multiple R-squared:  0.4704, Adjusted R-squared:  0.4183 
## F-statistic: 9.029 on 6 and 61 DF,  p-value: 4.636e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
  ggtitle("Natural Cubic Spline k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(60,75,90,105,120)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(60,75,90,105,120)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(NSpline4E=MSE(predict(mod_spline4nsE),Eropa$mpg),
           NSpline5E=MSE(predict(mod_spline5nsE),Eropa$mpg)
)
##   NSpline4E NSpline5E
## 1  22.56312  22.59495

membandingkan model

data.frame(LinearE=MSE(predict(mod_linearE),Eropa$mpg),
           Poly2E=MSE(predict(mod_polinomial2E),Eropa$mpg),
           Poly3E=MSE(predict(mod_polinomial3E),Eropa$mpg),
           Poly4E=MSE(predict(mod_polinomial4E),Eropa$mpg),
           Tangga4E=MSE(predict(mod_tangga4E),Eropa$mpg),
           Tangga5E=MSE(predict(mod_tangga5E),Eropa$mpg),
           Tangga6E=MSE(predict(mod_tangga6E),Eropa$mpg),
           NSpline4E=MSE(predict(mod_spline4nsE),Eropa$mpg),
           NSpline5E=MSE(predict(mod_spline5nsE),Eropa$mpg)
)
##    LinearE   Poly2E   Poly3E   Poly4E Tangga4E Tangga5E Tangga6E NSpline4E
## 1 22.95978 22.94428 22.92256 22.78757 25.55912 23.10967 24.01507  22.56312
##   NSpline5E
## 1  22.59495

JEPANG

Jepang<-Auto%>% 
  select(mpg, horsepower, origin)%>%
  filter(origin == 3)

plot antara mpg dan horsepower

ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower")+
  geom_point(alpha=0.6, color="black") + 
  theme_bw()

regresi linear

mod_linearJ = lm(mpg~horsepower,data=Jepang)
summary(mod_linearJ)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.112  -2.758  -0.751   2.563  14.249 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  48.8162     2.3555  20.724  < 2e-16 ***
## horsepower   -0.2300     0.0288  -7.986 1.08e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.533 on 77 degrees of freedom
## Multiple R-squared:  0.4531, Adjusted R-squared:  0.446 
## F-statistic: 63.78 on 1 and 77 DF,  p-value: 1.08e-11

plot mpg vs horsepower dalam regresi linear

ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Plot mpg vs. horsepower Linear")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1, col = "blue",se = F)+
  theme_bw()

regresi polinomial Jepang

mod_polinomial2J = lm(mpg ~ poly(horsepower,2,raw = T),data=Jepang)  #Polinomial berderajat 2
summary(mod_polinomial2J)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1949 -2.5568 -0.6165  2.2408 12.5211 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   68.401527   9.731163   7.029 7.74e-10 ***
## poly(horsepower, 2, raw = T)1 -0.710534   0.233642  -3.041  0.00323 ** 
## poly(horsepower, 2, raw = T)2  0.002808   0.001355   2.072  0.04169 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.439 on 76 degrees of freedom
## Multiple R-squared:  0.4823, Adjusted R-squared:  0.4687 
## F-statistic:  35.4 on 2 and 76 DF,  p-value: 1.365e-11
pplot.d2J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) +   #polinomial derajat 2
  ggtitle("Polinomial derajat 2")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d2J

mod_polinomial3J = lm(mpg ~ poly(horsepower,3,raw = T),data=Jepang)  #polinomial berderajat 3
summary(mod_polinomial3J)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.8602 -2.7115 -0.5224  2.1985 11.6985 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -7.163e+01  3.494e+01  -2.050  0.04385 *  
## poly(horsepower, 3, raw = T)1  4.308e+00  1.230e+00   3.503  0.00078 ***
## poly(horsepower, 3, raw = T)2 -5.498e-02  1.400e-02  -3.926  0.00019 ***
## poly(horsepower, 3, raw = T)3  2.142e-04  5.170e-05   4.142 8.92e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.031 on 75 degrees of freedom
## Multiple R-squared:  0.5787, Adjusted R-squared:  0.5618 
## F-statistic: 34.34 on 3 and 75 DF,  p-value: 4.485e-14
pplot.d3J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Polinomial derajat 3")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,3,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d3J

mod_polinomial4J = lm(mpg ~ poly(horsepower,4,raw = T),data=Jepang)  #polinomial berderajat 4
summary(mod_polinomial4J)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6872 -2.5927 -0.4482  2.1121 11.5842 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                   -1.218e+02  1.338e+02  -0.910    0.366
## poly(horsepower, 4, raw = T)1  6.792e+00  6.518e+00   1.042    0.301
## poly(horsepower, 4, raw = T)2 -9.980e-02  1.163e-01  -0.858    0.394
## poly(horsepower, 4, raw = T)3  5.626e-04  8.990e-04   0.626    0.533
## poly(horsepower, 4, raw = T)4 -9.851e-07  2.537e-06  -0.388    0.699
## 
## Residual standard error: 4.054 on 74 degrees of freedom
## Multiple R-squared:  0.5795, Adjusted R-squared:  0.5568 
## F-statistic:  25.5 on 4 and 74 DF,  p-value: 2.684e-13
pplot.d4J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) + 
  ggtitle("Polinomial derajat 4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,4,raw=T), 
              lty = 1, col = "blue",se = F)+
  theme_bw()
pplot.d4J

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(LinearJ=MSE(predict(mod_linearJ),Jepang$mpg),
           Poly2J=MSE(predict(mod_polinomial2J),Jepang$mpg),
           Poly3J=MSE(predict(mod_polinomial3J),Jepang$mpg),
           Poly4J=MSE(predict(mod_polinomial4J),Jepang$mpg)
)
##    LinearJ   Poly2J   Poly3J   Poly4J
## 1 20.02862 18.95803 15.42813 15.39677

regresi fungsi tangga Jepang

mod_tangga4J = lm(mpg ~ cut(horsepower,4),data=Jepang)
summary(mod_tangga4J)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.444  -2.644  -0.590   2.579  11.803 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  34.7973     0.6901  50.425  < 2e-16 ***
## cut(horsepower, 4)(72,92]    -5.3529     1.2063  -4.438 3.07e-05 ***
## cut(horsepower, 4)(92,112]  -10.5073     1.1650  -9.019 1.37e-13 ***
## cut(horsepower, 4)(112,132]  -9.2223     2.2093  -4.174 7.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.198 on 75 degrees of freedom
## Multiple R-squared:  0.5432, Adjusted R-squared:  0.5249 
## F-statistic: 29.73 on 3 and 75 DF,  p-value: 9.028e-13
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Fungsi Tangga k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,4), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga5J = lm(mpg ~ cut(horsepower,5),data=Jepang)
summary(mod_tangga5J)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7444 -2.8774 -0.7774  2.1891 11.7226 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  34.8774     0.7651  45.585  < 2e-16 ***
## cut(horsepower, 5)(68,84]    -2.4441     1.3399  -1.824 0.072169 .  
## cut(horsepower, 5)(84,100]   -9.1330     1.1214  -8.144 6.91e-12 ***
## cut(horsepower, 5)(100,116] -12.9108     2.5758  -5.012 3.56e-06 ***
## cut(horsepower, 5)(116,132]  -9.2441     2.5758  -3.589 0.000594 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.26 on 74 degrees of freedom
## Multiple R-squared:  0.5358, Adjusted R-squared:  0.5107 
## F-statistic: 21.35 on 4 and 74 DF,  p-value: 9.702e-12
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Fungsi Tangga k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,5), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

mod_tangga6J = lm(mpg ~ cut(horsepower,6),data=Jepang)
summary(mod_tangga6J)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7556 -2.4667 -0.7556  1.9276 11.4107 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    35.4667     0.9915  35.769  < 2e-16 ***
## cut(horsepower, 6)(65.3,78.7]  -2.2774     1.2709  -1.792 0.077286 .  
## cut(horsepower, 6)(78.7,92]    -7.7111     1.7174  -4.490 2.61e-05 ***
## cut(horsepower, 6)(92,105]    -10.7278     1.4023  -7.650 6.38e-11 ***
## cut(horsepower, 6)(105,119]   -13.5000     2.6234  -5.146 2.16e-06 ***
## cut(horsepower, 6)(119,132]    -9.8333     2.6234  -3.748 0.000353 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.207 on 73 degrees of freedom
## Multiple R-squared:  0.5534, Adjusted R-squared:  0.5229 
## F-statistic: 18.09 on 5 and 73 DF,  p-value: 1.233e-11
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Fungsi Tangga k=6")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,6), 
              lty = 1, col = "blue",se = F)+
  theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(Tangga4J=MSE(predict(mod_tangga4J),Jepang$mpg),
           Tangga5J=MSE(predict(mod_tangga5J),Jepang$mpg),
           Tangga6J=MSE(predict(mod_tangga6J),Jepang$mpg)
)
##   Tangga4J Tangga5J Tangga6J
## 1 16.72759 16.99883 16.35272

Natural cubic spline Eropa

mod_spline4nsJ = lm(mpg ~ ns(horsepower, knots = c(60,80,100,120)),data=Jepang)   #knot=4
summary(mod_spline4nsJ)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 80, 100, 120)), 
##     data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2865 -2.8742 -0.3009  1.8593 11.0460 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                    32.676      2.022  16.163
## ns(horsepower, knots = c(60, 80, 100, 120))1   -6.328      3.188  -1.985
## ns(horsepower, knots = c(60, 80, 100, 120))2   -6.721      3.444  -1.951
## ns(horsepower, knots = c(60, 80, 100, 120))3  -16.242      3.971  -4.090
## ns(horsepower, knots = c(60, 80, 100, 120))4    0.768      5.220   0.147
## ns(horsepower, knots = c(60, 80, 100, 120))5   -4.158      3.979  -1.045
##                                              Pr(>|t|)    
## (Intercept)                                   < 2e-16 ***
## ns(horsepower, knots = c(60, 80, 100, 120))1  0.05091 .  
## ns(horsepower, knots = c(60, 80, 100, 120))2  0.05485 .  
## ns(horsepower, knots = c(60, 80, 100, 120))3  0.00011 ***
## ns(horsepower, knots = c(60, 80, 100, 120))4  0.88343    
## ns(horsepower, knots = c(60, 80, 100, 120))5  0.29940    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.018 on 73 degrees of freedom
## Multiple R-squared:  0.5926, Adjusted R-squared:  0.5647 
## F-statistic: 21.24 on 5 and 73 DF,  p-value: 4.75e-13
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Natural Cubic Spline k=4")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(60,80,100,120)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(60,80,100,120)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

mod_spline5nsJ = lm(mpg ~ ns(horsepower, knots = c(60,75,90,105,120)),data=Jepang)   #knot=5
summary(mod_spline5nsJ)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 75, 90, 105, 
##     120)), data = Jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7575 -2.7975 -0.2206  1.5872 10.8295 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                        32.376      2.080  15.568
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1   -2.868      2.876  -0.997
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2   -5.189      3.498  -1.483
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3  -10.090      4.199  -2.403
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4  -14.030      4.285  -3.274
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5    1.516      5.393   0.281
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6   -4.142      4.164  -0.995
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1  0.32202    
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2  0.14238    
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3  0.01883 *  
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4  0.00163 ** 
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5  0.77942    
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6  0.32327    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.034 on 72 degrees of freedom
## Multiple R-squared:  0.595,  Adjusted R-squared:  0.5613 
## F-statistic: 17.63 on 6 and 72 DF,  p-value: 1.905e-12
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
  ggtitle("Natural Cubic Spline k=5")+
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(60,75,90,105,120)), 
              lty = 1, aes(col = "Cubic Spline"),se = F)+
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(60,75,90,105,120)), 
              lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()

MSE=function(pred,actual){
  mean((pred-actual)^2)
}
data.frame(NSpline4J=MSE(predict(mod_spline4nsJ),Jepang$mpg),
           NSpline5J=MSE(predict(mod_spline5nsJ),Jepang$mpg)
)
##   NSpline4J NSpline5J
## 1  14.91692  14.83045

Jepang

data.frame(LinearJ=MSE(predict(mod_linearJ),Jepang$mpg),
           Poly2J=MSE(predict(mod_polinomial2J),Jepang$mpg),
           Poly3J=MSE(predict(mod_polinomial3J),Jepang$mpg),
           Poly4J=MSE(predict(mod_polinomial4J),Jepang$mpg),
           Tangga4J=MSE(predict(mod_tangga4J),Jepang$mpg),
           Tangga5J=MSE(predict(mod_tangga5J),Jepang$mpg),
           Tangga6J=MSE(predict(mod_tangga6J),Jepang$mpg),
           NSpline4J=MSE(predict(mod_spline4nsJ),Jepang$mpg),
           NSpline5J=MSE(predict(mod_spline5nsJ),Jepang$mpg)
)
##    LinearJ   Poly2J   Poly3J   Poly4J Tangga4J Tangga5J Tangga6J NSpline4J
## 1 20.02862 18.95803 15.42813 15.39677 16.72759 16.99883 16.35272  14.91692
##   NSpline5J
## 1  14.83045