P9581 Regresi Polinomial, Regresi Spline, Fungsi Tangga

Soal

Dataset

Dataset yang digunakan yaitu data Auto dari package ISLR. Data ini terdiri dari 9 variabel dan 392 observasi. Berikut penjelasan untuk masing-masing variabel dalam data:

  • mpg: miles per gallon

  • cylinders: Number of cylinders between 4 and 8

  • displacement: Engine displacement (cu. inches)

  • horsepower: Engine horsepower

  • weight: Vehicle weight (lbs.)

  • acceleration: Time to accelerate from 0 to 60 mph (sec.)

  • year: Model year (modulo 100)

  • origin: Origin of car (1. American, 2. European, 3. Japanese)

  • name: Vehicle name

Pada studi kasus kali ini hanya diambil 3 variabel bebas, yaitu horsepower, acceleration, dan displacement.

Visualisasi Data

# Put the columns of Auto on the search path. Later chunks refer to mpg,
# horsepower, displacement, acceleration and origin as bare names, so they
# depend on this call having been run.
attach(Auto)
## The following object is masked from package:ggplot2:
## 
##     mpg
# Working data frame: the response (mpg) and the three predictors studied.
mydata <- data.frame(
  mpg = mpg,
  horsepower = horsepower,
  displacement = displacement,
  acceleration = acceleration
)
datatable(mydata)
# mpg against horsepower, points coloured by origin, straight-line fit in red.
# NOTE(review): `group = origin` is passed outside aes(), so it does not look
# like it acts as a grouping aesthetic - confirm before relying on it.
A <- ggplot(mydata, aes(x = horsepower, y = mpg), group = origin) +
  geom_point(aes(colour = factor(origin))) +
  stat_smooth(
    method = "lm", formula = y ~ x,
    linetype = 1, colour = "red", se = FALSE
  ) +
  labs(
    title = "Scatter Plot Mpg dan Horse Power",
    x = "Horse Power",
    y = "Mpg"
  ) +
  theme_bw()

# mpg against acceleration, points coloured by origin, straight-line fit in red.
# The x-axis label now names the variable actually plotted (acceleration).
# NOTE(review): `group = origin` is passed outside aes(), so it does not look
# like it acts as a grouping aesthetic - confirm before relying on it.
B <- ggplot(mydata, aes(x = acceleration, y = mpg), group = origin) +
  geom_point(aes(color = factor(origin))) +
  stat_smooth(
    method = "lm", formula = y ~ x,
    lty = 1, col = "red", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Scatter Plot Mpg dan acceleration ") +
  xlab("Acceleration") +
  ylab("Mpg")

# mpg against displacement, points coloured by origin, straight-line fit in red.
# The x-axis label now names the variable actually plotted (displacement).
# NOTE(review): `group = origin` is passed outside aes(), so it does not look
# like it acts as a grouping aesthetic - confirm before relying on it.
C <- ggplot(mydata, aes(x = displacement, y = mpg), group = origin) +
  geom_point(aes(color = factor(origin))) +
  stat_smooth(
    method = "lm", formula = y ~ x,
    lty = 1, col = "red", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Scatter Plot Mpg dan displacement") +
  xlab("Displacement") +
  ylab("Mpg")

# Show the three scatter plots together.
plot_grid(A, B, C)

Berdasarkan hasil plot masing-masing peubah independen x terhadap peubah dependen y, setiap x (horsepower, acceleration, dan displacement) yang berpasangan dengan y (mpg) membentuk pola yang bukan linier.

Model Non-Linier

Regresi Polinomial

Empiris

Mpg ~ Horsepower

# Choose the polynomial degree for mpg ~ horsepower by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate degree, preallocated instead of grown in the loop.
degrees <- 2:10
CV_RMSE <- numeric(length(degrees))

set.seed(39)

for (i in degrees) {
  poly_basis <- data.frame(poly(horsepower, i))
  model_temp <- train(
    x = poly_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, degrees)] <- model_temp$results$RMSE
}

# CV RMSE against degree; the point with the lowest RMSE gets its own colour.
# The x axis is the polynomial degree, so it is labelled as such.
data.frame(degree = degrees, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = degree, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg~horsepower - Polinomial",
    subtitle = "Selecting the polynomial order with cross-validation",
    x = "Polynomial degree",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the polynomial fits; reused in the model comparison.
cv_hp <- min(CV_RMSE)

Mpg ~ displacement

# Choose the polynomial degree for mpg ~ displacement by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate degree, preallocated instead of grown in the loop.
degrees <- 2:10
CV_RMSE <- numeric(length(degrees))

set.seed(39)

for (i in degrees) {
  poly_basis <- data.frame(poly(displacement, i))
  model_temp <- train(
    x = poly_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, degrees)] <- model_temp$results$RMSE
}

# CV RMSE against degree; the point with the lowest RMSE gets its own colour.
# Title typo ("diceplacement") fixed and the x axis labelled as the degree.
data.frame(degree = degrees, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = degree, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg~displacement - Polinomial",
    subtitle = "Selecting the polynomial order with cross-validation",
    x = "Polynomial degree",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the polynomial fits; reused in the model comparison.
cv_dp <- min(CV_RMSE)

Mpg ~ Acceleration

# Choose the polynomial degree for mpg ~ acceleration by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate degree, preallocated instead of grown in the loop.
degrees <- 2:10
CV_RMSE <- numeric(length(degrees))

set.seed(39)

for (i in degrees) {
  poly_basis <- data.frame(poly(acceleration, i))
  model_temp <- train(
    x = poly_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, degrees)] <- model_temp$results$RMSE
}

# CV RMSE against degree; the point with the lowest RMSE gets its own colour.
# The x axis is the polynomial degree, so it is labelled as such.
data.frame(degree = degrees, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = degree, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg~Acceleration - Polinomial",
    subtitle = "Selecting the polynomial order with cross-validation",
    x = "Polynomial degree",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the polynomial fits; reused in the model comparison.
cv_ap <- min(CV_RMSE)

Visual

Mpg ~ Horsepower

# Degree-7 raw polynomial regression of mpg on horsepower, printed for review.
# `raw = T` is kept as written so the printed call and coefficient labels
# stay the same.
mo_pol1 <- lm(mpg ~ poly(horsepower, 7, raw = T), data = mydata)
mo_pol1
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 7, raw = T), data = mydata)
## 
## Coefficients:
##                   (Intercept)  poly(horsepower, 7, raw = T)1  
##                    -4.891e+02                      3.325e+01  
## poly(horsepower, 7, raw = T)2  poly(horsepower, 7, raw = T)3  
##                    -8.476e-01                      1.135e-02  
## poly(horsepower, 7, raw = T)4  poly(horsepower, 7, raw = T)5  
##                    -8.755e-05                      3.914e-07  
## poly(horsepower, 7, raw = T)6  poly(horsepower, 7, raw = T)7  
##                    -9.429e-10                      9.472e-13
# Scatter of mpg against horsepower with the degree-7 polynomial fit in black.
poly_data1 <- ggplot(mydata, aes(x = horsepower, y = mpg)) +
  geom_point(colour = "orchid2") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 7, raw = TRUE),
    linetype = 1, colour = "black", se = FALSE
  ) +
  labs(
    title = "Regresi Polinomial Ordo 7",
    x = "Horse Power",
    y = "mile per gallon"
  ) +
  theme_bw()

Mpg ~ Displacement

# Degree-9 raw polynomial regression of mpg on displacement, printed for review.
# `raw = T` is kept as written so the printed call and coefficient labels
# stay the same.
mo_pol2 <- lm(mpg ~ poly(displacement, 9, raw = T), data = mydata)
mo_pol2
## 
## Call:
## lm(formula = mpg ~ poly(displacement, 9, raw = T), data = mydata)
## 
## Coefficients:
##                     (Intercept)  poly(displacement, 9, raw = T)1  
##                      -1.401e+03                        6.785e+01  
## poly(displacement, 9, raw = T)2  poly(displacement, 9, raw = T)3  
##                      -1.345e+00                        1.473e-02  
## poly(displacement, 9, raw = T)4  poly(displacement, 9, raw = T)5  
##                      -9.871e-05                        4.222e-07  
## poly(displacement, 9, raw = T)6  poly(displacement, 9, raw = T)7  
##                      -1.157e-09                        1.967e-12  
## poly(displacement, 9, raw = T)8  poly(displacement, 9, raw = T)9  
##                      -1.889e-15                        7.830e-19
# Scatter of mpg against displacement with the degree-9 polynomial fit in
# black. The x aesthetic is displacement (it was horsepower), so the panel
# matches its axis label and the displacement model.
poly_mydata1 <- ggplot(mydata, aes(x = displacement, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 9, raw = TRUE),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Regresi Polinomial Ordo 9") +
  xlab("Displacement") +
  ylab("mile per gallon")

Mpg ~ Acceleration

# Degree-4 raw polynomial regression of mpg on acceleration, printed for review.
# `raw = T` is kept as written so the printed call and coefficient labels
# stay the same.
mo_pol3 <- lm(mpg ~ poly(acceleration, 4, raw = T), data = mydata)
mo_pol3
## 
## Call:
## lm(formula = mpg ~ poly(acceleration, 4, raw = T), data = mydata)
## 
## Coefficients:
##                     (Intercept)  poly(acceleration, 4, raw = T)1  
##                      208.119805                       -58.179057  
## poly(acceleration, 4, raw = T)2  poly(acceleration, 4, raw = T)3  
##                        6.135768                        -0.267935  
## poly(acceleration, 4, raw = T)4  
##                        0.004193
# Scatter of mpg against acceleration with the degree-4 polynomial fit in black.
poly_mydata2 <- ggplot(mydata, aes(x = acceleration, y = mpg)) +
  geom_point(colour = "lightblue") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 4, raw = TRUE),
    linetype = 1, colour = "black", se = FALSE
  ) +
  labs(
    title = "Regresi Polinomial Ordo 4",
    x = " Acceleration",
    y = "mile per gallon"
  ) +
  theme_bw()

# Show the three polynomial panels together.
plot_grid(poly_data1, poly_mydata1, poly_mydata2)

Fungsi Tangga

Empiris

Mpg ~ Horsepower

# Choose the number of intervals for the step function mpg ~ horsepower
# by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate interval count, preallocated instead of grown.
candidates <- 2:10
CV_RMSE <- numeric(length(candidates))

set.seed(39)

for (i in candidates) {
  step_basis <- data.frame(cut(horsepower, i))
  model_temp <- train(
    x = step_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, candidates)] <- model_temp$results$RMSE
}

# CV RMSE against interval count; the lowest RMSE gets its own colour.
# The title now has the " - " separator used by the other step-function plots.
data.frame(cuts = candidates, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = cuts, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg ~ horsepower - Step Function",
    subtitle = "Selecting number of 'horsepower' cut-points with cross-validation",
    x = "Intervals",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the step-function fits; reused in the model comparison.
cv_ht <- min(CV_RMSE)

Mpg ~ Displacement

# 5-fold CV over the number of intervals for the step function
# mpg ~ displacement.
ctrl <- trainControl(method = "cv", number = 5)

candidates <- 2:10
CV_RMSE <- numeric(length(candidates))

set.seed(39)

for (i in candidates) {
  step_basis <- data.frame(cut(displacement, i))
  model_temp <- train(
    x = step_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, candidates)] <- model_temp$results$RMSE
}

# Flag the lowest RMSE, then draw CV RMSE against interval count.
cv_curve <- data.frame(cuts = candidates, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE))

ggplot(cv_curve, aes(x = cuts, y = CV_RMSE)) +
  geom_line(colour = "grey55") +
  geom_point(aes(colour = factor(min_CV_RMSE)), size = 2) +
  scale_x_continuous(breaks = 2:10, minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg ~ displacement - Step Function",
    subtitle = "Selecting number of 'displacement' cut-points with cross-validation",
    x = "Intervals",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the step-function fits.
cv_dt <- min(CV_RMSE)

Mpg ~ Acceleration

# 5-fold CV over the number of intervals for the step function
# mpg ~ acceleration.
ctrl <- trainControl(method = "cv", number = 5)

candidates <- 2:10
CV_RMSE <- numeric(length(candidates))

set.seed(39)

for (i in candidates) {
  step_basis <- data.frame(cut(acceleration, i))
  model_temp <- train(
    x = step_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, candidates)] <- model_temp$results$RMSE
}

# Flag the lowest RMSE, then draw CV RMSE against interval count.
cv_curve <- data.frame(cuts = candidates, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE))

ggplot(cv_curve, aes(x = cuts, y = CV_RMSE)) +
  geom_line(colour = "grey55") +
  geom_point(aes(colour = factor(min_CV_RMSE)), size = 2) +
  scale_x_continuous(breaks = 2:10, minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "mpg ~ acceleration - Step Function",
    subtitle = "Selecting number of 'acceleration' cut-points with cross-validation",
    x = "Intervals",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the step-function fits.
cv_at <- min(CV_RMSE)

Visual

Mpg ~ Horsepower

# Step-function fit: mpg regressed on horsepower cut into 8 intervals,
# printed for review.
mo_pc1 <- lm(mpg ~ cut(horsepower, 8), data = mydata)
mo_pc1
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 8), data = mydata)
## 
## Coefficients:
##                 (Intercept)    cut(horsepower, 8)(69,92]  
##                      33.908                       -6.961  
##  cut(horsepower, 8)(92,115]  cut(horsepower, 8)(115,138]  
##                     -12.755                      -15.666  
## cut(horsepower, 8)(138,161]  cut(horsepower, 8)(161,184]  
##                     -18.780                      -19.973  
## cut(horsepower, 8)(184,207]  cut(horsepower, 8)(207,230]  
##                     -21.123                      -21.008
# Scatter of mpg against horsepower with the 8-interval step-function fit.
# cut() has no `raw` argument (that belongs to poly()), so it is dropped here;
# the title spelling is corrected as well.
pice1 <- ggplot(mydata, aes(x = horsepower, y = mpg)) +
  geom_point(color = "red") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, 8),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Piecewise Constant Breaks 8") +
  xlab("Horsepower") +
  ylab("mile per gallon")

Mpg ~ Displacement

# Step-function fit: mpg regressed on displacement cut into 9 intervals,
# matching the 9-interval plot in this section. The previous call fitted a
# natural spline (ns) here, which belongs to the spline section.
# NOTE: the coefficient printout that follows in this document came from the
# earlier ns() fit and needs to be regenerated.
mo_pc2 <- lm(mpg ~ cut(displacement, 9), data = mydata)
mo_pc2
## 
## Call:
## lm(formula = mpg ~ ns(displacement, 9), data = mydata)
## 
## Coefficients:
##          (Intercept)  ns(displacement, 9)1  ns(displacement, 9)2  
##              25.5644                3.0660               -0.2712  
## ns(displacement, 9)3  ns(displacement, 9)4  ns(displacement, 9)5  
##               0.9362               -4.0868               -6.0371  
## ns(displacement, 9)6  ns(displacement, 9)7  ns(displacement, 9)8  
##              -9.9299              -15.4379               -1.9057  
## ns(displacement, 9)9  
##             -18.2925
# Scatter of mpg against displacement with the 9-interval step-function fit.
# cut() has no `raw` argument (that belongs to poly()), so it is dropped here;
# the title spelling is corrected as well.
pice2 <- ggplot(mydata, aes(x = displacement, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, 9),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Piecewise Constant Breaks 9") +
  xlab("Displacement") +
  ylab("mile per gallon")

Mpg ~ Acceleration

# Step-function fit: mpg regressed on acceleration cut into 6 intervals,
# matching the 6-interval plot in this section. The previous call fitted a
# natural spline on the sum horsepower + displacement + acceleration, which is
# not the model this subsection describes.
# NOTE: the coefficient printout that follows in this document came from the
# earlier ns() fit and needs to be regenerated.
mo_pc3 <- lm(mpg ~ cut(acceleration, 6), data = mydata)
mo_pc3
## 
## Call:
## lm(formula = mpg ~ ns(horsepower + displacement + acceleration, 
##     4), data = mydata)
## 
## Coefficients:
##                                      (Intercept)  
##                                            37.61  
## ns(horsepower + displacement + acceleration, 4)1  
##                                           -15.85  
## ns(horsepower + displacement + acceleration, 4)2  
##                                           -20.04  
## ns(horsepower + displacement + acceleration, 4)3  
##                                           -31.70  
## ns(horsepower + displacement + acceleration, 4)4  
##                                           -20.29
# Scatter of mpg against acceleration with the 6-interval step-function fit.
# cut() has no `raw` argument (that belongs to poly()), so it is dropped here;
# the title spelling is corrected as well.
pice3 <- ggplot(mydata, aes(x = acceleration, y = mpg)) +
  geom_point(color = "orchid2") +
  stat_smooth(
    method = "lm",
    formula = y ~ cut(x, 6),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Piecewise Constant Breaks 6") +
  xlab("acceleration") +
  ylab("mile per gallon")

# Show the three step-function panels together.
plot_grid(pice1, pice2, pice3)

Cubic Spline

Empiris

Mpg ~ Horsepower

# Choose the degrees of freedom of the natural cubic spline mpg ~ horsepower
# by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate df, preallocated instead of grown in the loop.
df_grid <- 2:10
CV_RMSE <- numeric(length(df_grid))

set.seed(39)

for (i in df_grid) {
  spline_basis <- data.frame(ns(horsepower, i))
  model_temp <- train(
    x = spline_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, df_grid)] <- model_temp$results$RMSE
}

# CV RMSE against df; the lowest RMSE gets its own colour. The x axis shows
# the df passed to ns(), so it is labelled as degrees of freedom.
data.frame(df = df_grid, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = df, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "Mpg~horsepower - NCS",
    subtitle = "Selecting the degrees of freedom with cross-validation",
    x = "Degrees of freedom",
    y = "CV RMSE"
  )

# Print the interior knots ns() uses for horsepower with df = 4.
attr(ns(horsepower, df = 4), "knots")
##   25%   50%   75% 
##  75.0  93.5 126.0
# Lowest CV RMSE among the natural-spline fits for horsepower.
cv_hs <- min(CV_RMSE)

Mpg ~ Displacement

# Choose the degrees of freedom of the natural cubic spline mpg ~ displacement
# by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate df, preallocated instead of grown in the loop.
df_grid <- 2:10
CV_RMSE <- numeric(length(df_grid))

set.seed(39)

for (i in df_grid) {
  spline_basis <- data.frame(ns(displacement, i))
  model_temp <- train(
    x = spline_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, df_grid)] <- model_temp$results$RMSE
}

# CV RMSE against df; the lowest RMSE gets its own colour. The title said
# "Step Function" although the model is a natural cubic spline; the x axis
# is labelled as the df passed to ns().
data.frame(df = df_grid, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = df, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "Mpg~Displacement - NCS",
    subtitle = "Selecting the degrees of freedom with cross-validation",
    x = "Degrees of freedom",
    y = "CV RMSE"
  )

# Print the interior knots ns() uses for displacement with df = 4.
attr(ns(displacement, df = 4), "knots")
##    25%    50%    75% 
## 105.00 151.00 275.75
# Lowest CV RMSE among the natural-spline fits for displacement.
cv_ds <- min(CV_RMSE)

Mpg ~ Acceleration

# Choose the degrees of freedom of the natural cubic spline mpg ~ acceleration
# by 5-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 5)

# One CV RMSE per candidate df, preallocated instead of grown in the loop.
df_grid <- 2:10
CV_RMSE <- numeric(length(df_grid))

set.seed(39)

for (i in df_grid) {
  spline_basis <- data.frame(ns(acceleration, i))
  model_temp <- train(
    x = spline_basis,
    y = mpg,
    method = "lm",
    metric = "RMSE",
    trControl = ctrl
  )
  CV_RMSE[match(i, df_grid)] <- model_temp$results$RMSE
}

# CV RMSE against df; the lowest RMSE gets its own colour. The title said
# "Step Function" although the model is a natural cubic spline; the x axis
# is labelled as the df passed to ns().
data.frame(df = df_grid, CV_RMSE = CV_RMSE) %>%
  mutate(min_CV_RMSE = as.numeric(min(CV_RMSE) == CV_RMSE)) %>%
  ggplot(aes(x = df, y = CV_RMSE)) +
  geom_line(col = "grey55") +
  geom_point(size = 2, aes(col = factor(min_CV_RMSE))) +
  scale_x_continuous(breaks = seq(2, 10), minor_breaks = NULL) +
  scale_color_manual(values = c("deepskyblue3", "green")) +
  theme(legend.position = "none") +
  labs(
    title = "Mpg~Acceleration - NCS",
    subtitle = "Selecting the degrees of freedom with cross-validation",
    x = "Degrees of freedom",
    y = "CV RMSE"
  )

# Lowest CV RMSE among the natural-spline fits for acceleration.
cv_as <- min(CV_RMSE)

Visual

Mpg ~ Horsepower

# Natural cubic spline regression of mpg on horsepower with df = 7,
# printed for review.
mo_sp1 <- lm(mpg ~ ns(horsepower, 7), data = mydata)
mo_sp1
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, 7), data = mydata)
## 
## Coefficients:
##        (Intercept)  ns(horsepower, 7)1  ns(horsepower, 7)2  ns(horsepower, 7)3  
##             33.748              -7.888              -7.844             -14.226  
## ns(horsepower, 7)4  ns(horsepower, 7)5  ns(horsepower, 7)6  ns(horsepower, 7)7  
##            -12.527             -24.049             -19.387             -20.803
# Print the interior knots ns() uses for horsepower with df = 7.
attr(ns(horsepower, df = 7), "knots")
## 14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429% 
##  68.85714  79.71429  89.57143  98.00000 113.57143 150.00000
# Scatter of mpg against horsepower with the natural-spline fit (df = 7) and
# a dashed vertical line at each interior knot. The knot positions are read
# from ns() instead of being typed in as truncated constants, and the stray
# "7" is removed from the x-axis label.
ps1 <- ggplot(mydata, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, 7),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Regresi NCS Knot 7") +
  xlab("Horse Power") +
  ylab("mile per gallon") +
  geom_vline(
    xintercept = as.numeric(attr(ns(mydata$horsepower, df = 7), "knots")),
    col = "grey50", lty = 2
  )

Mpg ~ displacement

# Natural cubic spline regression of mpg on displacement with df = 9,
# printed for review.
mo_sp2 <- lm(mpg ~ ns(displacement, 9), data = mydata)
mo_sp2
## 
## Call:
## lm(formula = mpg ~ ns(displacement, 9), data = mydata)
## 
## Coefficients:
##          (Intercept)  ns(displacement, 9)1  ns(displacement, 9)2  
##              25.5644                3.0660               -0.2712  
## ns(displacement, 9)3  ns(displacement, 9)4  ns(displacement, 9)5  
##               0.9362               -4.0868               -6.0371  
## ns(displacement, 9)6  ns(displacement, 9)7  ns(displacement, 9)8  
##              -9.9299              -15.4379               -1.9057  
## ns(displacement, 9)9  
##             -18.2925
# Print the interior knots ns() uses for displacement with df = 9.
attr(ns(displacement, df = 9), "knots")
## 11.11111% 22.22222% 33.33333% 44.44444% 55.55556% 66.66667% 77.77778% 88.88889% 
##   91.0000   98.0000  119.0000  140.0000  173.0000  232.0000  302.2222  350.0000
# Scatter of mpg against displacement with the natural-spline fit (df = 9)
# and a dashed vertical line at each interior knot. The knot positions are
# read from ns() instead of being typed in as rounded constants.
ps2 <- ggplot(mydata, aes(x = displacement, y = mpg)) +
  geom_point(color = "deepskyblue1") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, 9),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Regresi NCS Knot 9") +
  xlab("Displacement") +
  ylab("mile per gallon") +
  geom_vline(
    xintercept = as.numeric(attr(ns(mydata$displacement, df = 9), "knots")),
    col = "grey50", lty = 2
  )

Mpg ~ Acceleration

# Natural cubic spline regression of mpg on acceleration with df = 8,
# printed for review.
mo_sp3 <- lm(mpg ~ ns(acceleration, 8), data = mydata)
mo_sp3
## 
## Call:
## lm(formula = mpg ~ ns(acceleration, 8), data = mydata)
## 
## Coefficients:
##          (Intercept)  ns(acceleration, 8)1  ns(acceleration, 8)2  
##               13.206                 5.055                15.322  
## ns(acceleration, 8)3  ns(acceleration, 8)4  ns(acceleration, 8)5  
##                8.708                12.649                13.588  
## ns(acceleration, 8)6  ns(acceleration, 8)7  ns(acceleration, 8)8  
##                9.310                20.286                20.275
# Print the interior knots ns() uses for acceleration with df = 8.
attr(ns(acceleration, df = 8), "knots")
##   12.5%     25%   37.5%     50%   62.5%     75%   87.5% 
## 12.5000 13.7750 14.5000 15.5000 16.2000 17.0250 18.7125
# Scatter of mpg against acceleration with the natural-spline fit (df = 8)
# and a dashed vertical line at each interior knot. The x-axis label now names
# the plotted variable (it said "Displacement"), and the knot positions are
# read from ns() instead of being typed in as truncated constants.
ps3 <- ggplot(mydata, aes(x = acceleration, y = mpg)) +
  geom_point(color = "red") +
  stat_smooth(
    method = "lm",
    formula = y ~ ns(x, 8),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Regresi NCS Knot 8") +
  xlab("Acceleration") +
  ylab("mile per gallon") +
  geom_vline(
    xintercept = as.numeric(attr(ns(mydata$acceleration, df = 8), "knots")),
    col = "grey50", lty = 2
  )

# Show the three spline panels together.
plot_grid(ps1, ps2, ps3)

Model Terbaik

Mpg ~ Horsepower

# Overlay the three fits chosen for mpg ~ horsepower:
# red = degree-7 polynomial, green = 8-interval step function,
# black = natural cubic spline with df = 7.
# cut() has no `raw` argument (that belongs to poly()), so it is dropped.
ggplot(mydata, aes(x = horsepower, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(
    method = "lm", formula = y ~ poly(x, 7, raw = TRUE),
    lty = 1, col = "red", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ cut(x, 8),
    lty = 1, col = "green", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ ns(x, 7),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Scatter Plot Mpg dan Horse Power") +
  xlab("Horse Power") +
  ylab("Mpg")

# Table of the best CV RMSE per method; the spline label is spelled NCS
# (natural cubic spline).
RMSE <- rbind(cv_hp, cv_ht, cv_hs)
Model <- c("poli 7", "Tangga 8", "NCS(7)")
best_model <- data.frame(Model, RMSE)
datatable(best_model)

Model terbaik untuk peubah x horsepower, yaitu model dengan RMSE paling minimum, adalah metode NCS dengan df = 7 dengan RMSE 4.277.

Mpg ~ Displacement

# Overlay the three fits chosen for mpg ~ displacement:
# red = degree-9 polynomial, green = 9-interval step function,
# black = natural cubic spline with df = 9.
# Fixes: the x axis is labelled with the plotted variable (it said
# "Acceleration"); the spline uses df = 9 to match the displacement spline
# fitted and plotted earlier (it used 7 here); cut() has no `raw` argument.
ggplot(mydata, aes(x = displacement, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(
    method = "lm", formula = y ~ poly(x, 9, raw = TRUE),
    lty = 1, col = "red", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ cut(x, 9),
    lty = 1, col = "green", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ ns(x, 9),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Scatter Plot Mpg dan Displacement") +
  xlab("Displacement") +
  ylab("Mpg")

# Table of the best CV RMSE per method.
# NOTE(review): the spline label assumes df = 9 was the cross-validated
# choice, as used by mo_sp2 - confirm against the CV plot.
RMSE <- rbind(cv_dp, cv_dt, cv_ds)
Model <- c("Poly 9", "Tangga 9", "NCS(9)")
best_model <- data.frame(Model, RMSE)
datatable(best_model)

Model terbaik untuk peubah x displacement, yaitu model dengan RMSE paling minimum, adalah metode NCS dengan RMSE 4.16.


#### Mpg ~ Acceleration


```r
# Overlay the three fits chosen for mpg ~ acceleration:
# red = degree-4 polynomial, green = 6-interval step function,
# black = natural cubic spline with df = 8.
# cut() has no `raw` argument (that belongs to poly()), so it is dropped.
ggplot(mydata, aes(x = acceleration, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(
    method = "lm", formula = y ~ poly(x, 4, raw = TRUE),
    lty = 1, col = "red", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ cut(x, 6),
    lty = 1, col = "green", se = FALSE
  ) +
  stat_smooth(
    method = "lm", formula = y ~ ns(x, 8),
    lty = 1, col = "black", se = FALSE
  ) +
  theme_bw() +
  ggtitle("Scatter Plot Mpg dan Acceleration") +
  xlab("Acceleration") +
  ylab("Mpg")

# Table of the best CV RMSE per method; the spline label is spelled NCS
# (natural cubic spline).
RMSE <- rbind(cv_ap, cv_at, cv_as)
Model <- c("Poly 4", "Tangga 6", "NCS(8)")
best_model <- data.frame(Model, RMSE)
datatable(best_model)

```

Jika dilihat dari hasil plot, kurva ketiga metode saling berhimpitan sehingga sukar disimpulkan model mana yang terbaik. Jika dilihat dari nilai RMSE, model terbaik untuk peubah x acceleration adalah metode NCS dengan df = 8.

Referensi