Data

library yang digunakan

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(splines)
library(rsample)
## Warning: package 'rsample' was built under R version 4.0.5
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
attach(Auto)
## The following object is masked from package:ggplot2:
## 
##     mpg
head(cbind(mpg, horsepower))
##      mpg horsepower
## [1,]  18        130
## [2,]  15        165
## [3,]  18        150
## [4,]  16        150
## [5,]  17        140
## [6,]  15        198

Data Auto (Data Lengkap)

plot(horsepower, mpg, pch=19, col="cadetblue",
     xlab="Horse Power", ylab="mile per gallon")

Regresi Polinomial

set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")
d<-1:10
best_poly<-map_dfr(d, function(i){
 metric_poly <- map_dfr(cross_val$splits,
                        function(x){
                           mod <- lm(mpg ~ poly(horsepower, d = i),
                                     data=Auto[x$in_id,])
                           pred <- predict(mod,newdata=Auto[-x$in_id,])
                           truth <- Auto[-x$in_id,]$mpg
                           rmse <- mlr3measures::rmse(truth = truth,response = pred)
                           mae <- mlr3measures::mae(truth = truth,response = pred)
                           metric <- c(rmse,mae)
                           names(metric) <- c("rmse","mae")
                           return(metric)
                        } 
                        )
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
##     d     rmse      mae
## 1   1 4.908935 3.858832
## 2   2 4.352806 3.261129
## 3   3 4.356807 3.264025
## 4   4 4.354783 3.274987
## 5   5 4.307652 3.238723
## 6   6 4.301354 3.241417
## 7   7 4.287438 3.222540
## 8   8 4.302908 3.240966
## 9   9 4.321277 3.247392
## 10 10 4.336836 3.257891
polinom = lm(mpg ~ poly(horsepower,7), data=Auto)
prediksi = predict( polinom , data.frame(Auto), interval="predict" )
plot(horsepower, mpg, pch=19, col="cadetblue1",
     xlab="Horse Power", ylab="mile per gallon",
     main="Regresi Polinomial Ordo 4")
ix <- sort(horsepower,index.return=T)$ix
lines(horsepower[ix], prediksi[ix], 
      main = "Polynomial Regression", col="red",
      type="l", lwd=2)

Regresi Fungsi Tangga

set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")

breaks <- 2:10

best_tangga <- map_dfr(breaks, function(i){

metric_tangga <- map_dfr(cross_val$splits,
    function(x){

training <- Auto[x$in_id,]
training$horsepower <- cut(training$horsepower,i)

mod <- lm(mpg ~ horsepower,
         data=training)

labs_x <- levels(mod$model[,2])

labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))


testing <- Auto[-x$in_id,]

horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))

pred <- predict(mod,
               newdata=list(horsepower=horsepower_new))
truth <- testing$mpg

data_eval <- na.omit(data.frame(truth,pred))

rmse <- mlr3measures::rmse(truth = data_eval$truth,
                           response = data_eval$pred
                           )
mae <- mlr3measures::mae(truth = data_eval$truth,
                           response = data_eval$pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_tangga



# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)

mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
##    d     rmse      mae
## 1  2 6.122422 4.776980
## 2  3 5.766535 4.520709
## 3  4 5.020265 3.805185
## 4  5 4.706711 3.572772
## 5  6 4.658138 3.539010
## 6  7 4.572175 3.405770
## 7  8 4.438716 3.398562
## 8  9 4.498963 3.435017
## 9 10 4.569136 3.437797
ggplot(Auto,aes(x=horsepower, y=mpg)) +
                 geom_point(alpha=0.55, color="cadetblue4") +
  stat_smooth(method = "lm", 
               formula = y~cut(x,8), 
               lty = 1, col = "red",se = F)+
  theme_bw()

Regresi Spline

set.seed(1234)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")

df <- 3:12

best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
    function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
         data=Auto[x$in_id,])
pred <- predict(mod,
               newdata=Auto[-x$in_id,])
truth <- Auto[-x$in_id,]$mpg

rmse <- mlr3measures::rmse(truth = truth,
                           response = pred
                           )
mae <- mlr3measures::mae(truth = truth,
                           response = pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_spline3

# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)

mean_metric_spline3
}
)
best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
##     d     rmse      mae
## 1   3 4.342637 3.259033
## 2   4 4.312147 3.242972
## 3   5 4.300381 3.235026
## 4   6 4.298908 3.229285
## 5   7 4.275443 3.217522
## 6   8 4.303139 3.250454
## 7   9 4.293479 3.231677
## 8  10 4.277135 3.199777
## 9  11 4.341622 3.255280
## 10 12 4.314328 3.216725
attr(ns(Auto$horsepower, df=10),"knots")
##   10%   20%   30%   40%   50%   60%   70%   80%   90% 
##  67.0  72.0  80.0  88.0  93.5 100.0 110.0 140.0 157.7
ggplot(Auto,aes(x=horsepower, y=mpg)) +
                 geom_point(alpha=0.55, color="steelblue") +
  stat_smooth(method = "lm", 
               formula = y~ns(x, knots = c(67.0,  72.0,  80.0, 88.0,  93.5, 100.0, 110.0, 140.0, 157.7 )), 
               lty = 1,se = F, col="red")

Model Terbaik

nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
                      best_tangga %>% select(-1) %>% slice_min(mae),
                      best_spline3 %>%  select(-1)%>% slice_min(mae)
                      )

nama_model <- c("Polynomial7","Piecewise constant(breaks=8)",
                "Natural cubic spline(df=10)"
)

data.frame(nama_model,nilai_metric)
##                     nama_model     rmse      mae
## 1                  Polynomial7 4.287438 3.222540
## 2 Piecewise constant(breaks=8) 4.438716 3.398562
## 3  Natural cubic spline(df=10) 4.277135 3.199777

fungsi regresi terbaik yaitu spline dengan df=12

Data Amerika

id.A<-Auto$origin==1
Amerika<-Auto[id.A,]
plot(Amerika$horsepower, Amerika$mpg, pch=19, col="aquamarine4",
     xlab="Horse Power", ylab="mile per gallon")

Polinom Amerika

set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")
d<-1:10
best_poly<-map_dfr(d, function(i){
 metric_poly <- map_dfr(cross_val$splits,
                        function(x){
                           mod <- lm(mpg ~ poly(horsepower, d = i),
                                     data=Amerika[x$in_id,])
                           pred <- predict(mod,newdata=Amerika[-x$in_id,])
                           truth <- Amerika[-x$in_id,]$mpg
                           rmse <- mlr3measures::rmse(truth = truth,response = pred)
                           mae <- mlr3measures::mae(truth = truth,response = pred)
                           metric <- c(rmse,mae)
                           names(metric) <- c("rmse","mae")
                           return(metric)
                        } 
                        )
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
##     d     rmse      mae
## 1   1 4.268396 3.361861
## 2   2 3.806887 2.894027
## 3   3 3.824819 2.908566
## 4   4 3.835479 2.901702
## 5   5 3.806815 2.895982
## 6   6 3.800663 2.889671
## 7   7 3.793874 2.863246
## 8   8 3.806333 2.870046
## 9   9 3.819989 2.879042
## 10 10 4.155978 3.010712

Fungsi Tangga Amerika

set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")

breaks <- 2:11

best_tangga <- map_dfr(breaks, function(i){

metric_tangga <- map_dfr(cross_val$splits,
    function(x){

training <- Amerika[x$in_id,]
training$horsepower <- cut(training$horsepower,i)

mod <- lm(mpg ~ horsepower,
         data=training)

labs_x <- levels(mod$model[,2])

labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))


testing <- Amerika[-x$in_id,]

horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))

pred <- predict(mod,
               newdata=list(horsepower=horsepower_new))
truth <- testing$mpg

data_eval <- na.omit(data.frame(truth,pred))

rmse <- mlr3measures::rmse(truth = data_eval$truth,
                           response = data_eval$pred
                           )
mae <- mlr3measures::mae(truth = data_eval$truth,
                           response = data_eval$pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_tangga



# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)

mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
##     d     rmse      mae
## 1   2 4.984048 3.826708
## 2   3 4.685397 3.651166
## 3   4 4.162464 3.158257
## 4   5 4.014042 3.098955
## 5   6 4.234363 3.198962
## 6   7 4.091327 3.149676
## 7   8 3.903648 2.955931
## 8   9 3.744940 2.820420
## 9  10 3.954484 2.993423
## 10 11 3.901662 2.936554

Spline Amerika

set.seed(1234)
cross_val <- vfold_cv(Amerika,v=10,strata = "mpg")

df <- 2:12

best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
    function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
         data=Amerika[x$in_id,])
pred <- predict(mod,
               newdata=Amerika[-x$in_id,])
truth <- Amerika[-x$in_id,]$mpg

rmse <- mlr3measures::rmse(truth = truth,
                           response = pred
                           )
mae <- mlr3measures::mae(truth = truth,
                           response = pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_spline3

# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)

mean_metric_spline3
}
)

best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
##     d     rmse      mae
## 1   2 3.793072 2.884487
## 2   3 3.816322 2.901572
## 3   4 3.831471 2.911463
## 4   5 3.788643 2.846129
## 5   6 3.810135 2.874073
## 6   7 3.787742 2.831774
## 7   8 3.784609 2.824976
## 8   9 3.790436 2.839366
## 9  10 3.780952 2.820648
## 10 11 3.804090 2.849770
## 11 12 3.816358 2.839208

Best metode Amerika

nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
                      best_tangga %>% select(-1) %>% slice_min(mae),
                      best_spline3 %>%  select(-1)%>% slice_min(mae)
                      )

nama_model <- c("Polynomial7","Piecewise constant(breaks=9)",
                "Natural cubic spline(df=10)"
)

data.frame(nama_model,nilai_metric)
##                     nama_model     rmse      mae
## 1                  Polynomial7 3.793874 2.863246
## 2 Piecewise constant(breaks=9) 3.744940 2.820420
## 3  Natural cubic spline(df=10) 3.780952 2.820648

model terbaik bagi data Amerika adalah fungsi tangga dengan breaks=9

Data Eropa

id.E<-Auto$origin==2
Eropa<-Auto[id.E,]
plot(Eropa$horsepower, Eropa$mpg, pch=19, col="aquamarine4",
     xlab="Horse Power", ylab="mile per gallon")

Polinom Eropa

set.seed(1234)
cross_val <- vfold_cv(Eropa,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
d<-1:10
best_poly<-map_dfr(d, function(i){
 metric_poly <- map_dfr(cross_val$splits,
                        function(x){
                           mod <- lm(mpg ~ poly(horsepower, d = i),
                                     data=Eropa[x$in_id,])
                           pred <- predict(mod,newdata=Eropa[-x$in_id,])
                           truth <- Eropa[-x$in_id,]$mpg
                           rmse <- mlr3measures::rmse(truth = truth,response = pred)
                           mae <- mlr3measures::mae(truth = truth,response = pred)
                           metric <- c(rmse,mae)
                           names(metric) <- c("rmse","mae")
                           return(metric)
                        } 
                        )
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
##     d      rmse      mae
## 1   1  4.748730 3.809553
## 2   2  4.825216 3.870409
## 3   3  4.890907 3.933474
## 4   4  4.951956 4.035466
## 5   5  5.112665 4.153303
## 6   6  6.750648 4.942009
## 7   7  7.098575 5.014004
## 8   8  6.338606 4.610701
## 9   9 16.228101 8.698614
## 10 10 10.059234 6.019488

Fungsi Tangga Eropa

set.seed(1234)
cross_val <- vfold_cv(Eropa,v=9,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
breaks <- 2:9

best_tangga <- map_dfr(breaks, function(i){

metric_tangga <- map_dfr(cross_val$splits,
    function(x){

training <- Eropa[x$in_id,]
training$horsepower <- cut(training$horsepower,i)

mod <- lm(mpg ~ horsepower,
         data=training)

labs_x <- levels(mod$model[,2])

labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))


testing <- Eropa[-x$in_id,]

horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))

pred <- predict(mod,
               newdata=list(horsepower=horsepower_new))
truth <- testing$mpg

data_eval <- na.omit(data.frame(truth,pred))

rmse <- mlr3measures::rmse(truth = data_eval$truth,
                           response = data_eval$pred
                           )
mae <- mlr3measures::mae(truth = data_eval$truth,
                           response = data_eval$pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_tangga



# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)

mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
##   d     rmse      mae
## 1 2 5.371208 4.250614
## 2 3 5.578173 4.533519
## 3 4 5.144853 4.183493
## 4 5 5.087614 4.003546
## 5 6 5.236109 4.136374
## 6 7 5.102072 3.975827
## 7 8 5.156002 4.146550
## 8 9 5.374641 4.286823

Spline Eropa

set.seed(1234)
cross_val <- vfold_cv(Eropa,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
df <- 2:12

best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
    function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
         data=Eropa[x$in_id,])
pred <- predict(mod,
               newdata=Eropa[-x$in_id,])
truth <- Eropa[-x$in_id,]$mpg

rmse <- mlr3measures::rmse(truth = truth,
                           response = pred
                           )
mae <- mlr3measures::mae(truth = truth,
                           response = pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_spline3

# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)

mean_metric_spline3
}
)

best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
##     d     rmse      mae
## 1   2 4.824097 3.870036
## 2   3 4.893425 3.935614
## 3   4 4.922407 3.953072
## 4   5 5.007375 4.017736
## 5   6 5.047184 3.998508
## 6   7 5.177158 4.099570
## 7   8 5.245169 4.180753
## 8   9 5.378872 4.295886
## 9  10 5.462338 4.369747
## 10 11 5.557164 4.404541
## 11 12 5.657355 4.486990

Best metode Eropa

nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
                      best_tangga %>% select(-1) %>% slice_min(mae),
                      best_spline3 %>%  select(-1)%>% slice_min(mae)
                      )

nama_model <- c("Polynomial1","Piecewise constant(breaks=7)",
                "Natural cubic spline(df=2)"
)

data.frame(nama_model,nilai_metric)
##                     nama_model     rmse      mae
## 1                  Polynomial1 4.748730 3.809553
## 2 Piecewise constant(breaks=7) 5.102072 3.975827
## 3   Natural cubic spline(df=2) 4.824097 3.870036

model terbaik bagi data Amerika adalah polinom dengan ordo 1

Data Jepang

id.J<-Auto$origin==3
Jepang<-Auto[id.J,]
plot(Jepang$horsepower, Jepang$mpg, pch=19, col="aquamarine4",
     xlab="Horse Power", ylab="mile per gallon")

Polinom Jepang

set.seed(1234)
cross_val <- vfold_cv(Jepang,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
d<-1:10
best_poly<-map_dfr(d, function(i){
 metric_poly <- map_dfr(cross_val$splits,
                        function(x){
                           mod <- lm(mpg ~ poly(horsepower, d = i),
                                     data=Jepang[x$in_id,])
                           pred <- predict(mod,newdata=Jepang[-x$in_id,])
                           truth <- Jepang[-x$in_id,]$mpg
                           rmse <- mlr3measures::rmse(truth = truth,response = pred)
                           mae <- mlr3measures::mae(truth = truth,response = pred)
                           metric <- c(rmse,mae)
                           names(metric) <- c("rmse","mae")
                           return(metric)
                        } 
                        )
metric_poly
#menghitung rata rata untuk 10 folds
mean_metric_poly<-colMeans(metric_poly)
mean_metric_poly
}
)
best_poly <- cbind(d=d,best_poly)
best_poly
##     d      rmse      mae
## 1   1  4.379588 3.511243
## 2   2  4.407709 3.514377
## 3   3  3.891911 2.990732
## 4   4  4.507924 3.295015
## 5   5  3.933976 3.050013
## 6   6  4.189597 3.219087
## 7   7  7.020382 4.241221
## 8   8 14.692013 6.826629
## 9   9 18.866965 8.179589
## 10 10 15.366406 6.988463

Fungsi Tangga Jepang

set.seed(1234)
cross_val <- vfold_cv(Jepang,v=5,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
breaks <- 2:5

best_tangga <- map_dfr(breaks, function(i){

metric_tangga <- map_dfr(cross_val$splits,
    function(x){

training <- Jepang[x$in_id,]
training$horsepower <- cut(training$horsepower,i)

mod <- lm(mpg ~ horsepower,
         data=training)

labs_x <- levels(mod$model[,2])

labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))


testing <- Jepang[-x$in_id,]

horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))

pred <- predict(mod,
               newdata=list(horsepower=horsepower_new))
truth <- testing$mpg

data_eval <- na.omit(data.frame(truth,pred))

rmse <- mlr3measures::rmse(truth = data_eval$truth,
                           response = data_eval$pred
                           )
mae <- mlr3measures::mae(truth = data_eval$truth,
                           response = data_eval$pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_tangga



# menghitung rata-rata untuk 10 folds
mean_metric_tangga <- colMeans(metric_tangga)

mean_metric_tangga
})
best_tangga <- cbind(d=breaks,best_tangga)
# menampilkan hasil all breaks
best_tangga
##   d     rmse      mae
## 1 2 4.657624 3.419243
## 2 3 4.276138 3.306548
## 3 4 4.236296 3.341741
## 4 5 4.310206 3.423978

Spline Jepang

set.seed(1234)
cross_val <- vfold_cv(Jepang,v=10,strata = "mpg")
## Warning: The number of observations in each quantile is below the recommended
## threshold of 20. Stratification will be done with 3 breaks instead.
df <- 2:12

best_spline3 <- map_dfr(df, function(i){
metric_spline3 <- map_dfr(cross_val$splits,
    function(x){
mod <- lm(mpg ~ ns(horsepower,df=i),
         data=Jepang[x$in_id,])
pred <- predict(mod,
               newdata=Jepang[-x$in_id,])
truth <- Jepang[-x$in_id,]$mpg

rmse <- mlr3measures::rmse(truth = truth,
                           response = pred
                           )
mae <- mlr3measures::mae(truth = truth,
                           response = pred
                           )
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)

metric_spline3

# menghitung rata-rata untuk 10 folds
mean_metric_spline3 <- colMeans(metric_spline3)

mean_metric_spline3
}
)

best_spline3 <- cbind(d=df,best_spline3)
# menampilkan hasil all breaks
best_spline3
##     d     rmse      mae
## 1   2 4.420154 3.531408
## 2   3 4.075356 3.131341
## 3   4 4.143401 3.186480
## 4   5 4.114731 3.094869
## 5   6 4.219534 3.191374
## 6   7 4.274199 3.246116
## 7   8 4.341063 3.307820
## 8   9 4.340789 3.318587
## 9  10 4.471304 3.385200
## 10 11 4.515272 3.443309
## 11 12 4.599257 3.540302

Best metode Jepang

nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(mae),
                      best_tangga %>% select(-1) %>% slice_min(mae),
                      best_spline3 %>%  select(-1)%>% slice_min(mae)
                      )

nama_model <- c("Polynomial3","Piecewise constant(breaks=3)",
                "Natural cubic spline(df=5)"
)

data.frame(nama_model,nilai_metric)
##                     nama_model     rmse      mae
## 1                  Polynomial3 3.891911 2.990732
## 2 Piecewise constant(breaks=3) 4.276138 3.306548
## 3   Natural cubic spline(df=5) 4.114731 3.094869

model terbaik bagi data jepang adalah polinomial ordo 3

Visualisasi Model Terbaik Dari Setiap Negara

par(mfrow=c(1,3))
 ggplot(Amerika,aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.55, color="aquamarine4") +
  stat_smooth(method = "lm", 
               formula = y~cut(x,9), 
               lty = 1, col = "red",se = F)+
  theme_bw() 

 ggplot(Eropa,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="aquamarine4") +
  stat_smooth(method = "lm", 
               formula = y~poly(x,1,raw=T), 
               lty = 1, col = "red",se = F)+
  theme_bw()

  ggplot(Jepang,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="aquamarine4") +
  stat_smooth(method = "lm", 
               formula = y~poly(x,3,raw=T), 
               lty = 1, col = "red",se = F)+
  theme_bw()