Problem
Use cross-validation to find the optimal model of mpg vs horsepower with each of the following three methods, then compare the results:
Polynomial regression
Piecewise constant
Natural cubic splines
Repeat the analysis for each subset of the data defined by country of origin (America, Europe, Japan).
Plot the best models in a single frame.
Discuss your results.
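The resampling scheme used throughout is plain 5-fold cross-validation: fit a candidate model on four folds, compute the error on the held-out fold, and average across folds. A minimal sketch of that idea for a single model formula (the helper cv_rmse and its arguments are illustrative, not part of the workflow below):

library(rsample)

# Illustrative helper: average held-out RMSE over 5 folds for one model formula
cv_rmse <- function(data, form, v = 5) {
  folds <- vfold_cv(data, v = v)
  errs <- sapply(folds$splits, function(s) {
    fit   <- lm(form, data = analysis(s))           # fit on the analysis (training) folds
    pred  <- predict(fit, newdata = assessment(s))  # predict on the held-out fold
    truth <- assessment(s)[[all.vars(form)[1]]]     # response named on the LHS of the formula
    sqrt(mean((truth - pred)^2))
  })
  mean(errs)
}
# e.g. cv_rmse(ISLR::Auto, mpg ~ splines::ns(horsepower, df = 4))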
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(splines)
library(rsample)
library(mlr3measures)
## In order to avoid name clashes, do not attach 'mlr3measures'. Instead, only load the namespace with `requireNamespace("mlr3measures")` and access the measures directly via `::`, e.g. `mlr3measures::auc()`.
library(ISLR)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:mlr3measures':
##
## precision, recall, sensitivity, specificity
## The following object is masked from 'package:purrr':
##
## lift
library(DT)
library(cowplot)
library(ggpmisc)
## Loading required package: ggpp
##
## Attaching package: 'ggpp'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(tibble)
Data Exploration
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
head(Auto)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
data = Auto
A <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(aes(color = factor(origin))) +
  stat_smooth(aes(x = horsepower, y = mpg), method = "lm",
              formula = y ~ x, lty = 1,
              col = "red", se = F) +
  theme_bw() +
  ggtitle("Scatter Plot of Mpg vs Horsepower") +
  xlab("Horse Power") +
  ylab("Mpg")
B <- ggplot(Auto, aes(x = horsepower, y = mpg, colour = factor(origin))) +
  geom_point() + stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
plot_grid(A, B)
x1 <- Auto$origin == "1"
data1 <- Auto[x1, ]

x2 <- Auto$origin == "2"
data2 <- Auto[x2, ]

x3 <- Auto$origin == "3"
data3 <- Auto[x3, ]
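The same three subsets can also be obtained with dplyr's filter(), which is equivalent to the logical indexing above (in the Auto data, origin 1 = America, 2 = Europe, 3 = Japan):

data1 <- Auto %>% filter(origin == 1)  # America
data2 <- Auto %>% filter(origin == 2)  # Europe
data3 <- Auto %>% filter(origin == 3)  # Japan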
a <- ggplot(data1, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
b <- ggplot(data2, aes(x = horsepower, y = mpg)) +
  geom_point(color = "deeppink4") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
c <- ggplot(data3, aes(x = horsepower, y = mpg)) +
  geom_point(color = "green4") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
plot_grid(a, b, c, labels = c("America", "Europe", "Japan"))
Full Dataset
Polynomial Regression
set.seed(039)
cv.poly <- function(data){
  cv   <- vfold_cv(data, v = 5)
  ordo <- 2:10
  best_poly <- map_dfr(ordo, function(i) {
    metric_poly <- map_dfr(cv$splits,
                           function(x) {
                             train  <- data[x$in_id, ]
                             test   <- data[-x$in_id, ]
                             mod    <- lm(mpg ~ poly(horsepower, degree = i), data = train)
                             pred   <- predict(mod, newdata = test)
                             rmse   <- RMSE(pred, test$mpg)
                             mae    <- MAE(pred, test$mpg)
                             R2     <- R2(pred, test$mpg)
                             metric <- c(rmse, mae, R2)
                             names(metric) <- c("rmse", "mae", "R2")
                             return(metric)
                           })
    mean_metric_poly <- colMeans(metric_poly)
    mean_metric_poly
  })
  best_poly <- cbind(ordo = ordo, best_poly)
  best_poly
}
best_poly <- cv.poly(Auto)
datatable(best_poly)
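As a sanity check, a single fold can be scored by hand; rsample's analysis() and assessment() accessors return the same rows as data[x$in_id, ] and data[-x$in_id, ] used inside cv.poly (the objects below are only for illustration):

fold1 <- vfold_cv(Auto, v = 5)$splits[[1]]
fit1  <- lm(mpg ~ poly(horsepower, degree = 2), data = analysis(fold1))
pred1 <- predict(fit1, newdata = assessment(fold1))
sqrt(mean((assessment(fold1)$mpg - pred1)^2))  # RMSE on the held-out fold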
poly <- ggplot(data = best_poly, aes(x = ordo, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Degree of Polynomial") +
  ylab("Root Mean Square Error")
poly.2 <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(aes(color = factor(origin)), alpha = 0.55) +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Polynomial Regression, Degree 2") +
  xlab("Horse Power") +
  ylab("miles per gallon")
poly.7 <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(aes(color = factor(origin)), alpha = 0.55) +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 7, raw = T),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Polynomial Regression, Degree 7") +
  xlab("Horse Power") +
  ylab("miles per gallon")
poly
plot_grid(poly.2, poly.7, label_size = 6)
Step Function (Piecewise Constant)
set.seed(123)
cv.pc <- function(data){
  cv     <- vfold_cv(data, v = 5)
  breaks <- 2:10
  cv_tangga <- map_dfr(breaks, function(i){
    metric_tangga <- map_dfr(cv$splits,
                             function(x){
                               training <- data[x$in_id, ]
                               training$horsepower <- cut(training$horsepower, i)
                               mod    <- lm(mpg ~ horsepower, data = training)
                               labs_x <- levels(mod$model[, 2])
                               labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)),
                                                      upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
                               testing        <- data[-x$in_id, ]
                               horsepower_new <- cut(testing$horsepower, c(labs_x_breaks[1, 1], labs_x_breaks[, 2]))
                               pred      <- predict(mod, newdata = list(horsepower = horsepower_new))
                               truth     <- testing$mpg
                               data_eval <- na.omit(data.frame(truth, pred))
                               rmse   <- rmse(truth = data_eval$truth, response = data_eval$pred)
                               mae    <- mae(truth = data_eval$truth, response = data_eval$pred)
                               R2     <- rsq(truth = data_eval$truth, response = data_eval$pred)
                               metric <- c(rmse, mae, R2)
                               names(metric) <- c("rmse", "mae", "R2")
                               return(metric)
                             })
    mean_metric_tangga <- colMeans(metric_tangga)
    mean_metric_tangga
  })
  cv_tangga <- cbind(breaks = breaks, cv_tangga)
  cv_tangga
}
cv_tangga <- cv.pc(Auto)
datatable(cv_tangga)
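The two sub() calls inside cv.pc recover the numeric break points from the interval labels that cut() attaches to its factor levels, which have the form "(lower,upper]". A quick illustration on a toy vector (values chosen arbitrarily):

labs <- levels(cut(c(50, 100, 150, 200), 3))
sub("\\((.+),.*", "\\1", labs)        # lower bounds, as text
sub("[^,]*,([^]]*)\\]", "\\1", labs)  # upper bounds, as text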
cv_pice_rmse <- ggplot(data = cv_tangga, aes(x = breaks, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Breaks of Piecewise Constant") +
  ylab("Root Mean Square Error")
pice8 <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(aes(color = factor(origin)), alpha = 0.55) +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 8),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Piecewise Constant Regression, 8 Breaks") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_pice_rmse, pice8)
Natural Cubic Spline
set.seed(123)
cv.sp <- function(data){
  cv <- vfold_cv(data, v = 5)
  df <- 2:10
  cv_spline <- map_dfr(df, function(i){
    metric.spline3 <- map_dfr(cv$splits,
                              function(x){
                                train  <- data[x$in_id, ]
                                test   <- data[-x$in_id, ]
                                mod    <- lm(mpg ~ ns(horsepower, df = i), data = train)
                                pred   <- predict(mod, newdata = test)
                                rmse   <- RMSE(pred, test$mpg)
                                mae    <- MAE(pred, test$mpg)
                                R2     <- R2(pred, test$mpg)
                                metric <- c(rmse, mae, R2)
                                names(metric) <- c("rmse", "mae", "R2")
                                return(metric)
                              })
    mean.metric.spline3 <- colMeans(metric.spline3)  # average over the 5 folds
    mean.metric.spline3
  })
  cv_spline <- cbind(df = df, cv_spline)
  cv_spline
}
cv_spline <- cv.sp(Auto)
datatable(cv_spline)
attr(ns(Auto$horsepower, df=7),"knots")
## 14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429%
## 68.85714 79.71429 89.57143 98.00000 113.57143 150.00000
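ns() with df = 7 places its 6 interior knots at equally spaced quantiles of the predictor, so the knot locations reported above should coincide with the corresponding sample quantiles:

quantile(Auto$horsepower, probs = (1:6)/7)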
set.seed(123)
cross.val <- vfold_cv(Auto, v = 5)

knot10 <- map_dfr(cross.val$splits,
                  function(x){
                    mod  <- lm(mpg ~ ns(horsepower, knots = c(67, 72, 80, 88, 93, 100, 110, 140, 157)),
                               data = Auto[x$in_id, ])
                    pred <- predict(mod, newdata = Auto[-x$in_id, ])
                    test <- Auto[-x$in_id, ]
                    rmse <- RMSE(pred, test$mpg)
                    mae  <- MAE(pred, test$mpg)
                    R2   <- R2(pred, test$mpg)
                    metric <- c(rmse, mae, R2)
                    names(metric) <- c("rmse", "mae", "R2")
                    return(metric)
                  })
mat_knot10 <- colMeans(knot10)
mat_knot10
## rmse mae R2
## 4.3030765 3.1982022 0.6991418
cv_spline_rmse <- ggplot(data = cv_spline, aes(x = df, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Df of Natural Cubic Spline") +
  ylab("Root Mean Square Error")
pspline <- ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(aes(color = factor(origin)), alpha = 0.55) +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, knots = c(67, 72, 80, 88, 93, 100, 110, 140, 157)),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Natural Cubic Spline Regression") +
  xlab("Horse Power") +
  ylab("miles per gallon") +
  geom_vline(xintercept = c(67, 72, 80, 88, 93, 100, 110, 140, 157), col = "grey50", lty = 2)
plot_grid(cv_spline_rmse, pspline)
Model Comparison
plot_grid(poly.2, pice8, pspline)
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T), lty = 1,
              col = "red", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 8), lty = 1,
              col = "green", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, 10),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Scatter Plot of Mpg vs Horsepower") +
  xlab("Horse Power") +
  ylab("Mpg")
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(rmse),
                      cv_tangga %>% select(-1) %>% slice_min(rmse),
                      cv_spline %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2", "Step 8", "NCS(10)")
best_model <- data.frame(nama_model, nilai_metric)
datatable(best_model)
American Data
Polynomial Regression
set.seed(355)
best_poly1 <- cv.poly(data1)
datatable(best_poly1)
cv_poly1 <- ggplot(data = best_poly1, aes(x = ordo, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Degree of Polynomial") +
  ylab("Root Mean Square Error")
poly_data1 <- ggplot(data1, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Polynomial Regression, Degree 2") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_poly1, poly_data1, labels = c("cv_poly1", "poly_data1"), label_size = 4)
Step Function (Piecewise Constant)
set.seed(0366)
cv_tangga1 <- cv.pc(data1)
datatable(cv_tangga1)
cv_pice1 <- ggplot(data = cv_tangga1, aes(x = breaks, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "orchid3") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Breaks of Piecewise Constant") +
  ylab("Root Mean Square Error")
pice1 <- ggplot(data1, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orchid2") +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 9),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Piecewise Constant, 9 Breaks") +
  xlab("Horsepower") +
  ylab("miles per gallon")
plot_grid(cv_pice1, pice1, labels = c("cv_pice1", "pice1"), label_size = 4)
Natural Cubic Spline
set.seed(035)
cv_spline1 <- cv.sp(data1)
datatable(cv_spline1)
cv_splin1 <- ggplot(data = cv_spline1, aes(x = df, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "deepskyblue1") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Df of Natural Cubic Spline") +
  ylab("Root Mean Square Error")
attr(ns(data1$horsepower, df=5),"knots")
## 20% 40% 60% 80%
## 85.0 99.2 122.0 150.0
set.seed(123)
cross.val <- vfold_cv(data1, v = 5)

knot5 <- map_dfr(cross.val$splits,
                 function(x){
                   mod  <- lm(mpg ~ ns(horsepower, knots = c(85, 99.2, 122, 150)),
                              data = data1[x$in_id, ])
                   pred <- predict(mod, newdata = data1[-x$in_id, ])
                   test <- data1[-x$in_id, ]
                   rmse <- RMSE(pred, test$mpg)
                   mae  <- MAE(pred, test$mpg)
                   R2   <- R2(pred, test$mpg)
                   metric <- c(rmse, mae, R2)
                   names(metric) <- c("rmse", "mae", "R2")
                   return(metric)
                 })
mat_knot5 <- colMeans(knot5)
pspline1 <- ggplot(data1, aes(x = horsepower, y = mpg)) +
  geom_point(color = "deepskyblue1") +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, knots = c(85, 99.2, 122, 150)),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Natural Cubic Spline Regression, Df 5") +
  xlab("Horse Power") +
  ylab("miles per gallon") +
  geom_vline(xintercept = c(85, 99.2, 122, 150), col = "grey50", lty = 2)
plot_grid(cv_splin1, pspline1, labels = c("CV spline", "Plot Spline"), label_size = 4)
Model Comparison
plot_grid(poly_data1, pice1, pspline1)
ggplot(data1, aes(x = horsepower, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T), lty = 1,
              col = "red", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 9), lty = 1,
              col = "green", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, 5),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Scatter Plot of Mpg vs Horsepower") +
  xlab("Horse Power") +
  ylab("Mpg")
nilai_metric <- rbind(best_poly1 %>% select(-1) %>% slice_min(rmse),
                      cv_tangga1 %>% select(-1) %>% slice_min(rmse),
                      cv_spline1 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2", "Step 9", "NCS(5)")
best_model <- data.frame(nama_model, nilai_metric)
datatable(best_model)
European Data
Polynomial Regression
set.seed(031)
best_poly2 <- cv.poly(data2)
datatable(best_poly2)
cv_poly2 <- ggplot(data = best_poly2, aes(x = ordo, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Degree of Polynomial") +
  ylab("Root Mean Square Error")
poly_data2 <- ggplot(data2, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Polynomial Regression, Degree 2") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_poly2, poly_data2, labels = c("cv_poly2", "poly_data2"), label_size = 4)
Step Function (Piecewise Constant)
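The step-function fit for the European subset follows the same pattern as the American and Japanese subsets. A sketch that defines the cv_tangga2 and pice2 objects used in the comparison below (the seed and the 4 breaks shown in the plot are assumptions, 4 matching the label used later):

set.seed(123)  # assumed seed
cv_tangga2 <- cv.pc(data2)
datatable(cv_tangga2)
cv_pice2 <- ggplot(data = cv_tangga2, aes(x = breaks, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "orchid3") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Breaks of Piecewise Constant") +
  ylab("Root Mean Square Error")
pice2 <- ggplot(data2, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orchid2") +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 4),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Piecewise Constant, 4 Breaks") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_pice2, pice2)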
Natural Cubic Spline
cv_splined <- cv.sp(data2)
datatable(cv_splined)
splin2 <- ggplot(data = cv_splined, aes(x = df, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "deepskyblue2") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Df of Natural Cubic Spline") +
  ylab("Root Mean Square Error")
attr(ns(data2$horsepower, df=3),"knots")
## 33.33333% 66.66667%
## 71 87
set.seed(123)
cross.val <- vfold_cv(data2, v = 5)

knot3 <- map_dfr(cross.val$splits,
                 function(x){
                   mod  <- lm(mpg ~ ns(horsepower, knots = c(71, 87)),
                              data = data2[x$in_id, ])
                   pred <- predict(mod, newdata = data2[-x$in_id, ])
                   test <- data2[-x$in_id, ]
                   rmse <- RMSE(pred, test$mpg)
                   mae  <- MAE(pred, test$mpg)
                   R2   <- R2(pred, test$mpg)
                   metric <- c(rmse, mae, R2)
                   names(metric) <- c("rmse", "mae", "R2")
                   return(metric)
                 })
mat_knot3 <- colMeans(knot3)
p_sline2 <- ggplot(data2, aes(x = horsepower, y = mpg)) +
  geom_point(color = "deepskyblue2") +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, knots = c(71, 87)),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Natural Cubic Spline Regression, Df 3") +
  xlab("Horse Power") +
  ylab("miles per gallon") +
  geom_vline(xintercept = c(71, 87), col = "grey50", lty = 2)
plot_grid(splin2, p_sline2)
Model Comparison
plot_grid(poly_data2, pice2, p_sline2)
ggplot(data2, aes(x = horsepower, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 2, raw = T), lty = 1,
              col = "red", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 4), lty = 1,
              col = "green", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, 3),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Scatter Plot of Mpg vs Horsepower") +
  xlab("Horse Power") +
  ylab("Mpg")
nilai_metric <- rbind(best_poly2 %>% select(-1) %>% slice_min(rmse),
                      cv_tangga2 %>% select(-1) %>% slice_min(rmse),
                      cv_splined %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2", "Step 4", "NCS(3)")
best_model <- data.frame(nama_model, nilai_metric)
datatable(best_model)
Japanese Data
Polynomial Regression
set.seed(031)
best_poly3 <- cv.poly(data3)
datatable(best_poly3)
cv_poly3 <- ggplot(data = best_poly3, aes(x = ordo, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "blue") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Degree of Polynomial") +
  ylab("Root Mean Square Error")
poly_data3 <- ggplot(data3, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orange") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 3, raw = T),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Polynomial Regression, Degree 3") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_poly3, poly_data3, labels = c("cv_poly3", "poly_data3"), label_size = 4)
Step Function (Piecewise Constant)
set.seed(039)
cv_tangga3 <- cv.pc(data3)
datatable(cv_tangga3)
cv_pice3 <- ggplot(data = cv_tangga3, aes(x = breaks, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "orchid4") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Breaks of Piecewise Constant") +
  ylab("Root Mean Square Error")
pice3 <- ggplot(data3, aes(x = horsepower, y = mpg)) +
  geom_point(color = "orchid4") +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 5),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Piecewise Constant, 5 Breaks") +
  xlab("Horse Power") +
  ylab("miles per gallon")
plot_grid(cv_pice3, pice3, labels = c("cv_pice3", "pice3"), label_size = 4)
Natural Cubic Spline
set.seed(039)
cv_spline3 <- cv.sp(data3)

cv_splin3 <- ggplot(data = cv_spline3, aes(x = df, y = rmse, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point(color = "deepskyblue1") +
  theme_bw() +
  ggtitle("K-Fold Cross Validation") +
  xlab("Df of Natural Cubic Spline") +
  ylab("Root Mean Square Error")
attr(ns(data3$horsepower, df=3),"knots")
## 33.33333% 66.66667%
## 68 90
set.seed(123)
cross.val <- vfold_cv(data3, v = 5)

knot3.3 <- map_dfr(cross.val$splits,
                   function(x){
                     mod  <- lm(mpg ~ ns(horsepower, knots = c(68, 90)),
                                data = data3[x$in_id, ])
                     pred <- predict(mod, newdata = data3[-x$in_id, ])
                     test <- data3[-x$in_id, ]
                     rmse <- RMSE(pred, test$mpg)
                     mae  <- MAE(pred, test$mpg)
                     R2   <- R2(pred, test$mpg)
                     metric <- c(rmse, mae, R2)
                     names(metric) <- c("rmse", "mae", "R2")
                     return(metric)
                   })
mat_knot3.3 <- colMeans(knot3.3)
psline3 <- ggplot(data3, aes(x = horsepower, y = mpg)) +
  geom_point(color = "deepskyblue1") +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, 3),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Natural Cubic Spline Regression, Df 3") +
  xlab("Horse Power") +
  ylab("miles per gallon") +
  geom_vline(xintercept = c(68, 90), col = "grey50", lty = 2)
plot_grid(cv_splin3, psline3, labels = c("CV spline", "Plot Spline"), label_size = 4)
Model Comparison
plot_grid(poly_data3, pice3, psline3)
ggplot(data3, aes(x = horsepower, y = mpg)) +
  geom_point(color = "hotpink") +
  stat_smooth(method = "lm",
              formula = y ~ poly(x, 3, raw = T), lty = 1,
              col = "red", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ cut(x, 5), lty = 1,
              col = "green", se = F) +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, 3),
              lty = 1, col = "black", se = F) +
  theme_bw() +
  ggtitle("Scatter Plot of Mpg vs Horsepower") +
  xlab("Horse Power") +
  ylab("Mpg")
nilai_metric <- rbind(best_poly3 %>% select(-1) %>% slice_min(rmse),
                      cv_tangga3 %>% select(-1) %>% slice_min(rmse),
                      cv_spline3 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 3", "Step 5", "NCS(3)")
best_model <- data.frame(nama_model, nilai_metric)
datatable(best_model)
Best Models
nilai_metric <- rbind(cv_spline  %>% select(-1) %>% slice_min(rmse),
                      cv_spline1 %>% select(-1) %>% slice_min(rmse),
                      best_poly2 %>% select(-1) %>% slice_min(rmse),
                      best_poly3 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Full dataset (NCS 10)", "America (NCS 5)", "Europe (Poly 2)", "Japan (Poly 3)")
best_model <- data.frame(nama_model, nilai_metric)
datatable(best_model)
plot_grid(pspline,pspline1,poly_data2,poly_data3)