Problem
Perform cross-validation to obtain an optimal model of mpg vs horsepower using the following three methods, then compare the results:
Polynomial Regression
Piecewise Constant
Natural Cubic Splines
Repeat the above for each subset of the data defined by the manufacturer's country of origin (America, Europe, Japan).
Plot the best models in a single frame.
Discuss your results.
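All three methods below are tuned with the same 5-fold cross-validation structure; only the model-fitting line changes. A minimal sketch of that shared skeleton (the names cv_skeleton and fit_fun are illustrative only and are not used later; the packages are the same ones attached in the setup chunk that follows):

library(rsample)   # vfold_cv(), analysis(), assessment()
library(purrr)     # map_dfr()
library(caret)     # RMSE(), MAE(), R2()
cv_skeleton <- function(data, fit_fun) {
  folds <- vfold_cv(data, v = 5)
  map_dfr(folds$splits, function(s) {
    train <- analysis(s)      # training part of the fold
    test  <- assessment(s)    # held-out part of the fold
    mod   <- fit_fun(train)   # e.g. lm(mpg ~ poly(horsepower, 2), data = train)
    pred  <- predict(mod, newdata = test)
    c(rmse = RMSE(pred, test$mpg),
      mae  = MAE(pred, test$mpg),
      R2   = R2(pred, test$mpg))
  })
}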
library(tidyverse)
library(splines)
library(rsample)
library(mlr3measures)
library(ISLR)
library(caret)
library(DT)
library(cowplot)
library(ggpmisc)
library(tibble)
Data Exploration
attach(Auto)
head(Auto)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
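Since the modelling is repeated per country of origin, it is worth checking how many cars fall in each subset (a small aside; in ISLR::Auto the origin code is 1 = American, 2 = European, 3 = Japanese):

# Number of observations per origin subset
table(Auto$origin)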
data= Auto
A <- ggplot(Auto,aes(x=horsepower,y=mpg),group = origin) +
geom_point(aes(color = factor(origin)))+
stat_smooth(aes(x=horsepower,y=mpg),method = "lm",
formula = y~x,lty = 1,
col = "red",se = F)+
theme_bw()+
ggtitle("Scatter Plot Mpg dan Horse Power")+
xlab("Horse Power")+
ylab("Mpg")
B <- ggplot(Auto, aes(x = horsepower, y = mpg, colour = factor(origin))) +
geom_point() + stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
plot_grid(A, B)
# Subset the data by country of origin (1 = American, 2 = European, 3 = Japanese)
x1 <- Auto$origin == 1
data1 <- Auto[x1, ]   # American cars
x2 <- Auto$origin == 2
data2 <- Auto[x2, ]   # European cars
x3 <- Auto$origin == 3
data3 <- Auto[x3, ]   # Japanese cars
a <-ggplot(data1, aes(x = horsepower, y = mpg),color="orange") +
geom_point(color="orange") +
stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
b <- ggplot(data2, aes(x = horsepower, y = mpg),color="deeppink4") +
geom_point(color="deeppink4")+
stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
c <- ggplot(data3, aes(x = horsepower, y = mpg),color="green4")+
geom_point(color="green4")+
stat_smooth(method = "lm", formula = y ~ x, se = FALSE)
plot_grid(a, b, c, labels = c("Amerika", "Eropa", "Jepang"))
Full Dataset
Polynomial Regression
set.seed(039)
cv.poly <- function(data){
  # 5-fold cross-validation over polynomial degrees 2-10;
  # returns the fold-averaged RMSE, MAE and R2 for each degree.
  cv <- vfold_cv(data, v = 5)
  ordo <- 2:10
  best_poly <- map_dfr(ordo, function(i) {
    metric_poly <- map_dfr(cv$splits, function(x) {
      train <- data[x$in_id, ]
      test  <- data[-x$in_id, ]
      mod  <- lm(mpg ~ poly(horsepower, i), data = train)   # degree-i polynomial
      pred <- predict(mod, newdata = test)
      c(rmse = RMSE(pred, test$mpg),
        mae  = MAE(pred, test$mpg),
        R2   = R2(pred, test$mpg))
    })
    colMeans(metric_poly)   # average the metrics over the 5 folds
  })
  cbind(ordo = ordo, best_poly)
}
best_poly <- cv.poly(Auto)
datatable(best_poly)
poly <- ggplot(data=best_poly, aes(x=ordo, y=rmse, group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Degree of Polynomial") +
ylab("Root Mean Square Error")
poly.2 <- ggplot(Auto,aes(x=horsepower, y=mpg),group=origin) +
geom_point(aes(color=factor(origin)),alpha=0.55) +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi Polinomial Ordo 2") +
xlab("Horse Power") +
ylab("mile per gallon")
poly.7 <- ggplot(Auto,aes(x=horsepower, y=mpg),group=origin) +
geom_point(aes(color=factor(origin)),alpha=0.55) +
stat_smooth(method = "lm",
formula = y~poly(x,7,raw=T),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi Polinomial Ordo 7") +
xlab("Horse Power") +
ylab("mile per gallon")
poly
plot_grid(poly.2, poly.7, label_size = 6)
Step Function (Piecewise Constant)
set.seed(123)
cv.pc <- function(data){
  # 5-fold cross-validation over 2-10 bins for a piecewise-constant (step function) fit.
  cv <- vfold_cv(data, v = 5)
  breaks <- 2:10
  cv_tangga <- map_dfr(breaks, function(i){
    metric_tangga <- map_dfr(cv$splits, function(x){
      training <- data[x$in_id, ]
      training$horsepower <- cut(training$horsepower, i)   # bin horsepower into i intervals
      mod <- lm(mpg ~ horsepower, data = training)
      # Recover the numeric cut points from the factor labels, e.g. "(46,78.2]"
      labs_x <- levels(mod$model[, 2])
      labs_x_breaks <- cbind(lower = as.numeric(sub("\\((.+),.*", "\\1", labs_x)),
                             upper = as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs_x)))
      # Re-bin the test data with the training cut points; values outside the range become NA
      testing <- data[-x$in_id, ]
      horsepower_new <- cut(testing$horsepower, c(labs_x_breaks[1, 1], labs_x_breaks[, 2]))
      pred  <- predict(mod, newdata = list(horsepower = horsepower_new))
      truth <- testing$mpg
      data_eval <- na.omit(data.frame(truth, pred))
      c(rmse = mlr3measures::rmse(truth = data_eval$truth, response = data_eval$pred),
        mae  = mlr3measures::mae(truth = data_eval$truth, response = data_eval$pred),
        R2   = mlr3measures::rsq(truth = data_eval$truth, response = data_eval$pred))
    })
    colMeans(metric_tangga)   # average the metrics over the 5 folds
  })
  cbind(breaks = breaks, cv_tangga)
}
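The least obvious step in cv.pc() is turning the factor labels produced by cut() back into numeric breakpoints so the test data can be binned with the training cut points. A small stand-alone illustration of that parsing (the object names here are only for this example):

labs  <- levels(cut(Auto$horsepower, 4))                    # factor labels of the form "(lower,upper]"
lower <- as.numeric(sub("\\((.+),.*", "\\1", labs))         # text between "(" and the comma
upper <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", labs))   # text between the comma and "]"
cbind(lower, upper)                                         # numeric interval bounds, one row per level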
cv_tangga <- cv.pc(Auto)
datatable(cv_tangga)
cv_pice_rmse <- ggplot(data=cv_tangga, aes(x=breaks, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Breaks of Piecewise Constant") +
ylab("Root Mean Square Error")
pice8 <- ggplot(Auto,aes(x=horsepower, y=mpg),group=origin) +
geom_point(aes(color=factor(origin)),alpha=0.55) +
stat_smooth(method = "lm",
formula = y~cut(x,8),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi PC Breaks 8") +
xlab("Horse Power") +
ylab("mile per gallon")
plot_grid(cv_pice_rmse, pice8)
Natural Cubic Spline
set.seed(123)
cv.sp <- function(data){
  # 5-fold cross-validation over natural cubic spline degrees of freedom 2-10.
  cv <- vfold_cv(data, v = 5)
  df <- 2:10
  cv_spline <- map_dfr(df, function(i){
    metric.spline3 <- map_dfr(cv$splits, function(x){
      train <- data[x$in_id, ]
      test  <- data[-x$in_id, ]
      mod  <- lm(mpg ~ ns(horsepower, df = i), data = train)
      pred <- predict(mod, newdata = test)
      c(rmse = RMSE(pred, test$mpg),
        mae  = MAE(pred, test$mpg),
        R2   = R2(pred, test$mpg))
    })
    colMeans(metric.spline3)   # average over the 5 folds
  })
  cbind(df = df, cv_spline)
}
cv_spline <- cv.sp(Auto)
datatable(cv_spline)
attr(ns(Auto$horsepower, df=7),"knots")
## 14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429%
## 68.85714 79.71429 89.57143 98.00000 113.57143 150.00000
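For reference, ns() with df = k (and the default intercept = FALSE) places k - 1 interior knots at quantiles of the predictor, so the nine hand-picked knots used below play the role of a df = 10 basis. The quantile-based knots ns() itself would choose for df = 10 can be inspected for comparison:

# Interior knots ns() would place for df = 10 (nine knots at quantiles of horsepower)
attr(ns(Auto$horsepower, df = 10), "knots")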
set.seed(123)
cross.val <- vfold_cv(Auto,v=5)
knot10<- map_dfr(cross.val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower, knots = c(67,72,80,88,93,100,110,140,157)),
data=Auto[x$in_id,])
pred <- predict(mod,newdata=Auto[-x$in_id,])
test <- Auto[-x$in_id,]
rmse <- RMSE(pred,test$mpg)
mae <- MAE(pred,test$mpg)
R2 <- R2(pred,test$mpg)
metric <- c(rmse,mae,R2)
names(metric) <- c("rmse","mae","R2")
return(metric)
}
)
mat_knot10 <- colMeans(knot10)
mat_knot10
## rmse mae R2
## 4.3030765 3.1982022 0.6991418
cv_spline_rmse <- ggplot(data=cv_spline, aes(x=df, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Df of Natural Cubic Spline ") +
ylab("Root Mean Square Error")
pspline <- ggplot(Auto,aes(x=horsepower, y=mpg),group=origin) +
geom_point(aes(color=factor(origin)),alpha=0.55) +
stat_smooth(method = "lm",
formula = y~ns(x,knots = c(67,72,80,88,93,100,110,140,157)),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi NCS") +
xlab("Horse Power") +
ylab("mile per gallon")+
geom_vline(xintercept = c(67,72,80,88,93,100,110,140,157), col="grey50", lty=2)
plot_grid(cv_spline_rmse, pspline)
Model Comparison
plot_grid(poly.2, pice8, pspline)
ggplot(Auto,aes(x = horsepower, y = mpg)) +
geom_point(color ="hotpink")+
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),lty = 1,
col = "red",se = F)+
stat_smooth(method = "lm",
formula = y~cut(x,8),lty = 1,
col = "green",se = F)+
stat_smooth(method = "lm",
formula =y ~ ns(x,10),
lty = 1,col = "black",se = F)+
theme_bw()+
ggtitle("Scatter Plot Mpg dan Horse Power")+
xlab("Horse Power")+
ylab("Mpg")
nilai_metric <- rbind(best_poly %>% select(-1) %>% slice_min(rmse),
cv_tangga %>% select(-1) %>% slice_min(rmse),
cv_spline %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2","Tangga 8","NCS(10)")
best_model <- data.frame(nama_model,nilai_metric)
datatable(best_model)
American Data
Polynomial Regression
set.seed(355)
best_poly1 <- cv.poly(data1)
datatable(best_poly1)
cv_poly1 <- ggplot(data=best_poly1, aes(x=ordo, y=rmse, group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Degree of Polynomial") +
ylab("Root Mean Square Error")
poly_data1<- ggplot(data1,aes(x=horsepower, y=mpg)) +
geom_point(color="orange") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi Polinomial Ordo 2") +
xlab("Horse Power") +
ylab("mile per gallon")
plot_grid(cv_poly1, poly_data1, labels = c("cv_poly1", "poly_data1"), label_size = 4)
Step Function (Piecewise Constant)
set.seed(0366)
cv_tangga1 <- cv.pc(data1)
datatable(cv_tangga1)
cv_pice1 <- ggplot(data=cv_tangga1, aes(x=breaks, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "orchid3")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Breaks of Piecewise Constant") +
ylab("Root Mean Square Error")
pice1 <- ggplot(data1,aes(x=horsepower, y=mpg)) +
geom_point(color="orchid2") +
stat_smooth(method = "lm",
formula = y~cut(x,9),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Piecewise Constant Breaks 9") +
xlab("Horsepower") +
ylab("mile per gallon")
plot_grid(cv_pice1, pice1, labels = c("cv_pice1","pice1"), label_size = 4)
Natural Cubic Spline
set.seed(035)
cv_spline1 <- cv.sp(data1)
datatable(cv_spline1)
cv_splin1 <- ggplot(data=cv_spline1, aes(x=df, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "deepskyblue1")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Df of Natural Cubic Spline ") +
ylab("Root Mean Square Error")
attr(ns(data1$horsepower, df=5),"knots")
## 20% 40% 60% 80%
## 85.0 99.2 122.0 150.0
set.seed(123)
cross.val <- vfold_cv(data1,v=5)
knot5<- map_dfr(cross.val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower, knots = c(85,99.2,122,150)),
data=data1[x$in_id,])
pred <- predict(mod,newdata=data1[-x$in_id,])
test <- data1[-x$in_id,]
rmse <- RMSE(pred,test$mpg)
mae <- MAE(pred,test$mpg)
R2 <- R2(pred,test$mpg)
metric <- c(rmse,mae,R2)
names(metric) <- c("rmse","mae","R2")
return(metric)
}
)
mat_knot5 <- colMeans(knot5)
pspline1 <- ggplot(data1,aes(x=horsepower, y=mpg)) +
geom_point(color="deepskyblue1") +
stat_smooth(method = "lm",
formula = y~ns(x,knots = c(85,99.2,122,150)),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi NCS Df 5") +
xlab("Horse Power") +
ylab("mile per gallon")+
geom_vline(xintercept = c(85,99.2,122,150), col="grey50", lty=2)
plot_grid(cv_splin1, pspline1, labels = c("CV spline","Plot Spline"), label_size = 4)
Model Comparison
plot_grid(poly_data1, pice1, pspline1)
ggplot(data1,aes(x = horsepower, y = mpg)) +
geom_point(color ="hotpink")+
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),lty = 1,
col = "red",se = F)+
stat_smooth(method = "lm",
formula = y~cut(x,9),lty = 1,
col = "green",se = F)+
stat_smooth(method = "lm",
formula =y ~ ns(x,5),
lty = 1,col = "black",se = F)+
theme_bw()+
ggtitle("Scatter Plot Mpg dan Horse Power")+
xlab("Horse Power")+
ylab("Mpg")
nilai_metric <- rbind(best_poly1 %>% select(-1) %>% slice_min(rmse),
cv_tangga1 %>% select(-1) %>% slice_min(rmse),
cv_spline1 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2","Tangga 9","NCS(5)")
best_model <- data.frame(nama_model,nilai_metric)
datatable(best_model)
European Data
Polynomial Regression
set.seed(031)
best_poly2 <- cv.poly(data2)
datatable(best_poly2)
cv_poly2 <- ggplot(data=best_poly2, aes(x=ordo, y=rmse, group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Degree of Polynomial") +
ylab("Root Mean Square Error")
poly_data2<- ggplot(data2,aes(x=horsepower, y=mpg)) +
geom_point(color="orange") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi Polinomial Ordo 2") +
xlab("Horse Power") +
ylab("mile per gallon")
plot_grid(cv_poly2, poly_data2, labels = c("cv_poly2", "poly_data2"), label_size = 4)
Step Function (Piecewise Constant)
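The code chunk for this subsection is not shown, although the objects it would create (cv_tangga2, pice2) and a 4-break step model ("Tangga 4") are referenced in the comparison below. A sketch mirroring the American and Japanese subsets, assuming the same cv.pc() helper and an arbitrary seed:

set.seed(031)
cv_tangga2 <- cv.pc(data2)
datatable(cv_tangga2)
cv_pice2 <- ggplot(data=cv_tangga2, aes(x=breaks, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "orchid3")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Breaks of Piecewise Constant") +
ylab("Root Mean Square Error")
pice2 <- ggplot(data2,aes(x=horsepower, y=mpg)) +
geom_point(color="orchid2") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Piecewise Constant Breaks 4") +
xlab("Horsepower") +
ylab("mile per gallon")
plot_grid(cv_pice2, pice2, labels = c("cv_pice2","pice2"), label_size = 4)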
Natural Cubic Spline
cv_splined <- cv.sp(data2)
datatable(cv_splined)
splin2 <- ggplot(data=cv_splined, aes(x=df, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "deepskyblue2")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Df of Natural Cubic Spline ") +
ylab("Root Mean Square Error")
attr(ns(data2$horsepower, df=3),"knots")
## 33.33333% 66.66667%
## 71 87
set.seed(123)
cross.val <- vfold_cv(data2,v=5)
knot3<- map_dfr(cross.val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower, knots = c(71,87)),
data=data2[x$in_id,])
pred <- predict(mod,newdata=data2[-x$in_id,])
test <- data2[-x$in_id,]
rmse <- RMSE(pred,test$mpg)
mae <- MAE(pred,test$mpg)
R2 <- R2(pred,test$mpg)
metric <- c(rmse,mae,R2)
names(metric) <- c("rmse","mae","R2")
return(metric)
}
)
mat_knot3 <- colMeans(knot3)
p_sline2 <- ggplot(data2,aes(x=horsepower, y=mpg)) +
geom_point(color="deepskyblue2") +
stat_smooth(method = "lm",
formula = y~ns(x,knots = c(71,87)),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi NCS Df 3") +
xlab("Horse Power") +
ylab("mile per gallon")+
geom_vline(xintercept = c(71,87), col="grey50", lty=2)
plot_grid(splin2, p_sline2)
Model Comparison
plot_grid(poly_data2, pice2, p_sline2)
ggplot(data2,aes(x = horsepower, y = mpg)) +
geom_point(color ="hotpink")+
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),lty = 1,
col = "red",se = F)+
stat_smooth(method = "lm",
formula = y~cut(x,4),lty = 1,
col = "green",se = F)+
stat_smooth(method = "lm",
formula =y ~ ns(x,3),
lty = 1,col = "black",se = F)+
theme_bw()+
ggtitle("Scatter Plot Mpg dan Horse Power")+
xlab("Horse Power")+
ylab("Mpg")
nilai_metric <- rbind(best_poly2 %>% select(-1) %>% slice_min(rmse),
cv_tangga2 %>% select(-1) %>% slice_min(rmse),
cv_splined %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 2","Tangga 4","NCS(3)")
best_model <- data.frame(nama_model,nilai_metric)
datatable(best_model)
Japanese Data
Polynomial Regression
set.seed(031)
best_poly3 <- cv.poly(data3)
datatable(best_poly3)
cv_poly3 <- ggplot(data=best_poly3, aes(x=ordo, y=rmse, group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "blue")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Degree of Polynomial") +
ylab("Root Mean Square Error")
poly_data3<- ggplot(data3,aes(x=horsepower, y=mpg)) +
geom_point(color="orange") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi Polinomial Ordo 3 ") +
xlab("Horse Power") +
ylab("mile per gallon")
plot_grid(cv_poly3, poly_data3, labels = c("cv_poly3", "poly_data3"), label_size = 4)
Step Function (Piecewise Constant)
set.seed(039)
cv_tangga3 <- cv.pc(data3)
datatable(cv_tangga3)
cv_pice3 <- ggplot(data=cv_tangga3, aes(x=breaks, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "orchid4")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Breaks of Piecewise Constant") +
ylab("Root Mean Square Error")
pice3 <- ggplot(data3,aes(x=horsepower, y=mpg)) +
geom_point(color="orchid4") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Piecewise Constant Breaks 5") +
xlab("Horse Power") +
ylab("mile per gallon")
plot_grid(cv_pice3, pice3, labels = c("cv_pice3","pice3"), label_size = 4)
Natural Cubic Spline
set.seed(039)
cv_spline3 <- cv.sp(data3)
cv_splin3 <- ggplot(data=cv_spline3, aes(x=df, y=rmse,group=1)) +
geom_line(linetype = "dashed")+
geom_point(color = "deepskyblue1")+
theme_bw()+
ggtitle("K-Fold Cross Validation") +
xlab("Df of Natural Cubic Spline ") +
ylab("Root Mean Square Error")
attr(ns(data3$horsepower, df=3),"knots")
## 33.33333% 66.66667%
## 68 90
set.seed(123)
cross.val <- vfold_cv(data3,v=5)   # 5-fold split of the Japanese subset
knot3.3<- map_dfr(cross.val$splits,
function(x){
mod <- lm(mpg ~ ns(horsepower, knots = c(68,90)),
data=data3[x$in_id,])
pred <- predict(mod,newdata=data3[-x$in_id,])
test <- data3[-x$in_id,]
rmse <- RMSE(pred,test$mpg)
mae <- MAE(pred,test$mpg)
R2 <- R2(pred,test$mpg)
metric <- c(rmse,mae,R2)
names(metric) <- c("rmse","mae","R2")
return(metric)
}
)
mat_knot3.3 <- colMeans(knot3.3)
psline3 <- ggplot(data3,aes(x=horsepower, y=mpg)) +
geom_point(color="deepskyblue1") +
stat_smooth(method = "lm",
formula = y~ns(x,3),
lty = 1, col = "black",se = F)+
theme_bw()+
ggtitle("Regresi NCS Knot 3") +
xlab("Horse Power") +
ylab("mile per gallon")+
geom_vline(xintercept = c(68,90), col="grey50", lty=2)
plot_grid(cv_splin3, psline3, labels = c("CV spline","Plot Spline"), label_size = 4)
Model Comparison
plot_grid(poly_data3, pice3, psline3)
ggplot(data3,aes(x = horsepower, y = mpg)) +
geom_point(color ="hotpink")+
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),lty = 1,
col = "red",se = F)+
stat_smooth(method = "lm",
formula = y~cut(x,5),lty = 1,
col = "green",se = F)+
stat_smooth(method = "lm",
formula =y ~ ns(x,3),
lty = 1,col = "black",se = F)+
theme_bw()+
ggtitle("Scatter Plot Mpg dan Horse Power")+
xlab("Horse Power")+
ylab("Mpg")
nilai_metric <- rbind(best_poly3 %>% select(-1) %>% slice_min(rmse),
cv_tangga3 %>% select(-1) %>% slice_min(rmse),
cv_spline3 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Poly 3","Tangga 5","NCS(3)")
best_model <- data.frame(nama_model,nilai_metric)
datatable(best_model)
Best Models
nilai_metric <- rbind(cv_spline%>% select(-1) %>% slice_min(rmse),
cv_spline1%>% select(-1) %>% slice_min(rmse),
best_poly2 %>% select(-1) %>% slice_min(rmse),
best_poly3 %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Seluruh dataset (NCS 10)","Amerika (NCS 5)","Eropa (Poly 2)","Jepang (Poly 3)")
best_model <- data.frame(nama_model,nilai_metric)
datatable(best_model)
plot_grid(pspline, pspline1, poly_data2, poly_data3)