Soal
Lakukan cross validation untuk menghasilkan pemodelan mpg vs horsepower optimal menggunakan 3 metode berikut, kemudian bandingkan hasilnya:
Reg polinomial
Piecewise constant
Natural cubil splines
Lakukan hal di atas untuk masing-masing sub-set data berdasarkan asal negara produsennya origin (Amerika, Eropa, Jepang).
Plot model terbaik dalam satu frame.
Berikan ulasan terhadap hasil Anda.
mpg cylinders displacement horsepower weight acceleration year origin
1 18 8 307 130 3504 12.0 70 1
2 15 8 350 165 3693 11.5 70 1
3 18 8 318 150 3436 11.0 70 1
4 16 8 304 150 3433 12.0 70 1
5 17 8 302 140 3449 10.5 70 1
6 15 8 429 198 4341 10.0 70 1
name
1 chevrolet chevelle malibu
2 buick skylark 320
3 plymouth satellite
4 amc rebel sst
5 ford torino
6 ford galaxie 500
Pada tugas ini hanya digunakan 3 variabel yaitu mpg, horsepower, dan origin.
mpg: miles per gallon
horsepower: Engine horsepower
origin: Origin of car (1. American, 2. European, 3. Japanese)
Scatter Plot mpg Vs horsepower
Regresi Linier
model_linear=lm(mpg~horsepower,data=Auto)
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.50, color="blue") +
stat_smooth(method = "lm",
formula = y~x,lty = 1,
col = "red",se = F)+
theme_bw() Hubungan linear tidak dapat merepresentrasikan sebaran data dengan baik. Nilai horsepower yang lebih tinggi lagi dapat berakibat mpg bernilai nol atau negatif. Hal Ini tidak mungkin. Oleh karena itu, beberapa model yang dapat digunakan untuk masalah di atas yaitu Regresi polinomial, Piecewise constant, atau Natural cubic splines.
Regresi Polinomial
Empiris
polinomial = function(dataset){
modelterbaik = c()
derajat = 1:10
for (i in derajat){
set.seed(1501211036)
cross_val = vfold_cv(dataset,v=10,strata = "mpg") #rsample
metric_poly = map_dfr(cross_val$splits, #tidiverse Iterasi pada suatu fungsi (Output data.frame)
function(x){
mod = lm(mpg ~ poly(horsepower,i,raw = T),
data=dataset[x$in_id,]) #in_id a/ data training
pred = predict(mod,
newdata=dataset[-x$in_id,])
# nilai prediksi untuk data testing
truth = dataset[-x$in_id,]$mpg
rmse = mlr3measures::rmse(truth = truth,
response = pred)
mae = mlr3measures::mae(truth = truth,
response = pred)
metric = c(rmse,mae)
names(metric) = c("rmse","mae")
return(metric)
}
)
rata_rata = colMeans(metric_poly)
modelterbaik = c(modelterbaik,rata_rata)
}
M = matrix(modelterbaik, byrow = T,ncol = 2)
colnames(M) = c("rmse","mae")
bestmpoly = as.data.frame(cbind(derajat,M))
bestmpoly = arrange(bestmpoly, bestmpoly$rmse)
return(bestmpoly)
}
bestmpoly = polinomial(Auto)
bestmpoly derajat rmse mae
1 7 4.328586 3.239488
2 5 4.343273 3.248301
3 6 4.345171 3.262537
4 2 4.347103 3.263367
5 8 4.351402 3.260481
6 3 4.354304 3.271791
7 4 4.374685 3.290646
8 9 4.380581 3.269677
9 10 4.415139 3.287987
10 1 4.898189 3.834290
Secara empiris regresi polinomial dengan derajat 7 memiliki rmse maupun mae terkecil. Namun, di sini dipilih regresi polinomial dengan derajat 2 karena jika lebih dariu dejarat 4, akan ada indikasi maalaahoverfitting.
Hal ini karena meskipun memiliki rmse atau mae yang kecil, polinomial dengan derajat lebih besar dari 4 cenderung memiliki pola yang tidak teratur, khususnya pada ujung-ujung kurvanya. Untuk lebih jelas, dapat dilihat kurva berikut.
Visual
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Derajat 2") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Derajat 3") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,7,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Derajat 7") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,8,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Derajat 8") +
theme_bw()Fungsi Tangga (Piecewise Constant)
Empiris
ftangga = function(dataset){
set.seed(1501211036)
cross_val = vfold_cv(dataset,v=10,strata = "mpg")
breaks = 2:15
best_tangga = map_dfr(breaks, function(i){
metric_tangga = map_dfr(cross_val$splits,
function(x){
training = dataset[x$in_id,]
training$horsepower = cut(training$horsepower,i) #Transformasi jadi factor
mod = lm(mpg ~ horsepower,data=training)
#Convert ke numeric lagi
labs_x = levels(mod$model[,2])
labs_x_breaks = cbind(lower = as.numeric( sub("\\((.+),.*",
"\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]",
"\\1", labs_x) ))
#Testing
testing = dataset[-x$in_id,]
horsepower_new = cut(testing$horsepower,c(labs_x_breaks[1,1],
labs_x_breaks[,2]))
pred = predict(mod,
newdata=list(horsepower=horsepower_new))
truth = testing$mpg
data_eval = na.omit(data.frame(truth,pred))
rmse = mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred)
mae = mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred)
metric = c(rmse,mae)
names(metric) = c("rmse","mae")
return(metric)
}
)
colMeans(metric_tangga) # menghitung rata-rata untuk 10 folds
}
)
best_tangga = cbind(breaks=breaks,best_tangga) # menampilkan hasil all breaks
best_tangga = arrange(best_tangga, best_tangga$rmse)
return(best_tangga)
}
tangga = ftangga(Auto)
tangga breaks rmse mae
1 15 4.343245 3.291338
2 14 4.401883 3.257563
3 8 4.436932 3.405170
4 12 4.451700 3.314559
5 7 4.499764 3.359511
6 13 4.511050 3.357282
7 9 4.526533 3.447286
8 11 4.538542 3.350069
9 10 4.578024 3.435559
10 6 4.655029 3.532977
11 5 4.705711 3.568395
12 4 4.990599 3.790641
13 3 5.771438 4.520390
14 2 6.151124 4.792086
Berdasarkan rmse terkecil banyaknya interval yang terbaik adalah 15 sedangkan berdasarkan mae terkecil banyaknya interval terbaik adalah 14.
visual
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,8),
lty = 1, col = "red",se = F)+
ggtitle("Cut = 8") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,12),
lty = 1, col = "red",se = F)+
ggtitle("Cut = 12") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,14),
lty = 1, col = "red",se = F)+
ggtitle("Cut = 14") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,15),
lty = 1, col = "red",se = F)+
ggtitle("Cut = 15") +
theme_bw()Berdasarkan empiris dan visual, diputuskan model terbaik dengan interbal sebanyak 15.
Natural Cubic Spline
Empiris
Menentukan Banyaknya Fungsi Basis
ncspline = function(dataset){
set.seed(1501211036)
cross.val = vfold_cv(dataset,v=10,strata = "mpg")
df = 3:20
best.spline3 = map_dfr(df, function(i){
metric.spline3 = map_dfr(cross.val$splits,
function(x){
mod = lm(mpg ~ splines::ns(horsepower,df=i),data=dataset[x$in_id,])
pred = predict(mod,newdata=dataset[-x$in_id,])
truth = dataset[-x$in_id,]$mpg
rmse = mlr3measures::rmse(truth = truth,
response = pred)
mae = mlr3measures::mae(truth = truth,
response = pred)
metric = c(rmse,mae)
names(metric) = c("rmse","mae")
return(metric)
}
)
metric.spline3
mean.metric.spline3 = colMeans(metric.spline3) #rata-rata untuk 10 folds
mean.metric.spline3
}
)
best.spline3 = cbind(df=df,best.spline3)
basis=data.frame(rbind(best.spline3 %>% slice_min(rmse), #berdasarkan rmse
best.spline3 %>% slice_min(mae))) #berdasarkan mae
df=basis[,"df"]
knot1 = attr(ns(dataset$horsepower, df=df[1]),"knots")
knot2 = attr(ns(dataset$horsepower, df=df[2]),"knots")
return(list(basis=basis,knot1=knot1,knot2=knot2))
}
basisAuto = ncspline(Auto)
basisAuto$basis
df rmse mae
1 7 4.320925 3.230518
2 10 4.323890 3.217920
$knot1
14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429%
68.85714 79.71429 89.57143 98.00000 113.57143 150.00000
$knot2
10% 20% 30% 40% 50% 60% 70% 80% 90%
67.0 72.0 80.0 88.0 93.5 100.0 110.0 140.0 157.7
Berdasarkan hasil di atas model natural spline terbaik memiliki 7 fungsi basis atau 6 knots
Visual
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(68.8, 79.7, 89.5, 98,
113.5, 150)),
lty = 1,col = "red",se=F)+
ggtitle("knot = 6")+
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(67.0, 72.0,80.0, 88.0, 93.5,
100.0,110.0,140.0,157.7)),
lty = 1,col = "red",se=F)+
ggtitle("knot = 9")+
theme_bw()Perbandingan Model
Empiris
a = as.matrix(bestmpoly[4,])
b = as.matrix(tangga[1,])
c = as.matrix(basisAuto$basis[1,])
perbandingan.model = rbind(a,b,c)
rownames(perbandingan.model) = c("Polinomial Derajat 2","Fungsi Tangga 15 Interval",
"Natural Cubic Spline 6 Knot")
as.data.frame(perbandingan.model[,-1]) rmse mae
Polinomial Derajat 2 4.347103 3.263367
Fungsi Tangga 15 Interval 4.343245 3.291338
Natural Cubic Spline 6 Knot 4.320925 3.230518
Visual
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Polinomial Derajat 2") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,15),
lty = 1, col = "red",se = F)+
ggtitle("Fungsi Tangga 15 Interval") +
theme_bw()
ggplot(Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(68.8, 79.7, 89.5, 98,
113.5, 150)),
lty = 1,col = "red",se=F)+
ggtitle("Natural Cubic Spline 6 Knot")+
theme_bw() Berdasarkan grafik Regresi Polinomial Derajat 2 terlihat lebih baik dibandingkan Natural Cubic Spline yang mana di ujungnya cenderung tidak stabil meskipun memiliki rmse dan mea yang lebih kecil. Sehingga dipilih Regresi Polinomial Derajat 2.
Subset-data: Amerika
Regresi Polinomial
derajat rmse mae
1 3 3.772860 2.897686
2 7 3.773650 2.870675
3 2 3.776399 2.889935
4 5 3.776736 2.887701
5 4 3.788496 2.902062
6 8 3.788557 2.882283
7 6 3.802001 2.913044
8 9 3.864852 2.932634
9 10 3.902782 2.946496
10 1 4.197004 3.332211
Fungsi Tangga
breaks rmse mae
1 9 3.796899 2.880849
2 12 3.814466 2.868998
3 11 3.861857 2.957272
4 13 3.865669 2.922589
5 14 3.909457 2.960533
6 10 3.920842 2.985516
7 15 3.945686 2.998896
8 8 3.958049 2.957988
9 5 4.056622 3.134471
10 7 4.095050 3.208085
11 4 4.118081 3.040479
12 6 4.131736 3.112986
13 3 4.732121 3.666719
14 2 4.895683 3.764649
Natural Cubic Spline
$basis
df rmse mae
1 20 3.658698 2.762393
2 20 3.658698 2.762393
$knot1
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65%
70.0 78.4 83.6 85.0 88.0 90.0 92.0 99.2 100.0 105.0 110.0 122.0 138.6
70% 75% 80% 85% 90% 95%
145.0 150.0 150.0 162.0 175.0 197.0
$knot2
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65%
70.0 78.4 83.6 85.0 88.0 90.0 92.0 99.2 100.0 105.0 110.0 122.0 138.6
70% 75% 80% 85% 90% 95%
145.0 150.0 150.0 162.0 175.0 197.0
Perbandingan
a = as.matrix(polyUS[1,])
b = as.matrix(ftUS[1,])
c = as.matrix(ncsUS$basis[1,])
perbandingan.model = rbind(a,b,c)
rownames(perbandingan.model) = c("Polinomial Derajat 3","Fungsi Tangga 9 Interval",
"Natural Cubic Spline 19 Knot")
as.data.frame(perbandingan.model[,-1]) rmse mae
Polinomial Derajat 3 3.772860 2.897686
Fungsi Tangga 9 Interval 3.796899 2.880849
Natural Cubic Spline 19 Knot 3.658698 2.762393
Visual
ggplot(Auto[Auto$origin==1,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Polinomial Derajat 3") +
theme_bw()
ggplot(Auto[Auto$origin==1,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,9),
lty = 1, col = "red",se = F)+
ggtitle("Fungsi Tangga 9 Interval") +
theme_bw()
ggplot(Auto[Auto$origin==1,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(70.0, 78.4, 83.6, 85.0, 88.0, 90.0,
92.0, 99.2, 100.0, 105.0, 110.0,
122.0, 138.6, 145.0, 150.0, 150.0,
162.0, 175.0, 197.0 )),
lty = 1,col = "red",se=F)+
ggtitle("Natural Cubic Spline 19 Knot")+
theme_bw()Subset-data: Eropa
Regresi Polinomial
derajat rmse mae
1 1 4.844209 3.815299
2 2 4.943533 3.875699
3 3 4.989606 3.908970
4 4 5.061847 3.980574
5 5 5.427366 4.303481
6 8 6.620034 4.674904
7 7 6.640237 4.743828
8 6 6.688415 4.900119
9 9 16.371862 8.625718
10 10 16.371862 8.625718
Fungsi Tangga
ftangga2 = function(dataset){
set.seed(1501211036)
cross_val = vfold_cv(dataset,v=10,strata = "mpg")
breaks = 2:8
best_tangga = map_dfr(breaks, function(i){
metric_tangga = map_dfr(cross_val$splits,
function(x){
training = dataset[x$in_id,]
training$horsepower = cut(training$horsepower,i)
mod = lm(mpg ~ horsepower,data=training)
labs_x = levels(mod$model[,2])
labs_x_breaks = cbind(lower = as.numeric( sub("\\((.+),.*",
"\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]",
"\\1", labs_x) ))
testing = dataset[-x$in_id,]
horsepower_new = cut(testing$horsepower,c(labs_x_breaks[1,1],
labs_x_breaks[,2]))
pred = predict(mod,
newdata=list(horsepower=horsepower_new))
truth = testing$mpg
data_eval = na.omit(data.frame(truth,pred))
rmse = mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred)
mae = mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred)
metric = c(rmse,mae)
names(metric) = c("rmse","mae")
return(metric)
}
)
colMeans(metric_tangga) # menghitung rata-rata untuk 10 folds
}
)
best_tangga = cbind(breaks=breaks,best_tangga) # menampilkan hasil all breaks
best_tangga = arrange(best_tangga, best_tangga$rmse)
return(best_tangga)
}
ftEU = ftangga2(Auto[Auto$origin==2,])
ftEU breaks rmse mae
1 5 5.055263 3.970937
2 7 5.092098 3.963517
3 8 5.236106 4.159720
4 6 5.246888 4.092438
5 2 5.374675 4.264126
6 4 5.380581 4.335569
7 3 5.439791 4.390227
Natural Cubic Spline
$basis
df rmse mae
1 3 5.001043 3.937553
2 3 5.001043 3.937553
$knot1
33.33333% 66.66667%
71 87
$knot2
33.33333% 66.66667%
71 87
Perbandingan
a = as.matrix(polyEU[1,])
b = as.matrix(ftEU[1,])
c = as.matrix(ncsEU$basis[1,])
perbandingan.model = rbind(a,b,c)
rownames(perbandingan.model) = c("Polinomial Derajat 2","Fungsi Tangga 5 Interval",
"Natural Cubic Spline 2 Knot")
as.data.frame(perbandingan.model[,-1]) rmse mae
Polinomial Derajat 2 4.844209 3.815299
Fungsi Tangga 5 Interval 5.055263 3.970937
Natural Cubic Spline 2 Knot 5.001043 3.937553
Visual
ggplot(Auto[Auto$origin==2,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Polinomial Derajat 2") +
theme_bw()
ggplot(Auto[Auto$origin==2,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "red",se = F)+
ggtitle("Fungsi Tangga 5 Interval") +
theme_bw()
ggplot(Auto[Auto$origin==2,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(71, 87 )),
lty = 1,col = "red",se=F)+
ggtitle("Natural Cubic Spline 2 Knot")+
theme_bw()Subset-data: Jepang
Regresi Polinomial
derajat rmse mae
1 3 3.955656 2.998039
2 6 3.987467 3.049899
3 4 4.069171 3.097866
4 1 4.247876 3.400810
5 2 4.352083 3.396524
6 5 4.598146 3.378687
7 7 5.009655 3.508123
8 8 13.618007 6.364834
9 9 22.733143 9.415607
10 10 22.733143 9.415607
Fungsi Tangga
ftangga3 = function(dataset){
set.seed(1501211036)
cross_val = vfold_cv(dataset,v=10,strata = "mpg")
breaks = 2:5
best_tangga = map_dfr(breaks, function(i){
metric_tangga = map_dfr(cross_val$splits,
function(x){
training = dataset[x$in_id,]
training$horsepower = cut(training$horsepower,i)
mod = lm(mpg ~ horsepower,data=training)
labs_x = levels(mod$model[,2])
labs_x_breaks = cbind(lower = as.numeric( sub("\\((.+),.*",
"\\1", labs_x) ),
upper = as.numeric( sub("[^,]*,([^]]*)\\]",
"\\1", labs_x) ))
testing = dataset[-x$in_id,]
horsepower_new = cut(testing$horsepower,c(labs_x_breaks[1,1],
labs_x_breaks[,2]))
pred = predict(mod,
newdata=list(horsepower=horsepower_new))
truth = testing$mpg
data_eval = na.omit(data.frame(truth,pred))
rmse = mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred)
mae = mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred)
metric = c(rmse,mae)
names(metric) = c("rmse","mae")
return(metric)
}
)
colMeans(metric_tangga) # menghitung rata-rata untuk 10 folds
}
)
best_tangga = cbind(breaks=breaks,best_tangga) # menampilkan hasil all breaks
best_tangga = arrange(best_tangga, best_tangga$rmse)
return(best_tangga)
}
ftJP = ftangga3(Auto[Auto$origin==3,])
ftJP breaks rmse mae
1 4 4.118326 3.267480
2 3 4.134643 3.314040
3 5 4.172699 3.365922
4 2 4.416488 3.322464
Natural Cubic Spline
$basis
df rmse mae
1 7 3.993262 3.010772
2 7 3.993262 3.010772
$knot1
14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429%
65.00000 67.00000 70.00000 75.00000 93.71429 97.00000
$knot2
14.28571% 28.57143% 42.85714% 57.14286% 71.42857% 85.71429%
65.00000 67.00000 70.00000 75.00000 93.71429 97.00000
Perbandingan
a = as.matrix(polyJP[1,])
b = as.matrix(ftJP[1,])
c = as.matrix(ncsJP$basis[1,])
perbandingan.model = rbind(a,b,c)
rownames(perbandingan.model) = c("Polinomial Derajat 3","Fungsi Tangga 4 Interval",
"Natural Cubic Spline 6 Knot")
as.data.frame(perbandingan.model[,-1]) rmse mae
Polinomial Derajat 3 3.955656 2.998039
Fungsi Tangga 4 Interval 4.118326 3.267480
Natural Cubic Spline 6 Knot 3.993262 3.010772
Visual
ggplot(Auto[Auto$origin==3,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "red",se = F)+
ggtitle("Polinomial Derajat 3") +
theme_bw()
ggplot(Auto[Auto$origin==3,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "red",se = F)+
ggtitle("Fungsi Tangga 4 Interval") +
theme_bw()
ggplot(Auto[Auto$origin==3,],aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue")+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c( 65, 67, 70, 75,
93.7, 97 )),
lty = 1,col = "red",se=F)+
ggtitle("Natural Cubic Spline 6 Knot")+
theme_bw()