#install.packages("MultiKink")
library(MultiKink)
library(MatrixModels)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.10
## v tidyr 1.2.1 v stringr 1.4.1
## v readr 2.1.2 v forcats 0.5.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(purrr)
library(rsample)
library(mlr3measures)
## In order to avoid name clashes, do not attach 'mlr3measures'. Instead, only load the namespace with `requireNamespace("mlr3measures")` and access the measures directly via `::`, e.g. `mlr3measures::auc()`.
library(splines)
library(cowplot)
library(DT)
set.seed(120)
peubah.x <- rnorm(1000, 2, 1)                # predictor
error <- rnorm(1000)                         # noise
y <- 2 + 1*peubah.x + 4*peubah.x^2 + error   # quadratic data-generating model
plot(peubah.x, y)
lin.mod <- lm(y ~ peubah.x)
plot(peubah.x, y)
lines(peubah.x, lin.mod$fitted.values, col="red")
summary(lin.mod)
##
## Call:
## lm(formula = y ~ peubah.x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.656 -3.689 -2.091 1.478 59.779
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.1075 0.4217 -26.34 <2e-16 ***
## peubah.x 17.6424 0.1857 94.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6 on 998 degrees of freedom
## Multiple R-squared: 0.9004, Adjusted R-squared: 0.9003
## F-statistic: 9022 on 1 and 998 DF, p-value: < 2.2e-16
pol.mod <- lm(y ~ peubah.x + I(peubah.x^2))   # degree-2 polynomial
ix <- sort(peubah.x, index.return=T)$ix       # ix = ordering of the observations
plot(peubah.x, y, xlim=c(-2,5), ylim=c(-10,70))
lines(peubah.x[ix], pol.mod$fitted.values[ix], col="blue", cex=2)
summary(pol.mod)
##
## Call:
## lm(formula = y ~ peubah.x + I(peubah.x^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.84314 -0.64438 -0.00555 0.63566 2.87286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.80844 0.09535 18.97 <2e-16 ***
## peubah.x 1.13236 0.09051 12.51 <2e-16 ***
## I(peubah.x^2) 3.98865 0.02063 193.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9678 on 997 degrees of freedom
## Multiple R-squared: 0.9974, Adjusted R-squared: 0.9974
## F-statistic: 1.921e+05 on 2 and 997 DF, p-value: < 2.2e-16
# step-function regression
range(peubah.x)   # min and max values
## [1] -1.083235 6.089670
step.mod <- lm(y ~ cut(peubah.x, 5))
plot(peubah.x, y)
lines(peubah.x, lin.mod$fitted.values, col="red")
lines(peubah.x[ix], step.mod$fitted.values[ix], col="blue")
summary(step.mod)
##
## Call:
## lm(formula = y ~ cut(peubah.x, 5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.669 -5.373 -0.731 4.152 42.954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.274 1.108 2.053 0.0403 *
## cut(peubah.x, 5)(0.351,1.79] 7.360 1.173 6.272 5.32e-10 ***
## cut(peubah.x, 5)(1.79,3.22] 26.045 1.160 22.443 < 2e-16 ***
## cut(peubah.x, 5)(3.22,4.66] 58.630 1.293 45.345 < 2e-16 ***
## cut(peubah.x, 5)(4.66,6.1] 110.880 3.876 28.604 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.43 on 995 degrees of freedom
## Multiple R-squared: 0.8477, Adjusted R-squared: 0.8471
## F-statistic: 1385 on 4 and 995 DF, p-value: < 2.2e-16
# mean squared error of predictions
MSE <- function(pred, actual){
  mean((pred - actual)^2)
}
model <- data.frame("Model" = c("Linear", "Polynomial (2)", "Step (5)"),
                    "AIC" = c(AIC(lin.mod), AIC(pol.mod), AIC(step.mod)),
                    "MSE" = c(MSE(predict(lin.mod), y), MSE(predict(pol.mod), y),
                              MSE(predict(step.mod), y)))
knitr::kable(model)
| Model | AIC | MSE |
|---|---|---|
| Linear | 6425.359 | 35.9267479 |
| Polynomial (2) | 2777.312 | 0.9337327 |
| Step (5) | 6855.818 | 54.9234939 |
The best model is the one with the smallest AIC and MSE. Of the three models fitted above, the degree-2 polynomial has the smallest AIC and MSE (2777.312 and 0.9337, respectively), which confirms that the relationship between x and y is not linear.
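As a sanity check, the AIC reported above can be reproduced from the residual sum of squares; this is a minimal sketch assuming the fitted models are still in the workspace.
n <- length(y)
rss <- sum(residuals(pol.mod)^2)
p <- length(coef(pol.mod))                        # number of regression coefficients
# Gaussian AIC, counting the error variance as one extra parameter
manual_aic <- n*(log(2*pi) + log(rss/n) + 1) + 2*(p + 1)
c(manual = manual_aic, builtin = AIC(pol.mod))    # should agree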
# Split the data into two parts at the cut point x = 3
data <- cbind(peubah.x, y)
data1 <- data[peubah.x <= 3,]
data2 <- data[peubah.x > 3,]
# Plotting
plot(peubah.x, y, main="Piecewise Cubic Polynomial\nSimulated Data")
abline(v=3, col="red", lty=3)
cub.mod.data1 <- lm(data1[,2] ~ data1[,1]+I(data1[,1]^2)+I(data1[,1]^3))
ix.data1 <- sort(data1[,1], index.return=T)$ix
lines(data1[ix.data1,1],cub.mod.data1$fitted.values[ix.data1],col="blue", lwd=2)
cub.mod.data2 <- lm(data2[,2] ~ data2[,1]+I(data2[,1]^2)+I(data2[,1]^3))
ix.data2 <- sort(data2[,1], index.return=T)$ix
lines(data2[ix.data2,1],cub.mod.data2$fitted.values[ix.data2],col="blue", lwd=2)
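The two cubics above are fitted independently, so nothing forces them to join at x = 3. A standard refinement, sketched here as an addition to the original code, is a truncated power basis, which makes the two pieces meet smoothly at the knot.
h <- function(z, knot) pmax(0, (z - knot)^3)   # truncated cubic basis term
cub.spline <- lm(y ~ peubah.x + I(peubah.x^2) + I(peubah.x^3) + h(peubah.x, 3))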
# 1. Determine the number of basis functions and the knots
data <- as.data.frame(data)
knots.val <- attr(bs(data$peubah.x, df=6), "knots")
knots.val
## 25% 50% 75%
## 1.320316 2.036762 2.667127
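With the defaults of splines::bs (cubic basis, no intercept column), df = 6 leaves 6 - 3 = 3 interior knots, which bs() places at quantiles of x; the values above are simply the 25/50/75% quantiles:
quantile(data$peubah.x, probs = c(0.25, 0.50, 0.75))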
# 2. Fit the spline regression using the knots found above
spline <- lm(y ~ bs(peubah.x, knots=knots.val), data=data)
summary(spline)
##
## Call:
## lm(formula = y ~ bs(peubah.x, knots = knots.val), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.84657 -0.65390 -0.00086 0.64618 2.89459
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4530 0.6168 8.841 < 2e-16 ***
## bs(peubah.x, knots = knots.val)1 -6.2864 0.9744 -6.452 1.73e-10 ***
## bs(peubah.x, knots = knots.val)2 -3.9714 0.5611 -7.077 2.78e-12 ***
## bs(peubah.x, knots = knots.val)3 14.0370 0.6469 21.700 < 2e-16 ***
## bs(peubah.x, knots = knots.val)4 45.9965 0.6324 72.732 < 2e-16 ***
## bs(peubah.x, knots = knots.val)5 94.0344 0.8746 107.522 < 2e-16 ***
## bs(peubah.x, knots = knots.val)6 151.0685 1.0978 137.613 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9691 on 993 degrees of freedom
## Multiple R-squared: 0.9974, Adjusted R-squared: 0.9974
## F-statistic: 6.385e+04 on 6 and 993 DF, p-value: < 2.2e-16
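As a quick check (an addition, not in the original), the B-spline fit can be compared directly with the quadratic model that generated the data:
AIC(pol.mod, spline)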
ggplot(data, aes(x=peubah.x, y=y)) + geom_point(color="Blue") + stat_smooth(method="lm", formula=y~bs(peubah.x, knots=knots), lty=1, se=F)
## Warning: Computation failed in `stat_smooth()`:
## 'x' must be atomic
## Smoothing Spline
mod <- smooth.spline(peubah.x, y, all.knots=T)        # knot at every unique x
mod1 <- with(data=data, smooth.spline(peubah.x, y))   # default knots, lambda chosen by GCV
mod1
## Call:
## smooth.spline(x = peubah.x, y = y)
##
## Smoothing Parameter spar= 0.9182199 lambda= 0.0004650269 (16 iterations)
## Equivalent Degrees of Freedom (Df): 12.31604
## Penalized Criterion (RSS): 924.07
## GCV: 0.9497684
plot(peubah.x, y, pch=19, main="Smoothing Spline Data Bangkitan")
lines(mod, col="orange", lwd=1.5)
## LOESS Function
loess <- loess(formula = y ~ peubah.x, data = data)   # note: this masks stats::loess
summary(loess)
## Call:
## loess(formula = y ~ peubah.x, data = data)
##
## Number of Observations: 1000
## Equivalent Number of Parameters: 5.4
## Residual Standard Error: 0.9682
## Trace of smoother matrix: 5.91 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE
loess1 <- data.frame(span=seq(0.1,2,by=0.1)) %>%
group_by(span) %>%
do(broom::augment(loess(y~peubah.x, data=data,span=.$span)))
ggplot(loess1, aes(x=peubah.x, y=y)) + geom_line(aes(y=.fitted), col="blue", lty=1) + facet_wrap(~span)
## Comparing All Models
ggplot(data, aes(x=peubah.x, y=y)) + geom_point(color="black") + stat_smooth(method="lm", formula=y~x, col="blue", show.legend=T) + stat_smooth(method="lm", formula=y~poly(x,2,raw=T), col="red", lwd=3.5, show.legend=T) + stat_smooth(method="lm", formula=y~cut(x,6), col="darkgreen", show.legend=T) + stat_smooth(method="lm", formula=y~bs(x, knots=knots), col="green", lwd=2, show.legend=T) + stat_smooth(method="loess", formula=y~x, col="orange", show.legend=T)
## Warning: Computation failed in `stat_smooth()`:
## 'x' must be atomic
model.data2 <- data.frame(Model = c("Linear", "Polynomial (2)", "Step Function (5)",
                                    "Spline Regression", "LOESS Function"),
                          MSE = c(MSE(predict(lin.mod), y), MSE(predict(pol.mod), y),
                                  MSE(predict(step.mod), y), MSE(predict(spline), y),
                                  MSE(predict(loess), y)))
knitr::kable(model.data2)
| Model | MSE |
|---|---|
| Linear | 35.9267479 |
| Polynomial (2) | 0.9337327 |
| Step Function (5) | 54.9234939 |
| Spline Regression | 0.9325520 |
| LOESS Function | 0.9314191 |
The best model here is the LOESS fit, which has the smallest MSE. Note that these MSE values are computed on the same data the models were fitted to; the next section compares models on held-out data using cross-validation.
library(ISLR)
AutoData = Auto %>% select(mpg, horsepower, origin)
data_Auto <- tibble(AutoData)
head(data_Auto)
## # A tibble: 6 x 3
## mpg horsepower origin
## <dbl> <dbl> <dbl>
## 1 18 130 1
## 2 15 165 1
## 3 18 150 1
## 4 16 150 1
## 5 17 140 1
## 6 15 198 1
mpg <- data_Auto$mpg
horsepower <- data_Auto$horsepower
origin <- data_Auto$origin
Visualized as a scatterplot:
ggplot(data_Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
theme_bw()
Here X = horsepower and Y = mpg (miles per gallon).
The scatterplot suggests that vehicles with more horsepower consume more fuel: the lower the horsepower, the farther the vehicle travels per gallon.
The next step is to find a model that represents this relationship well.
ggplot(data_Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~x,lty = 1,
col = "black",se = F)+
theme_bw()
set.seed(123)
cross_val <- vfold_cv(data_Auto,v=10,strata = "mpg")
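# each split's $in_id holds the row indices of the analysis (training) fold;
# the remaining rows form the assessment (test) fold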
metric_linear <- map_dfr(cross_val$splits,
function(x){
mod <- lm(mpg ~ horsepower,data=data_Auto[x$in_id,])
pred <- predict(mod,newdata=data_Auto[-x$in_id,])
truth <- data_Auto[-x$in_id,]$mpg
rmse <- mlr3measures::rmse(truth = truth,response = pred)
mae <- mlr3measures::mae(truth = truth,response = pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_linear
## # A tibble: 10 x 2
## rmse mae
## <dbl> <dbl>
## 1 4.52 3.73
## 2 4.86 3.80
## 3 4.54 3.35
## 4 4.67 3.99
## 5 5.12 4.12
## 6 4.39 3.25
## 7 5.93 4.65
## 8 5.05 3.95
## 9 4.71 3.75
## 10 5.10 3.73
# average over the 10 folds
mean_metric_lin <- colMeans(metric_linear)
mean_metric_lin
## rmse mae
## 4.889086 3.832328
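For reference, the two metrics are simple aggregates of the prediction errors; hand-rolled versions matching the definitions used by mlr3measures:
rmse_manual <- function(truth, response) sqrt(mean((truth - response)^2))
mae_manual  <- function(truth, response) mean(abs(truth - response))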
set.seed(123)
cross_val <- vfold_cv(data_Auto,v=10,strata = "mpg")
df <- 1:10
best_polinomial <- map_dfr(df, function(i){
  metric_polinomial <- map_dfr(cross_val$splits, function(x){
    mod <- lm(mpg ~ poly(horsepower, degree = i), data = data_Auto[x$in_id,])
    # predict on the test fold
    pred <- predict(mod, newdata = data_Auto[-x$in_id,])
    truth <- data_Auto[-x$in_id,]$mpg
    rmse <- mlr3measures::rmse(truth = truth, response = pred)
    mae <- mlr3measures::mae(truth = truth, response = pred)
    metric <- c(rmse, mae)
    names(metric) <- c("rmse", "mae")
    return(metric)
  })
  metric_polinomial
  # average over the 10 folds
  mean_Polinomial <- colMeans(metric_polinomial)
  mean_Polinomial
})
best_polinomial<-cbind(df=df,best_polinomial)
best_polinomial
## df rmse mae
## 1 1 4.889086 3.832328
## 2 2 4.351129 3.266460
## 3 3 4.355430 3.266945
## 4 4 4.367009 3.280452
## 5 5 4.318767 3.249003
## 6 6 4.317431 3.257272
## 7 7 4.293242 3.233376
## 8 8 4.309511 3.251758
## 9 9 4.323321 3.244919
## 10 10 4.371903 3.274887
From the output above, the smallest RMSE and MAE occur at degree 7: 4.293242 and 3.233376, respectively.
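The same selection can also be done programmatically, as is done for the other models below:
best_polinomial %>% slice_min(rmse)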
ggplot(data_Auto,aes(x=horsepower,y=mpg)) +
geom_point(alpha=0.55,color="blue") +
stat_smooth(method = "lm",
formula = y~poly(x,7,raw=T),
lty = 1,col="black",se=F) +
ggtitle("Regresi Polinomial Ordo = 7") +
ylab("mpg") +
xlab("horsepower") +
theme(plot.title = element_text(hjust = 0.5))
The plot above shows the degree-7 polynomial regression curve. The relationship is negative: the higher the horsepower, the fewer miles the vehicle travels per gallon.
set.seed(123)
cross_val <- vfold_cv(data_Auto,v=10,strata = "mpg")
breaks <- 3:20
best_tangga <- map_dfr(breaks, function(i){
metric_tangga <- map_dfr(cross_val$splits,
function(x){
training <- data_Auto[x$in_id,]
training$horsepower <- cut(training$horsepower,i)
mod <- lm(mpg ~ horsepower,data=training)
labs_x <- levels(mod$model[,2])   # training interval labels of the form "(a,b]"
# recover the numeric break points from the interval labels
labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                       upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
testing <- data_Auto[-x$in_id,]
# re-cut the test fold with the training breaks so the factor levels match
horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
pred <- predict(mod,newdata=list(horsepower=horsepower_new))
truth <- testing$mpg
# test points outside the training breaks become NA; drop them before scoring
data_eval <- na.omit(data.frame(truth,pred))
rmse <- mlr3measures::rmse(truth = data_eval$truth,response = data_eval$pred)
mae <- mlr3measures::mae(truth = data_eval$truth,response = data_eval$pred)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
}
)
metric_tangga
# average over the 10 folds
mean_metric_tangga <- colMeans(metric_tangga)
mean_metric_tangga
}
)
best_tangga <- cbind(breaks=breaks,best_tangga)
# display the results for all break counts
best_tangga
## breaks rmse mae
## 1 3 5.775792 4.521829
## 2 4 4.985270 3.774549
## 3 5 4.712623 3.571614
## 4 6 4.644383 3.548375
## 5 7 4.554116 3.379980
## 6 8 4.437597 3.405433
## 7 9 4.549668 3.471999
## 8 10 4.589172 3.455531
## 9 11 4.531039 3.380931
## 10 12 4.442697 3.321151
## 11 13 4.527126 3.375170
## 12 14 4.428824 3.270424
## 13 15 4.313162 3.286289
## 14 16 4.336661 3.302031
## 15 17 4.414343 3.347532
## 16 18 4.340715 3.221099
## 17 19 4.331798 3.224404
## 18 20 4.480710 3.320605
# by RMSE
best_tangga %>% slice_min(rmse)
## breaks rmse mae
## 1 15 4.313162 3.286289
# by MAE
best_tangga %>% slice_min(mae)
## breaks rmse mae
## 1 18 4.340715 3.221099
The best step-function model is the one with the smallest RMSE and MAE: the smallest RMSE occurs at 15 breaks and the smallest MAE at 18 breaks.
ggplot(data_Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",formula = y~cut(x,15),
lty = 1, col = "black",se = F)+
ggtitle("Fungsi Tangga breaks=15") +
ylab("mpg") +
xlab("horsepower") +
theme(plot.title = element_text(hjust = 0.5))
ggplot(data_Auto,aes(x=horsepower, y=mpg)) +
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",formula = y~cut(x,18),
lty = 1, col = "black",se = F)+
ggtitle("Fungsi Tangga breaks=18") +
ylab("mpg") +
xlab("horsepower") +
theme(plot.title = element_text(hjust = 0.5))
The plots above show the step-function curves with 15 and 18 breaks. The curve with 15 breaks (14 knots) is visibly smoother than the one with 18 breaks (17 knots), so the step function with 15 intervals (14 knots) is chosen.
set.seed(123)
cross_val <- vfold_cv(data_Auto,v=10,strata = "mpg")
df <- 1:10
best_nspline <- map_dfr(df, function(i){
  nspline <- map_dfr(cross_val$splits, function(x){
    mod <- lm(mpg ~ ns(horsepower, df = i),   # natural cubic spline basis with df = i
              data = data_Auto[x$in_id,])
    pred <- predict(mod, newdata = data_Auto[-x$in_id,])
    truth <- data_Auto[-x$in_id,]$mpg
    rmse <- mlr3measures::rmse(truth = truth, response = pred)
    mae <- mlr3measures::mae(truth = truth, response = pred)
    metric <- c(rmse, mae)
    names(metric) <- c("rmse", "mae")
    return(metric)
  })
  nspline
  # average over the 10 folds
  mean_nspline <- colMeans(nspline)
  mean_nspline
}
)
best_nspline <- cbind(df=df,best_nspline)
best_nspline
# by RMSE
best_nspline %>% slice_min(rmse)
# by MAE
best_nspline %>% slice_min(mae)
# Let ns() place the knots automatically (at quantiles of horsepower)
knots <- attr(ns(data_Auto$horsepower, df=10), "knots")
ggplot(data_Auto, aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm",
              formula = y ~ ns(x, df = 10),
              lty = 1, se = F) +
  ggtitle("Natural Spline Regression, df = 10") +
  ylab("mpg") +
  xlab("horsepower") +
  theme(plot.title = element_text(hjust = 0.5))
set.seed(123)
cross_val <- vfold_cv(data_Auto,v=10,strata = "mpg")
span <- seq(0.1,1,length.out=50)
best_loess <- map_dfr(span, function(i){
metric_loess <- map_dfr(cross_val$splits,
function(x){
mod <- loess(mpg ~ horsepower,span = i,
data=data_Auto[x$in_id,])
pred <- predict(mod,
newdata=data_Auto[-x$in_id,])
truth <- data_Auto[-x$in_id,]$mpg
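                                 # loess returns NA predictions outside the training
                                 # fold's horsepower range; drop those before scoring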
data_eval <- na.omit(data.frame(pred=pred,
truth=truth))
rmse <- mlr3measures::rmse(truth = data_eval$truth,
response = data_eval$pred
)
mae <- mlr3measures::mae(truth = data_eval$truth,
response = data_eval$pred
)
metric <- c(rmse,mae)
names(metric) <- c("rmse","mae")
return(metric)
})
head(metric_loess,20)
# average over the 10 folds
mean_metric_loess <- colMeans(metric_loess)
mean_metric_loess
})
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 89
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## (similar near-singularity warnings repeat for the smallest span values)
best_loess <- cbind(span=span,best_loess)
# display the results for all span values
best_loess
## span rmse mae
## 1 0.1000000 4.279961 3.212481
## 2 0.1183673 4.279930 3.199382
## 3 0.1367347 4.282181 3.206301
## 4 0.1551020 4.271935 3.206052
## 5 0.1734694 4.280129 3.216263
## 6 0.1918367 4.291612 3.230315
## 7 0.2102041 4.304326 3.241927
## 8 0.2285714 4.306384 3.242725
## 9 0.2469388 4.299509 3.226658
## 10 0.2653061 4.294184 3.215434
## 11 0.2836735 4.295722 3.222993
## 12 0.3020408 4.289119 3.217525
## 13 0.3204082 4.292602 3.222667
## 14 0.3387755 4.284627 3.213171
## 15 0.3571429 4.284646 3.210649
## 16 0.3755102 4.286424 3.214093
## 17 0.3938776 4.286272 3.214380
## 18 0.4122449 4.290260 3.218785
## 19 0.4306122 4.295797 3.225056
## 20 0.4489796 4.291814 3.224207
## 21 0.4673469 4.293737 3.225118
## 22 0.4857143 4.298144 3.228398
## 23 0.5040816 4.301433 3.232241
## 24 0.5224490 4.302167 3.232329
## 25 0.5408163 4.303161 3.232579
## 26 0.5591837 4.304828 3.234132
## 27 0.5775510 4.307357 3.236528
## 28 0.5959184 4.308655 3.237146
## 29 0.6142857 4.307372 3.236599
## 30 0.6326531 4.306337 3.235772
## 31 0.6510204 4.305561 3.234744
## 32 0.6693878 4.306828 3.235237
## 33 0.6877551 4.307049 3.236061
## 34 0.7061224 4.308953 3.237984
## 35 0.7244898 4.310339 3.239400
## 36 0.7428571 4.318304 3.244223
## 37 0.7612245 4.324715 3.247615
## 38 0.7795918 4.330090 3.250112
## 39 0.7979592 4.331711 3.249172
## 40 0.8163265 4.335098 3.249201
## 41 0.8346939 4.338779 3.250354
## 42 0.8530612 4.338608 3.249875
## 43 0.8714286 4.339232 3.249140
## 44 0.8897959 4.340280 3.249661
## 45 0.9081633 4.345057 3.253641
## 46 0.9265306 4.349224 3.257563
## 47 0.9448980 4.352061 3.259012
## 48 0.9632653 4.354034 3.260748
## 49 0.9816327 4.355639 3.260118
## 50 1.0000000 4.355334 3.260488
# by RMSE
best_loess %>% slice_min(rmse)
## span rmse mae
## 1 0.155102 4.271935 3.206052
# by MAE
best_loess %>% slice_min(mae)
## span rmse mae
## 1 0.1183673 4.27993 3.199382
ggplot(data_Auto, aes(horsepower, mpg)) +
  geom_point(alpha=0.5, color="black") +
  stat_smooth(method='loess',
              formula=y~x,
              span = 0.155102,   # the span with the smallest cross-validated RMSE
              col="blue",
              lty=1,
              se=F)+
  ggtitle("Local Regression, span = 0.155")
nilai_metric <- rbind(best_polinomial %>% select(-1) %>% slice_min(rmse),
                      best_tangga %>% select(-1) %>% slice_min(rmse),
                      best_nspline %>% select(-1) %>% slice_min(rmse))
nama_model <- c("Polynomial (df=7)",
                "Step Function (breaks=15)",
                "Natural Spline (df=10)")
gabungan_model <- data.frame(nama_model, nilai_metric)
gabungan_model
ggplot(data_Auto, aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.05, color="blue") +
  stat_smooth(method="lm", formula=y~poly(x,7,raw=T),
              lty=1, aes(col="Polynomial Regression"), se=F) +
  stat_smooth(method="lm", formula=y~cut(x,15),
              lty=1, aes(col="Step Function"), se=F) +
  stat_smooth(method="lm", formula=y~ns(x,df=10),
              lty=1, aes(col="Natural Spline Regression"), se=F) +
  scale_color_manual(values = c("Polynomial Regression"="red",
                                "Step Function"="darkblue",
                                "Natural Spline Regression"="orange")) +
  ggtitle("Model Comparison")
Based on the RMSE and MAE comparison, the chosen model is the natural spline regression with df = 10.
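To use the selected model, refit it on the full dataset; a minimal sketch, where `final_mod` is a name introduced here for illustration:
final_mod <- lm(mpg ~ ns(horsepower, df = 10), data = data_Auto)
summary(final_mod)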