Package
Silahkan install jika belum ada
install.packages("tidyverse")
install.packages("MultiKink")
install.packages("splines")library(tidyverse)
library(splines)Data
data("triceps",package="MultiKink")ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
theme_bw() Regresi linear
mod_linear = lm(triceps~age,data=triceps)
summary(mod_linear)##
## Call:
## lm(formula = triceps ~ age, data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9512 -2.3965 -0.5154 1.5822 25.1233
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.19717 0.21244 29.17 <2e-16 ***
## age 0.21584 0.01014 21.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.007 on 890 degrees of freedom
## Multiple R-squared: 0.3372, Adjusted R-squared: 0.3365
## F-statistic: 452.8 on 1 and 890 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~x,lty = 1, col = "blue",se = F)+
theme_bw()Regresi polinomial
mod_polinomial2 = lm(triceps ~ poly(age,2,raw = T),data=triceps)summary(mod_polinomial2)##
## Call:
## lm(formula = triceps ~ poly(age, 2, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5677 -2.4401 -0.4587 1.6368 24.9961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0229191 0.3063806 19.658 < 2e-16 ***
## poly(age, 2, raw = T)1 0.2434733 0.0364403 6.681 4.17e-11 ***
## poly(age, 2, raw = T)2 -0.0006257 0.0007926 -0.789 0.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.008 on 889 degrees of freedom
## Multiple R-squared: 0.3377, Adjusted R-squared: 0.3362
## F-statistic: 226.6 on 2 and 889 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()mod_polinomial3 = lm(triceps ~ poly(age,3,raw = T),data=triceps)
summary(mod_polinomial3)##
## Call:
## lm(formula = triceps ~ poly(age, 3, raw = T), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5832 -1.9284 -0.5415 1.3283 24.4440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.004e+00 3.831e-01 20.893 < 2e-16 ***
## poly(age, 3, raw = T)1 -3.157e-01 7.721e-02 -4.089 4.73e-05 ***
## poly(age, 3, raw = T)2 3.101e-02 3.964e-03 7.824 1.45e-14 ***
## poly(age, 3, raw = T)3 -4.566e-04 5.612e-05 -8.135 1.38e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.868 on 888 degrees of freedom
## Multiple R-squared: 0.3836, Adjusted R-squared: 0.3815
## F-statistic: 184.2 on 3 and 888 DF, p-value: < 2.2e-16
Regresi Fungsi Tangga
mod_tangga = lm(triceps ~ cut(age,5),data=triceps)
summary(mod_tangga)##
## Call:
## lm(formula = triceps ~ cut(age, 5), data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5474 -2.0318 -0.4465 1.3682 23.3759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.2318 0.1994 36.260 < 2e-16 ***
## cut(age, 5)(10.6,20.9] 1.6294 0.3244 5.023 6.16e-07 ***
## cut(age, 5)(20.9,31.2] 5.9923 0.4222 14.192 < 2e-16 ***
## cut(age, 5)(31.2,41.5] 7.5156 0.4506 16.678 < 2e-16 ***
## cut(age, 5)(41.5,51.8] 7.4561 0.5543 13.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.939 on 887 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.3588
## F-statistic: 125.7 on 4 and 887 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()Regresi Spline
mod_spline3 = lm(triceps ~ bs(age, knots = c(5, 10, 20, 30, 40)),data=triceps)
summary(mod_spline3)##
## Call:
## lm(formula = triceps ~ bs(age, knots = c(5, 10, 20, 30, 40)),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5234 -1.6912 -0.2917 1.1356 23.0922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.9598 0.9729 7.154 1.77e-12 ***
## bs(age, knots = c(5, 10, 20, 30, 40))1 2.5367 1.7154 1.479 0.1396
## bs(age, knots = c(5, 10, 20, 30, 40))2 -0.3032 0.9629 -0.315 0.7529
## bs(age, knots = c(5, 10, 20, 30, 40))3 -1.9092 1.2993 -1.469 0.1421
## bs(age, knots = c(5, 10, 20, 30, 40))4 7.4056 1.2179 6.081 1.78e-09 ***
## bs(age, knots = c(5, 10, 20, 30, 40))5 6.1050 1.4043 4.347 1.54e-05 ***
## bs(age, knots = c(5, 10, 20, 30, 40))6 10.1770 1.5427 6.597 7.23e-11 ***
## bs(age, knots = c(5, 10, 20, 30, 40))7 3.9428 1.9082 2.066 0.0391 *
## bs(age, knots = c(5, 10, 20, 30, 40))8 10.1473 1.7545 5.784 1.01e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.743 on 883 degrees of freedom
## Multiple R-squared: 0.4261, Adjusted R-squared: 0.4209
## F-statistic: 81.94 on 8 and 883 DF, p-value: < 2.2e-16
mod_spline3ns = lm(triceps ~ ns(age, knots = c(5, 10, 20, 30, 40)),data=triceps)
summary(mod_spline3ns)##
## Call:
## lm(formula = triceps ~ ns(age, knots = c(5, 10, 20, 30, 40)),
## data = triceps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4875 -1.6873 -0.3665 1.1146 22.8643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.3811 0.5219 16.059 < 2e-16 ***
## ns(age, knots = c(5, 10, 20, 30, 40))1 -3.5592 0.6712 -5.303 1.44e-07 ***
## ns(age, knots = c(5, 10, 20, 30, 40))2 5.7803 1.0379 5.569 3.39e-08 ***
## ns(age, knots = c(5, 10, 20, 30, 40))3 5.5118 0.9416 5.853 6.78e-09 ***
## ns(age, knots = c(5, 10, 20, 30, 40))4 6.9070 0.9050 7.632 5.99e-14 ***
## ns(age, knots = c(5, 10, 20, 30, 40))5 5.4136 1.3783 3.928 9.24e-05 ***
## ns(age, knots = c(5, 10, 20, 30, 40))6 6.6460 1.0829 6.137 1.27e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.759 on 885 degrees of freedom
## Multiple R-squared: 0.4199, Adjusted R-squared: 0.416
## F-statistic: 106.8 on 6 and 885 DF, p-value: < 2.2e-16
ggplot(triceps,aes(x=age, y=triceps)) +
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(5, 10, 20, 30, 40)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(5, 10, 20, 30, 40)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw() ## Komparasi model
membuat fungsi MSE
MSE = function(pred,actual){
mean((pred-actual)^2)
}Membandingkan model
data.frame(Linear=MSE(predict(mod_linear),triceps$triceps),
Poly2=MSE(predict(mod_polinomial2),triceps$triceps),
Poly3=MSE(predict(mod_polinomial3),triceps$triceps),
Tangga=MSE(predict(mod_tangga),triceps$triceps),
Spline=MSE(predict(mod_spline3),triceps$triceps),
NSpline=MSE(predict(mod_spline3ns),triceps$triceps)
)## Linear Poly2 Poly3 Tangga Spline NSpline
## 1 16.01758 16.00636 14.89621 15.42602 13.87026 14.01904