library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(splines)
library(ISLR)
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
summary(Auto)
## mpg cylinders displacement horsepower weight
## Min. : 9.00 Min. :3.000 Min. : 68.0 Min. : 46.0 Min. :1613
## 1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0 1st Qu.: 75.0 1st Qu.:2225
## Median :22.75 Median :4.000 Median :151.0 Median : 93.5 Median :2804
## Mean :23.45 Mean :5.472 Mean :194.4 Mean :104.5 Mean :2978
## 3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8 3rd Qu.:126.0 3rd Qu.:3615
## Max. :46.60 Max. :8.000 Max. :455.0 Max. :230.0 Max. :5140
##
## acceleration year origin name
## Min. : 8.00 Min. :70.00 Min. :1.000 amc matador : 5
## 1st Qu.:13.78 1st Qu.:73.00 1st Qu.:1.000 ford pinto : 5
## Median :15.50 Median :76.00 Median :1.000 toyota corolla : 5
## Mean :15.54 Mean :75.98 Mean :1.577 amc gremlin : 4
## 3rd Qu.:17.02 3rd Qu.:79.00 3rd Qu.:2.000 amc hornet : 4
## Max. :24.80 Max. :82.00 Max. :3.000 chevrolet chevette: 4
## (Other) :365
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower")+
geom_point(alpha=0.6, color="blue") +
theme_bw()
mod_linear = lm(mpg~horsepower,data=Auto)
summary(mod_linear)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower Regresi Linear")+
geom_point(alpha=0.55, color="blue") +
stat_smooth(method = "lm",
formula = y~x,lty = 1, col = "red",se = F)+
theme_bw()
mod_polinomial2 = lm(mpg ~ poly(horsepower,2,raw = T),data=Auto) #Polinomial berderajat 2
summary(mod_polinomial2)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7135 -2.5943 -0.0859 2.2868 15.8961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.9000997 1.8004268 31.60 <2e-16 ***
## poly(horsepower, 2, raw = T)1 -0.4661896 0.0311246 -14.98 <2e-16 ***
## poly(horsepower, 2, raw = T)2 0.0012305 0.0001221 10.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared: 0.6876, Adjusted R-squared: 0.686
## F-statistic: 428 on 2 and 389 DF, p-value: < 2.2e-16
pplot.d2<-ggplot(Auto,aes(x=horsepower, y=mpg)) + #plot polinomial derajat 2
ggtitle("Plot mpg vs. horsepower Polinomial Derajat 2")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d2
mod_polinomial3 = lm(mpg ~ poly(horsepower,3,raw = T),data=Auto) #polinomial berderajat 3
summary(mod_polinomial3)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7039 -2.4491 -0.1519 2.2035 15.8159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.068e+01 4.563e+00 13.298 < 2e-16 ***
## poly(horsepower, 3, raw = T)1 -5.689e-01 1.179e-01 -4.824 2.03e-06 ***
## poly(horsepower, 3, raw = T)2 2.079e-03 9.479e-04 2.193 0.0289 *
## poly(horsepower, 3, raw = T)3 -2.147e-06 2.378e-06 -0.903 0.3673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.375 on 388 degrees of freedom
## Multiple R-squared: 0.6882, Adjusted R-squared: 0.6858
## F-statistic: 285.5 on 3 and 388 DF, p-value: < 2.2e-16
pplot.d3<-ggplot(Auto,aes(x=horsepower, y=mpg)) + #plot polinomial derajat 3
ggtitle("Plot mpg vs. horsepower Polinomial Derajat 3
")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d3
mod_polinomial4 = lm(mpg ~ poly(horsepower,4,raw = T),data=Auto) #polinomial berderajat 4
summary(mod_polinomial4)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8820 -2.5802 -0.1682 2.2100 16.1434
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.757e+01 1.196e+01 3.977 8.32e-05 ***
## poly(horsepower, 4, raw = T)1 -7.667e-02 4.313e-01 -0.178 0.859
## poly(horsepower, 4, raw = T)2 -4.345e-03 5.497e-03 -0.790 0.430
## poly(horsepower, 4, raw = T)3 3.245e-05 2.926e-05 1.109 0.268
## poly(horsepower, 4, raw = T)4 -6.530e-08 5.504e-08 -1.186 0.236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.373 on 387 degrees of freedom
## Multiple R-squared: 0.6893, Adjusted R-squared: 0.6861
## F-statistic: 214.7 on 4 and 387 DF, p-value: < 2.2e-16
pplot.d4<-ggplot(Auto,aes(x=horsepower, y=mpg)) + #plot polinomial derajat 4
ggtitle("Plot mpg vs. horsepower Polinomial Derajat 4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,4,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d4
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(Linear=MSE(predict(mod_linear),Auto$mpg),
Poly2=MSE(predict(mod_polinomial2),Auto$mpg),
Poly3=MSE(predict(mod_polinomial3),Auto$mpg),
Poly4=MSE(predict(mod_polinomial4),Auto$mpg)
)
## Linear Poly2 Poly3 Poly4
## 1 23.94366 18.98477 18.94499 18.87633
mod_tangga4 = lm(mpg ~ cut(horsepower,4),data=Auto)
summary(mod_tangga4)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.0533 -2.7826 -0.2927 2.8593 17.5467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.0533 0.3581 81.12 <2e-16 ***
## cut(horsepower, 4)(92,138] -8.4506 0.5946 -14.21 <2e-16 ***
## cut(horsepower, 4)(138,184] -14.2707 0.7005 -20.37 <2e-16 ***
## cut(horsepower, 4)(184,230] -16.2004 1.2647 -12.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.001 on 388 degrees of freedom
## Multiple R-squared: 0.5926, Adjusted R-squared: 0.5894
## F-statistic: 188.1 on 3 and 388 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=4")+
geom_point(alpha=0.55, color="azure4") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga5 = lm(mpg ~ cut(horsepower,5),data=Auto)
summary(mod_tangga5)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.3033 -3.0220 -0.5413 2.4074 16.6394
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.3033 0.4272 73.27 <2e-16 ***
## cut(horsepower, 5)(82.8,120] -8.2813 0.5642 -14.68 <2e-16 ***
## cut(horsepower, 5)(120,156] -15.2427 0.7210 -21.14 <2e-16 ***
## cut(horsepower, 5)(156,193] -17.5922 1.0036 -17.53 <2e-16 ***
## cut(horsepower, 5)(193,230] -18.5340 1.3767 -13.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.719 on 387 degrees of freedom
## Multiple R-squared: 0.6382, Adjusted R-squared: 0.6345
## F-statistic: 170.7 on 4 and 387 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=5")+
geom_point(alpha=0.55, color="azure4") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga6= lm(mpg ~ cut(horsepower,6),data=Auto)
summary(mod_tangga6)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.0252 -2.9344 -0.1298 2.5172 14.5748
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.0252 0.4604 69.56 <2e-16 ***
## cut(horsepower, 6)(76.7,107] -8.0908 0.5947 -13.60 <2e-16 ***
## cut(horsepower, 6)(107,138] -12.2742 0.8109 -15.14 <2e-16 ***
## cut(horsepower, 6)(138,169] -16.9697 0.7850 -21.62 <2e-16 ***
## cut(horsepower, 6)(169,199] -18.3824 1.1187 -16.43 <2e-16 ***
## cut(horsepower, 6)(199,230] -19.3889 1.4821 -13.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.672 on 386 degrees of freedom
## Multiple R-squared: 0.6462, Adjusted R-squared: 0.6416
## F-statistic: 141 on 5 and 386 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower Fungsi Tangga k=6
")+
geom_point(alpha=0.55, color="azure4") +
stat_smooth(method = "lm",
formula = y~cut(x,6),
lty = 1, col = "blue",se = F)+
theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(Tangga4=MSE(predict(mod_tangga4),Auto$mpg),
Tangga5=MSE(predict(mod_tangga5),Auto$mpg),
Tangga6=MSE(predict(mod_tangga6),Auto$mpg)
)
## Tangga4 Tangga5 Tangga6
## 1 24.75519 21.98227 21.49764
mod_spline4ns = lm(mpg ~ ns(horsepower, knots = c(80,120,160,200)),data=Auto) #knot=4
summary(mod_spline4ns)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)),
## data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9354 -2.5877 -0.2446 2.2605 15.9892
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 37.975 1.128 33.677
## ns(horsepower, knots = c(80, 120, 160, 200))1 -21.281 1.254 -16.969
## ns(horsepower, knots = c(80, 120, 160, 200))2 -22.882 2.010 -11.386
## ns(horsepower, knots = c(80, 120, 160, 200))3 -24.045 2.238 -10.743
## ns(horsepower, knots = c(80, 120, 160, 200))4 -34.239 2.950 -11.607
## ns(horsepower, knots = c(80, 120, 160, 200))5 -17.341 2.189 -7.921
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))1 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))2 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))3 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))4 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))5 2.54e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.369 on 386 degrees of freedom
## Multiple R-squared: 0.6907, Adjusted R-squared: 0.6867
## F-statistic: 172.4 on 5 and 386 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Natural Cubic Spline k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(80,120,160,200)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(80,120,160,200)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
mod_spline5ns = lm(mpg ~ ns(horsepower, knots = c(80,110,140,170,200)),data=Auto) #knot=5
summary(mod_spline5ns)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 110, 140, 170,
## 200)), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.1552 -2.6065 -0.2325 2.2470 15.4384
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 37.302 1.193 31.263
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1 -19.217 1.220 -15.747
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2 -20.006 1.982 -10.092
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3 -24.965 2.137 -11.684
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 -22.110 2.766 -7.995
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5 -31.488 3.085 -10.207
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 -18.837 2.384 -7.903
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1 < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2 < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3 < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 1.53e-14 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5 < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 2.90e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.36 on 385 degrees of freedom
## Multiple R-squared: 0.6927, Adjusted R-squared: 0.6879
## F-statistic: 144.7 on 6 and 385 DF, p-value: < 2.2e-16
ggplot(Auto,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Natural Cubic Spline k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(80,110,140,170,200)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(80,110,140,170,200)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(NSpline4=MSE(predict(mod_spline4ns),Auto$mpg),
NSpline5=MSE(predict(mod_spline5ns),Auto$mpg)
)
## NSpline4 NSpline5
## 1 18.79342 18.67029
data.frame(Linear=MSE(predict(mod_linear),Auto$mpg),
Poly2=MSE(predict(mod_polinomial2),Auto$mpg),
Poly3=MSE(predict(mod_polinomial3),Auto$mpg),
Poly4=MSE(predict(mod_polinomial4),Auto$mpg),
Tangga4=MSE(predict(mod_tangga4),Auto$mpg),
Tangga5=MSE(predict(mod_tangga5),Auto$mpg),
Tangga6=MSE(predict(mod_tangga6),Auto$mpg),
NSpline4=MSE(predict(mod_spline4ns),Auto$mpg),
NSpline5=MSE(predict(mod_spline5ns),Auto$mpg)
)
## Linear Poly2 Poly3 Poly4 Tangga4 Tangga5 Tangga6 NSpline4
## 1 23.94366 18.98477 18.94499 18.87633 24.75519 21.98227 21.49764 18.79342
## NSpline5
## 1 18.67029
Amerika<-Auto%>%
select(mpg, horsepower, origin)%>%
filter(origin == 1)
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower")+
geom_point(alpha=0.6, color="navy") +
theme_bw()
mod_linearA = lm(mpg~horsepower,data=Amerika)
summary(mod_linearA)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7415 -3.1643 -0.6389 2.4849 13.8357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.476496 0.857484 40.21 <2e-16 ***
## horsepower -0.121320 0.006831 -17.76 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.257 on 243 degrees of freedom
## Multiple R-squared: 0.5649, Adjusted R-squared: 0.5631
## F-statistic: 315.4 on 1 and 243 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Linear")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~x,lty = 1, col = "blue",se = F)+
theme_bw()
mod_polinomial2A = lm(mpg ~ poly(horsepower,2,raw = T),data=Amerika) #Polinomial berderajat 2
summary(mod_polinomial2A)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9497 -2.5745 -0.0895 2.1583 13.1866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.9150173 2.3893530 21.73 < 2e-16 ***
## poly(horsepower, 2, raw = T)1 -0.4104369 0.0379990 -10.80 < 2e-16 ***
## poly(horsepower, 2, raw = T)2 0.0010776 0.0001398 7.71 3.26e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.822 on 242 degrees of freedom
## Multiple R-squared: 0.6507, Adjusted R-squared: 0.6478
## F-statistic: 225.4 on 2 and 242 DF, p-value: < 2.2e-16
pplot.d2A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) + #polinomial derajat 2
ggtitle("mpg vs. horsepower Polinomial Derajat 2")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d2A
mod_polinomial3A = lm(mpg ~ poly(horsepower,3,raw = T),data=Amerika) #polinomial berderajat 3
summary(mod_polinomial3A)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3092 -2.3199 -0.2081 2.0794 13.2928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.224e+01 7.181e+00 8.667 6.56e-16 ***
## poly(horsepower, 3, raw = T)1 -6.634e-01 1.703e-01 -3.896 0.000127 ***
## poly(horsepower, 3, raw = T)2 2.995e-03 1.266e-03 2.366 0.018796 *
## poly(horsepower, 3, raw = T)3 -4.531e-06 2.974e-06 -1.524 0.128867
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.812 on 241 degrees of freedom
## Multiple R-squared: 0.654, Adjusted R-squared: 0.6497
## F-statistic: 151.8 on 3 and 241 DF, p-value: < 2.2e-16
pplot.d3A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Polinomial Derajat 3")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d3A
mod_polinomial4A = lm(mpg ~ poly(horsepower,4,raw = T),data=Amerika) #polinomial berderajat 4
summary(mod_polinomial4A)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3162 -2.3080 -0.2287 2.0895 13.3711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.080e+01 1.953e+01 3.625 0.000352 ***
## poly(horsepower, 4, raw = T)1 -9.562e-01 6.440e-01 -1.485 0.138931
## poly(horsepower, 4, raw = T)2 6.540e-03 7.623e-03 0.858 0.391827
## poly(horsepower, 4, raw = T)3 -2.252e-05 3.827e-05 -0.588 0.556777
## poly(horsepower, 4, raw = T)4 3.244e-08 6.879e-08 0.471 0.637715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.818 on 240 degrees of freedom
## Multiple R-squared: 0.6543, Adjusted R-squared: 0.6486
## F-statistic: 113.6 on 4 and 240 DF, p-value: < 2.2e-16
pplot.d4A<-ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Polinomial Derajat 4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,4,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d4A
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(LinearA=MSE(predict(mod_linearA),Amerika$mpg),
Poly2A=MSE(predict(mod_polinomial2A),Amerika$mpg),
Poly3A=MSE(predict(mod_polinomial3A),Amerika$mpg),
Poly4A=MSE(predict(mod_polinomial4A),Amerika$mpg)
)
## LinearA Poly2A Poly3A Poly4A
## 1 17.97539 14.43097 14.29326 14.28003
mod_tangga4A = lm(mpg ~ cut(horsepower,4),data=Amerika)
summary(mod_tangga4A)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.805 -2.005 -0.455 1.821 13.195
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.8053 0.4230 61.00 <2e-16 ***
## cut(horsepower, 4)(96.5,141] -7.0258 0.6418 -10.95 <2e-16 ***
## cut(horsepower, 4)(141,186] -11.3503 0.6800 -16.69 <2e-16 ***
## cut(horsepower, 4)(186,230] -12.9523 1.0859 -11.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.123 on 241 degrees of freedom
## Multiple R-squared: 0.5951, Adjusted R-squared: 0.5901
## F-statistic: 118.1 on 3 and 241 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga5A = lm(mpg ~ cut(horsepower,5),data=Amerika)
summary(mod_tangga5A)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.4268 -2.4712 -0.4712 2.2615 12.7220
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.4268 0.5438 50.436 <2e-16 ***
## cut(horsepower, 5)(87.6,123] -6.1488 0.6912 -8.896 <2e-16 ***
## cut(horsepower, 5)(123,159] -11.9556 0.7592 -15.747 <2e-16 ***
## cut(horsepower, 5)(159,194] -13.6883 0.9657 -14.174 <2e-16 ***
## cut(horsepower, 5)(194,230] -14.6576 1.2528 -11.700 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.069 on 240 degrees of freedom
## Multiple R-squared: 0.6073, Adjusted R-squared: 0.6008
## F-statistic: 92.79 on 4 and 240 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga6A = lm(mpg ~ cut(horsepower,6),data=Amerika)
summary(mod_tangga6A)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3353 -2.6560 -0.3852 1.8235 15.7935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.3353 0.7353 38.535 < 2e-16 ***
## cut(horsepower, 6)(81.7,111] -6.1288 0.8441 -7.261 5.38e-12 ***
## cut(horsepower, 6)(111,141] -10.4501 1.1052 -9.455 < 2e-16 ***
## cut(horsepower, 6)(141,171] -13.6793 0.9531 -14.353 < 2e-16 ***
## cut(horsepower, 6)(171,200] -15.1588 1.2736 -11.902 < 2e-16 ***
## cut(horsepower, 6)(200,230] -15.4353 1.5424 -10.007 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.288 on 239 degrees of freedom
## Multiple R-squared: 0.5659, Adjusted R-squared: 0.5568
## F-statistic: 62.31 on 5 and 239 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=6")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,6),
lty = 1, col = "blue",se = F)+
theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(Tangga4A=MSE(predict(mod_tangga4),Amerika$mpg),
Tangga5A=MSE(predict(mod_tangga5),Amerika$mpg),
TanggaA=MSE(predict(mod_tangga6),Amerika$mpg)
)
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Tangga4A Tangga5A TanggaA
## 1 93.45857 93.3432 96.5047
mod_spline4nsA = lm(mpg ~ ns(horsepower, knots = c(80,120,160,200)),data=Amerika) #knot=4
summary(mod_spline4nsA)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)),
## data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4671 -2.1836 -0.3698 2.0508 13.2883
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 35.039 1.813 19.322
## ns(horsepower, knots = c(80, 120, 160, 200))1 -18.270 1.748 -10.454
## ns(horsepower, knots = c(80, 120, 160, 200))2 -20.386 2.556 -7.975
## ns(horsepower, knots = c(80, 120, 160, 200))3 -20.800 2.173 -9.574
## ns(horsepower, knots = c(80, 120, 160, 200))4 -30.835 4.178 -7.380
## ns(horsepower, knots = c(80, 120, 160, 200))5 -14.921 1.967 -7.586
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))1 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))2 6.29e-14 ***
## ns(horsepower, knots = c(80, 120, 160, 200))3 < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))4 2.61e-12 ***
## ns(horsepower, knots = c(80, 120, 160, 200))5 7.33e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.819 on 239 degrees of freedom
## Multiple R-squared: 0.6557, Adjusted R-squared: 0.6485
## F-statistic: 91.02 on 5 and 239 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Natural Cubic Spline k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(80,120,160,200)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(80,120,160,200)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
mod_spline5nsA = lm(mpg ~ ns(horsepower, knots = c(80,110,140,170,200)),data=Amerika) #knot=5
summary(mod_spline5nsA)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 110, 140, 170,
## 200)), data = Amerika)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5219 -2.1716 -0.3143 2.0250 13.1723
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 34.495 1.961 17.589
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1 -16.343 1.841 -8.880
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2 -18.485 2.534 -7.296
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3 -20.965 2.471 -8.486
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 -20.274 2.681 -7.562
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5 -28.611 4.475 -6.393
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 -15.750 2.164 -7.279
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))1 < 2e-16 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))2 4.39e-12 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))3 2.31e-15 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))4 8.58e-13 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))5 8.51e-10 ***
## ns(horsepower, knots = c(80, 110, 140, 170, 200))6 4.86e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.823 on 238 degrees of freedom
## Multiple R-squared: 0.6564, Adjusted R-squared: 0.6477
## F-statistic: 75.78 on 6 and 238 DF, p-value: < 2.2e-16
ggplot(Amerika,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Natural Cubic Spline k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(80,110,140,170,200)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(80,110,140,170,200)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(NSpline4A=MSE(predict(mod_spline4nsA),Amerika$mpg),
NSpline5A=MSE(predict(mod_spline5nsA),Amerika$mpg)
)
## NSpline4A NSpline5A
## 1 14.2242 14.19415
data.frame(LinearA=MSE(predict(mod_linearA),Amerika$mpg),
Poly2A=MSE(predict(mod_polinomial2A),Amerika$mpg),
Poly3A=MSE(predict(mod_polinomial3A),Amerika$mpg),
Poly4A=MSE(predict(mod_polinomial4A),Amerika$mpg),
Tangga4A=MSE(predict(mod_tangga4A),Amerika$mpg),
Tangga5A=MSE(predict(mod_tangga5A),Amerika$mpg),
Tangga6A=MSE(predict(mod_tangga6A),Amerika$mpg),
NSpline4A=MSE(predict(mod_spline4nsA),Amerika$mpg),
NSpline5A=MSE(predict(mod_spline5nsA),Amerika$mpg)
)
## LinearA Poly2A Poly3A Poly4A Tangga4A Tangga5A Tangga6A NSpline4A
## 1 17.97539 14.43097 14.29326 14.28003 16.72448 16.22219 17.93286 14.2242
## NSpline5A
## 1 14.19415
Eropa<-Auto%>%
select(mpg, horsepower, origin)%>%
filter(origin == 2)
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower")+
geom_point(alpha=0.6, color="black") +
theme_bw()
mod_linearE = lm(mpg~horsepower,data=Eropa)
summary(mod_linearE)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4946 -2.8387 -0.4171 2.2148 12.8858
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.47373 2.44679 18.585 < 2e-16 ***
## horsepower -0.22184 0.02948 -7.526 1.87e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.864 on 66 degrees of freedom
## Multiple R-squared: 0.4618, Adjusted R-squared: 0.4537
## F-statistic: 56.64 on 1 and 66 DF, p-value: 1.869e-10
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Linear")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~x,lty = 1, col = "blue",se = F)+
theme_bw()
mod_polinomial2E = lm(mpg ~ poly(horsepower,2,raw = T),data=Eropa) #Polinomial berderajat 2
summary(mod_polinomial2E)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6282 -2.7724 -0.4052 2.2773 12.9676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.1311209 8.2852287 5.689 3.3e-07 ***
## poly(horsepower, 2, raw = T)1 -0.2631496 0.1994015 -1.320 0.192
## poly(horsepower, 2, raw = T)2 0.0002425 0.0011574 0.210 0.835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.899 on 65 degrees of freedom
## Multiple R-squared: 0.4622, Adjusted R-squared: 0.4456
## F-statistic: 27.93 on 2 and 65 DF, p-value: 1.76e-09
pplot.d2E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) + #polinomial derajat 2
ggtitle("mpg vs. horsepower Polinomial Derajat 2")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d2E
mod_polinomial3E = lm(mpg ~ poly(horsepower,3,raw = T),data=Eropa) #polinomial berderajat 3
summary(mod_polinomial3E)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6346 -2.6497 -0.8141 2.1774 12.8935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.060e+01 2.781e+01 1.460 0.149
## poly(horsepower, 3, raw = T)1 -7.245e-03 1.058e+00 -0.007 0.995
## poly(horsepower, 3, raw = T)2 -2.924e-03 1.291e-02 -0.226 0.822
## poly(horsepower, 3, raw = T)3 1.241e-05 5.039e-05 0.246 0.806
##
## Residual standard error: 4.935 on 64 degrees of freedom
## Multiple R-squared: 0.4627, Adjusted R-squared: 0.4375
## F-statistic: 18.37 on 3 and 64 DF, p-value: 1.041e-08
pplot.d3E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Polinomial Derajat 3")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d3E
mod_polinomial4E = lm(mpg ~ poly(horsepower,4,raw = T),data=Eropa) #polinomial berderajat 4
summary(mod_polinomial4E)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0736 -2.8215 -0.5424 2.4147 12.9708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.497e+01 1.109e+02 -0.225 0.823
## poly(horsepower, 4, raw = T)1 3.362e+00 5.617e+00 0.599 0.552
## poly(horsepower, 4, raw = T)2 -6.496e-02 1.024e-01 -0.635 0.528
## poly(horsepower, 4, raw = T)3 4.998e-04 7.994e-04 0.625 0.534
## poly(horsepower, 4, raw = T)4 -1.384e-06 2.266e-06 -0.611 0.543
##
## Residual standard error: 4.959 on 63 degrees of freedom
## Multiple R-squared: 0.4659, Adjusted R-squared: 0.4319
## F-statistic: 13.74 on 4 and 63 DF, p-value: 4.134e-08
pplot.d4E<-ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Polinomial Derajat 4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,4,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d4E
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(LinearE=MSE(predict(mod_linearE),Eropa$mpg),
Poly2E=MSE(predict(mod_polinomial2E),Eropa$mpg),
Poly3E=MSE(predict(mod_polinomial3E),Eropa$mpg),
Poly4E=MSE(predict(mod_polinomial4E),Eropa$mpg)
)
## LinearE Poly2E Poly3E Poly4E
## 1 22.95978 22.94428 22.92256 22.78757
mod_tangga4E = lm(mpg ~ cut(horsepower,4),data=Eropa)
summary(mod_tangga4E)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0000 -3.2531 -0.7048 2.9455 13.0029
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.000 1.346 24.526 < 2e-16 ***
## cut(horsepower, 4)(67.8,89.5] -4.503 1.615 -2.788 0.00698 **
## cut(horsepower, 4)(89.5,111] -9.927 2.069 -4.799 9.94e-06 ***
## cut(horsepower, 4)(111,133] -13.087 2.281 -5.736 2.84e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.211 on 64 degrees of freedom
## Multiple R-squared: 0.4009, Adjusted R-squared: 0.3728
## F-statistic: 14.28 on 3 and 64 DF, p-value: 3.171e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=
4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga5E = lm(mpg ~ cut(horsepower,5),data=Eropa)
summary(mod_tangga5E)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7818 -3.6655 -0.1383 2.2481 11.8345
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.782 1.506 22.434 < 2e-16 ***
## cut(horsepower, 5)(63.4,80.8] -4.116 1.769 -2.328 0.0232 *
## cut(horsepower, 5)(80.8,98.2] -8.782 1.956 -4.489 3.11e-05 ***
## cut(horsepower, 5)(98.2,116] -12.071 2.245 -5.377 1.18e-06 ***
## cut(horsepower, 5)(116,133] -17.215 3.253 -5.292 1.63e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.994 on 63 degrees of freedom
## Multiple R-squared: 0.4583, Adjusted R-squared: 0.4239
## F-statistic: 13.33 on 4 and 63 DF, p-value: 6.335e-08
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga6E = lm(mpg ~ cut(horsepower,6),data=Eropa)
summary(mod_tangga6E)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1800 -3.1476 -0.0983 2.3604 13.6316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.180 1.623 21.061 < 2e-16 ***
## cut(horsepower, 6)(60.5,75] -4.550 1.988 -2.289 0.02549 *
## cut(horsepower, 6)(75,89.5] -6.312 2.005 -3.148 0.00253 **
## cut(horsepower, 6)(89.5,104] -11.036 2.358 -4.680 1.60e-05 ***
## cut(horsepower, 6)(104,118] -12.023 2.529 -4.754 1.23e-05 ***
## cut(horsepower, 6)(118,133] -17.613 3.378 -5.214 2.26e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.132 on 62 degrees of freedom
## Multiple R-squared: 0.4371, Adjusted R-squared: 0.3917
## F-statistic: 9.628 on 5 and 62 DF, p-value: 7.756e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("mpg vs. horsepower Fungsi Tangga k=6")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,6),
lty = 1, col = "blue",se = F)+
theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(Tangga4E=MSE(predict(mod_tangga4E),Eropa$mpg),
Tangga5E=MSE(predict(mod_tangga5E),Eropa$mpg),
Tangga6E=MSE(predict(mod_tangga6E),Eropa$mpg)
)
## Tangga4E Tangga5E Tangga6E
## 1 25.55912 23.10967 24.01507
mod_spline4nsE = lm(mpg ~ ns(horsepower, knots = c(60,80,100,120)),data=Eropa) #knot=4
summary(mod_spline4nsE)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 80, 100, 120)),
## data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1994 -3.2453 -0.1622 1.9845 12.4669
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 35.567 2.211 16.090
## ns(horsepower, knots = c(60, 80, 100, 120))1 -6.052 3.219 -1.880
## ns(horsepower, knots = c(60, 80, 100, 120))2 -16.032 4.813 -3.331
## ns(horsepower, knots = c(60, 80, 100, 120))3 -11.640 4.156 -2.801
## ns(horsepower, knots = c(60, 80, 100, 120))4 -22.488 6.278 -3.582
## ns(horsepower, knots = c(60, 80, 100, 120))5 -17.760 4.793 -3.705
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(60, 80, 100, 120))1 0.064829 .
## ns(horsepower, knots = c(60, 80, 100, 120))2 0.001462 **
## ns(horsepower, knots = c(60, 80, 100, 120))3 0.006789 **
## ns(horsepower, knots = c(60, 80, 100, 120))4 0.000670 ***
## ns(horsepower, knots = c(60, 80, 100, 120))5 0.000452 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.975 on 62 degrees of freedom
## Multiple R-squared: 0.4711, Adjusted R-squared: 0.4285
## F-statistic: 11.05 on 5 and 62 DF, p-value: 1.246e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("Natural Cubic Spline k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(60,80,100,120)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(60,80,100,120)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
mod_spline5nsE = lm(mpg ~ ns(horsepower, knots = c(60,75,90,105,120)),data=Eropa) #knot=5
summary(mod_spline5nsE)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 75, 90, 105,
## 120)), data = Eropa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0509 -3.2520 -0.3632 2.0788 12.4257
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 35.654 2.305 15.466
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1 -5.394 3.004 -1.796
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2 -11.603 4.074 -2.848
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3 -14.717 5.232 -2.813
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4 -12.711 5.096 -2.494
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5 -22.634 6.906 -3.277
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6 -17.803 5.175 -3.440
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1 0.07749 .
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2 0.00598 **
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3 0.00659 **
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4 0.01534 *
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5 0.00173 **
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6 0.00105 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.019 on 61 degrees of freedom
## Multiple R-squared: 0.4704, Adjusted R-squared: 0.4183
## F-statistic: 9.029 on 6 and 61 DF, p-value: 4.636e-07
ggplot(Eropa,aes(x=horsepower, y=mpg)) +
ggtitle("Natural Cubic Spline k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(60,75,90,105,120)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(60,75,90,105,120)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(NSpline4E=MSE(predict(mod_spline4nsE),Eropa$mpg),
NSpline5E=MSE(predict(mod_spline5nsE),Eropa$mpg)
)
## NSpline4E NSpline5E
## 1 22.56312 22.59495
data.frame(LinearE=MSE(predict(mod_linearE),Eropa$mpg),
Poly2E=MSE(predict(mod_polinomial2E),Eropa$mpg),
Poly3E=MSE(predict(mod_polinomial3E),Eropa$mpg),
Poly4E=MSE(predict(mod_polinomial4E),Eropa$mpg),
Tangga4E=MSE(predict(mod_tangga4E),Eropa$mpg),
Tangga5E=MSE(predict(mod_tangga5E),Eropa$mpg),
Tangga6E=MSE(predict(mod_tangga6E),Eropa$mpg),
NSpline4E=MSE(predict(mod_spline4nsE),Eropa$mpg),
NSpline5E=MSE(predict(mod_spline5nsE),Eropa$mpg)
)
## LinearE Poly2E Poly3E Poly4E Tangga4E Tangga5E Tangga6E NSpline4E
## 1 22.95978 22.94428 22.92256 22.78757 25.55912 23.10967 24.01507 22.56312
## NSpline5E
## 1 22.59495
Jepang<-Auto%>%
select(mpg, horsepower, origin)%>%
filter(origin == 3)
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower")+
geom_point(alpha=0.6, color="black") +
theme_bw()
mod_linearJ = lm(mpg~horsepower,data=Jepang)
summary(mod_linearJ)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.112 -2.758 -0.751 2.563 14.249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.8162 2.3555 20.724 < 2e-16 ***
## horsepower -0.2300 0.0288 -7.986 1.08e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.533 on 77 degrees of freedom
## Multiple R-squared: 0.4531, Adjusted R-squared: 0.446
## F-statistic: 63.78 on 1 and 77 DF, p-value: 1.08e-11
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Plot mpg vs. horsepower Linear")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~x,lty = 1, col = "blue",se = F)+
theme_bw()
mod_polinomial2J = lm(mpg ~ poly(horsepower,2,raw = T),data=Jepang) #Polinomial berderajat 2
summary(mod_polinomial2J)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.1949 -2.5568 -0.6165 2.2408 12.5211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.401527 9.731163 7.029 7.74e-10 ***
## poly(horsepower, 2, raw = T)1 -0.710534 0.233642 -3.041 0.00323 **
## poly(horsepower, 2, raw = T)2 0.002808 0.001355 2.072 0.04169 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.439 on 76 degrees of freedom
## Multiple R-squared: 0.4823, Adjusted R-squared: 0.4687
## F-statistic: 35.4 on 2 and 76 DF, p-value: 1.365e-11
pplot.d2J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) + #polinomial derajat 2
ggtitle("Polinomial derajat 2")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,2,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d2J
mod_polinomial3J = lm(mpg ~ poly(horsepower,3,raw = T),data=Jepang) #polinomial berderajat 3
summary(mod_polinomial3J)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 3, raw = T), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8602 -2.7115 -0.5224 2.1985 11.6985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.163e+01 3.494e+01 -2.050 0.04385 *
## poly(horsepower, 3, raw = T)1 4.308e+00 1.230e+00 3.503 0.00078 ***
## poly(horsepower, 3, raw = T)2 -5.498e-02 1.400e-02 -3.926 0.00019 ***
## poly(horsepower, 3, raw = T)3 2.142e-04 5.170e-05 4.142 8.92e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.031 on 75 degrees of freedom
## Multiple R-squared: 0.5787, Adjusted R-squared: 0.5618
## F-statistic: 34.34 on 3 and 75 DF, p-value: 4.485e-14
pplot.d3J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Polinomial derajat 3")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,3,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d3J
mod_polinomial4J = lm(mpg ~ poly(horsepower,4,raw = T),data=Jepang) #polinomial berderajat 4
summary(mod_polinomial4J)
##
## Call:
## lm(formula = mpg ~ poly(horsepower, 4, raw = T), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6872 -2.5927 -0.4482 2.1121 11.5842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.218e+02 1.338e+02 -0.910 0.366
## poly(horsepower, 4, raw = T)1 6.792e+00 6.518e+00 1.042 0.301
## poly(horsepower, 4, raw = T)2 -9.980e-02 1.163e-01 -0.858 0.394
## poly(horsepower, 4, raw = T)3 5.626e-04 8.990e-04 0.626 0.533
## poly(horsepower, 4, raw = T)4 -9.851e-07 2.537e-06 -0.388 0.699
##
## Residual standard error: 4.054 on 74 degrees of freedom
## Multiple R-squared: 0.5795, Adjusted R-squared: 0.5568
## F-statistic: 25.5 on 4 and 74 DF, p-value: 2.684e-13
pplot.d4J<-ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Polinomial derajat 4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~poly(x,4,raw=T),
lty = 1, col = "blue",se = F)+
theme_bw()
pplot.d4J
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(LinearJ=MSE(predict(mod_linearJ),Jepang$mpg),
Poly2J=MSE(predict(mod_polinomial2J),Jepang$mpg),
Poly3J=MSE(predict(mod_polinomial3J),Jepang$mpg),
Poly4J=MSE(predict(mod_polinomial4J),Jepang$mpg)
)
## LinearJ Poly2J Poly3J Poly4J
## 1 20.02862 18.95803 15.42813 15.39677
mod_tangga4J = lm(mpg ~ cut(horsepower,4),data=Jepang)
summary(mod_tangga4J)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 4), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.444 -2.644 -0.590 2.579 11.803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.7973 0.6901 50.425 < 2e-16 ***
## cut(horsepower, 4)(72,92] -5.3529 1.2063 -4.438 3.07e-05 ***
## cut(horsepower, 4)(92,112] -10.5073 1.1650 -9.019 1.37e-13 ***
## cut(horsepower, 4)(112,132] -9.2223 2.2093 -4.174 7.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.198 on 75 degrees of freedom
## Multiple R-squared: 0.5432, Adjusted R-squared: 0.5249
## F-statistic: 29.73 on 3 and 75 DF, p-value: 9.028e-13
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Fungsi Tangga k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,4),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga5J = lm(mpg ~ cut(horsepower,5),data=Jepang)
summary(mod_tangga5J)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 5), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7444 -2.8774 -0.7774 2.1891 11.7226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.8774 0.7651 45.585 < 2e-16 ***
## cut(horsepower, 5)(68,84] -2.4441 1.3399 -1.824 0.072169 .
## cut(horsepower, 5)(84,100] -9.1330 1.1214 -8.144 6.91e-12 ***
## cut(horsepower, 5)(100,116] -12.9108 2.5758 -5.012 3.56e-06 ***
## cut(horsepower, 5)(116,132] -9.2441 2.5758 -3.589 0.000594 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.26 on 74 degrees of freedom
## Multiple R-squared: 0.5358, Adjusted R-squared: 0.5107
## F-statistic: 21.35 on 4 and 74 DF, p-value: 9.702e-12
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Fungsi Tangga k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,5),
lty = 1, col = "blue",se = F)+
theme_bw()
mod_tangga6J = lm(mpg ~ cut(horsepower,6),data=Jepang)
summary(mod_tangga6J)
##
## Call:
## lm(formula = mpg ~ cut(horsepower, 6), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7556 -2.4667 -0.7556 1.9276 11.4107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.4667 0.9915 35.769 < 2e-16 ***
## cut(horsepower, 6)(65.3,78.7] -2.2774 1.2709 -1.792 0.077286 .
## cut(horsepower, 6)(78.7,92] -7.7111 1.7174 -4.490 2.61e-05 ***
## cut(horsepower, 6)(92,105] -10.7278 1.4023 -7.650 6.38e-11 ***
## cut(horsepower, 6)(105,119] -13.5000 2.6234 -5.146 2.16e-06 ***
## cut(horsepower, 6)(119,132] -9.8333 2.6234 -3.748 0.000353 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.207 on 73 degrees of freedom
## Multiple R-squared: 0.5534, Adjusted R-squared: 0.5229
## F-statistic: 18.09 on 5 and 73 DF, p-value: 1.233e-11
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Fungsi Tangga k=6")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~cut(x,6),
lty = 1, col = "blue",se = F)+
theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(Tangga4J=MSE(predict(mod_tangga4J),Jepang$mpg),
Tangga5J=MSE(predict(mod_tangga5J),Jepang$mpg),
Tangga6J=MSE(predict(mod_tangga6J),Jepang$mpg)
)
## Tangga4J Tangga5J Tangga6J
## 1 16.72759 16.99883 16.35272
mod_spline4nsJ = lm(mpg ~ ns(horsepower, knots = c(60,80,100,120)),data=Jepang) #knot=4
summary(mod_spline4nsJ)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 80, 100, 120)),
## data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2865 -2.8742 -0.3009 1.8593 11.0460
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 32.676 2.022 16.163
## ns(horsepower, knots = c(60, 80, 100, 120))1 -6.328 3.188 -1.985
## ns(horsepower, knots = c(60, 80, 100, 120))2 -6.721 3.444 -1.951
## ns(horsepower, knots = c(60, 80, 100, 120))3 -16.242 3.971 -4.090
## ns(horsepower, knots = c(60, 80, 100, 120))4 0.768 5.220 0.147
## ns(horsepower, knots = c(60, 80, 100, 120))5 -4.158 3.979 -1.045
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(60, 80, 100, 120))1 0.05091 .
## ns(horsepower, knots = c(60, 80, 100, 120))2 0.05485 .
## ns(horsepower, knots = c(60, 80, 100, 120))3 0.00011 ***
## ns(horsepower, knots = c(60, 80, 100, 120))4 0.88343
## ns(horsepower, knots = c(60, 80, 100, 120))5 0.29940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.018 on 73 degrees of freedom
## Multiple R-squared: 0.5926, Adjusted R-squared: 0.5647
## F-statistic: 21.24 on 5 and 73 DF, p-value: 4.75e-13
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Natural Cubic Spline k=4")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(60,80,100,120)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(60,80,100,120)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
mod_spline5nsJ = lm(mpg ~ ns(horsepower, knots = c(60,75,90,105,120)),data=Jepang) #knot=5
summary(mod_spline5nsJ)
##
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(60, 75, 90, 105,
## 120)), data = Jepang)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7575 -2.7975 -0.2206 1.5872 10.8295
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 32.376 2.080 15.568
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1 -2.868 2.876 -0.997
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2 -5.189 3.498 -1.483
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3 -10.090 4.199 -2.403
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4 -14.030 4.285 -3.274
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5 1.516 5.393 0.281
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6 -4.142 4.164 -0.995
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(horsepower, knots = c(60, 75, 90, 105, 120))1 0.32202
## ns(horsepower, knots = c(60, 75, 90, 105, 120))2 0.14238
## ns(horsepower, knots = c(60, 75, 90, 105, 120))3 0.01883 *
## ns(horsepower, knots = c(60, 75, 90, 105, 120))4 0.00163 **
## ns(horsepower, knots = c(60, 75, 90, 105, 120))5 0.77942
## ns(horsepower, knots = c(60, 75, 90, 105, 120))6 0.32327
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.034 on 72 degrees of freedom
## Multiple R-squared: 0.595, Adjusted R-squared: 0.5613
## F-statistic: 17.63 on 6 and 72 DF, p-value: 1.905e-12
ggplot(Jepang,aes(x=horsepower, y=mpg)) +
ggtitle("Natural Cubic Spline k=5")+
geom_point(alpha=0.55, color="black") +
stat_smooth(method = "lm",
formula = y~bs(x, knots = c(60,75,90,105,120)),
lty = 1, aes(col = "Cubic Spline"),se = F)+
stat_smooth(method = "lm",
formula = y~ns(x, knots = c(60,75,90,105,120)),
lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
scale_color_manual(values = c("Natural Cubic Spline"="red","Cubic Spline"="blue"))+theme_bw()
MSE=function(pred,actual){
mean((pred-actual)^2)
}
data.frame(NSpline4J=MSE(predict(mod_spline4nsJ),Jepang$mpg),
NSpline5J=MSE(predict(mod_spline5nsJ),Jepang$mpg)
)
## NSpline4J NSpline5J
## 1 14.91692 14.83045
Jepang
data.frame(LinearJ=MSE(predict(mod_linearJ),Jepang$mpg),
Poly2J=MSE(predict(mod_polinomial2J),Jepang$mpg),
Poly3J=MSE(predict(mod_polinomial3J),Jepang$mpg),
Poly4J=MSE(predict(mod_polinomial4J),Jepang$mpg),
Tangga4J=MSE(predict(mod_tangga4J),Jepang$mpg),
Tangga5J=MSE(predict(mod_tangga5J),Jepang$mpg),
Tangga6J=MSE(predict(mod_tangga6J),Jepang$mpg),
NSpline4J=MSE(predict(mod_spline4nsJ),Jepang$mpg),
NSpline5J=MSE(predict(mod_spline5nsJ),Jepang$mpg)
)
## LinearJ Poly2J Poly3J Poly4J Tangga4J Tangga5J Tangga6J NSpline4J
## 1 20.02862 18.95803 15.42813 15.39677 16.72759 16.99883 16.35272 14.91692
## NSpline5J
## 1 14.83045