Polynomial Regression, Peacewise Constant Function Regression and Spline Regression in R

Load Package

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(splines)
library(rsample)
library(mlr3measures)
## In order to avoid name clashes, do not attach 'mlr3measures'. Instead, only load the namespace with `requireNamespace("mlrmeasures")` and access the measures directly via `::`, e.g. `mlr3measures::auc()`.
library(ISLR)
library(grid)      
##import data
data("Auto",package="ISLR")
attach(Auto)   #data("Auto",package="ISLR")
## The following object is masked from package:ggplot2:
## 
##     mpg

Data Auto merupakan data frame dengan 392 Observasi yag terdiri dari 9 Variabel

#Menampilkan sebagian data
head(cbind(mpg,horsepower,origin))
##      mpg horsepower origin
## [1,]  18        130      1
## [2,]  15        165      1
## [3,]  18        150      1
## [4,]  16        150      1
## [5,]  17        140      1
## [6,]  15        198      1
# LM Plot
lmAuto <- ggplot(Auto,aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.7, color="orange") + 
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1,
              col = "brown",se = F)+
  labs(title="Non Linear Model Data Auto")+
  xlab("HorsePower")+
  ylab("mpg")
lmAuto

#a. Polynomial regression

##Empirically
##Polynomial regression degree 3 dengan Cross Validation

set.seed(1501211001)
cross_val <- vfold_cv(Auto,v= 10, strata = "horsepower")

breaksp <- 2:8

best_poly_Auto <- map_dfr (breaksp, function(i) {
metric_poly <- map_dfr(cross_val$splits,
                        function(x){
                          mod <- lm(mpg ~ poly(horsepower,breaksp=i,raw = T),
                                    data=Auto[x$in_id,])
                          pred <- predict(mod,
                                          newdata=Auto[-x$in_id,])
                          truth <- Auto[-x$in_id,]$mpg
                          
                          rmse <- mlr3measures::rmse(truth = truth,
                                                     response = pred
                          )
                          mae <- mlr3measures::mae(truth = truth,
                                                   response = pred
                          )
                          metric <- c(rmse,mae)
                          names(metric) <- c("rmse","mae")
                          return(metric)
                        }
)

metric_poly

# menghitung rata-rata untuk 10 folds
mean_metric_poly <- colMeans(metric_poly)

mean_metric_poly
 }
)

best_poly_Auto <- cbind(breaksp,best_poly_Auto)
best_poly_Auto
##   breaksp     rmse      mae
## 1       2 4.352578 3.278945
## 2       3 4.353783 3.276225
## 3       4 4.351293 3.285103
## 4       5 4.297097 3.240996
## 5       6 4.280785 3.243448
## 6       7 4.262132 3.220544
## 7       8 4.280944 3.245170

Berdasarkan nilai rmse dan mae yang diperoleh saya memilih derajat 2 untuk dimodelkan dalam regresi polinomial karena memiliki nilai yang minimum dan lebih kecil dari derajat 4.

##Visually
##2nd degree polynomial regression

polimodel <-  lm( mpg ~ poly(horsepower,2,raw = T),data= Auto)
summary(polimodel)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 2, raw = T), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7135  -2.5943  -0.0859   2.2868  15.8961 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   56.9000997  1.8004268   31.60   <2e-16 ***
## poly(horsepower, 2, raw = T)1 -0.4661896  0.0311246  -14.98   <2e-16 ***
## poly(horsepower, 2, raw = T)2  0.0012305  0.0001221   10.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared:  0.6876, Adjusted R-squared:  0.686 
## F-statistic:   428 on 2 and 389 DF,  p-value: < 2.2e-16
Auto_poly <- ggplot(Auto,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="orange") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Polinomial Derajat 2")+
  xlab("HorsePower ")+
  ylab("mpg")
Auto_poly

##b. Piecewise Constant Function 

### Empirically

set.seed(1001)
cross_valta <- vfold_cv(Auto,v=10,strata = "mpg")

breaks <- 3:15

best_tangga_Auto <- map_dfr(breaks, function(i){
  
  metric_tangga <- map_dfr(cross_valta$splits,
                            function(x){
                              
                              training <- Auto[x$in_id,]
                              training$horsepower <- cut(training$horsepower,i)
                              
                              mod <- lm(mpg ~ horsepower,
                                        data=training)
                              
                              labs_x <- levels(mod$model[,2])
                              
                              labs_x_breaks <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", labs_x) ),
                                                     upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs_x) ))
                              
                              
                              testing <- Auto[-x$in_id,]
                              
                             horsepower_new <- cut(testing$horsepower,c(labs_x_breaks[1,1],labs_x_breaks[,2]))
                              
                              pred <- predict(mod,
                                              newdata=list(horsepower=horsepower_new))
                              truth <- testing$mpg
                              
                              data_eval <- na.omit(data.frame(truth,pred))
                              
                              rmse <- mlr3measures::rmse(truth = data_eval$truth,
                                                         response = data_eval$pred
                              )
                              mae <- mlr3measures::mae(truth = data_eval$truth,
                                                       response = data_eval$pred
                              )
                              metric <- c(rmse,mae)
                              names(metric) <- c("rmse","mae")
                              return(metric)
                            }
  )
  
  metric_tangga
  
  # menghitung rata-rata untuk 10 folds
  mean_metric_tangga <- colMeans(metric_tangga)
  
  mean_metric_tangga
})
best_tangga_Auto <- cbind(breaks=breaks,best_tangga_Auto)
# menampilkan hasil all breaks
best_tangga_Auto
##    breaks     rmse      mae
## 1       3 5.776608 4.522114
## 2       4 4.981764 3.782906
## 3       5 4.713393 3.578057
## 4       6 4.649392 3.537827
## 5       7 4.570616 3.392555
## 6       8 4.457050 3.407758
## 7       9 4.536498 3.441833
## 8      10 4.585155 3.455845
## 9      11 4.552872 3.374287
## 10     12 4.456176 3.318251
## 11     13 4.520986 3.356401
## 12     14 4.403887 3.241835
## 13     15 4.300994 3.249859
#Visually
tangga1 = lm(mpg ~ cut(horsepower,13),data= Auto)

Auto_tangga <- ggplot(Auto,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.7, color="orange") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,13), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Fungsi Tangga Knot 12")+
  xlab("HorsePower")+
  ylab("mpg")
Auto_tangga

#c. Natural Cubic Spline

#nilai knots yang ditentukan oleh komputer
attr(ns(Auto$horsepower, df=10),"knots")
##   10%   20%   30%   40%   50%   60%   70%   80%   90% 
##  67.0  72.0  80.0  88.0  93.5 100.0 110.0 140.0 157.7
#Plot BSpline
ggplot(Auto,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="orange") +
  stat_smooth(method = "lm", 
              formula = y~bs(x, knots = c(60, 80,100, 120)),
              lty = 1, col = "brown",se = F)+
  labs(title="Regresi Cubic Spline Knot 60, 80,100, 120")+
  xlab("HorsePower")+
  ylab("mpg")

Dapat dilihat bahwa model plot yang dihasilkan memiliki ujung-ujung yang cenderung liar dalam hal ini ragamnya yg cenderung besar. Oleh karena itu, untuk mengurangi ketidakstabilan ujung-ujung model tersebut akan digunakan pemodelan Natural Cubic Spline.

Natural Cubic Spline

## Empirically

set.seed(1501211001)
cross_val <- vfold_cv(Auto,v=10,strata = "mpg")

df <- 2:15

best_spline_Auto <- map_dfr(df, function(i){
  metric_spline <- map_dfr(cross_val$splits,
                            function(x){
                              mod <- lm(mpg ~ ns(horsepower,df=i),
                                        data=Auto[x$in_id,])
                              pred <- predict(mod,
                                              newdata=Auto[-x$in_id,])
                              truth <- Auto[-x$in_id,]$mpg
                              
                              rmse <- mlr3measures::rmse(truth = truth,
                                                         response = pred
                              )
                              mae <- mlr3measures::mae(truth = truth,
                                                       response = pred
                              )
                              metric <- c(rmse,mae)
                              names(metric) <- c("rmse","mae")
                              return(metric)
                            }
  )
  
  metric_spline
  
  # menghitung rata-rata untuk 10 folds
  mean_spline1 <- colMeans(metric_spline)
  
  mean_spline1
}
)
best_spline_Auto <- cbind(df=df,best_spline_Auto)
# menampilkan hasil all breaks
best_spline_Auto
##    df     rmse      mae
## 1   2 4.353554 3.267611
## 2   3 4.394696 3.296135
## 3   4 4.388334 3.295952
## 4   5 4.384292 3.286203
## 5   6 4.381250 3.278629
## 6   7 4.356872 3.257715
## 7   8 4.361673 3.263892
## 8   9 4.357740 3.252422
## 9  10 4.346026 3.233591
## 10 11 4.380925 3.264800
## 11 12 4.364587 3.238694
## 12 13 4.376466 3.255618
## 13 14 4.371661 3.272694
## 14 15 4.349977 3.275905

Jika merujuk pada nilai rmse dan mae, nilai terendah yaitu pada df 10 (knot 9) dan terendah kedua adalah pada df 2 (knot 1).

##Visually dengan 9 Konts

splineAns = lm(mpg ~ ns(horsepower, knots = c(67,72,80,88,94,100,110,140,160)),data = Auto)

Auto_ncspline9 <- ggplot(Auto,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="orange") +
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(60,80,90,100,110,140,160,180,200)),
              lty = 1, col = "100",se = F)+
  labs(title="Natural Cubic Spline Knot 60,80,90,100,110,140,160,180,200")+
  xlab("HorsePower ")+
  ylab("mpg")
Auto_ncspline9

##Visually dengan 1 Konts

splineAns1 = lm(mpg ~ ns(horsepower, knots = c(120)),data = Auto)

Auto_ncspline1 <- ggplot(Auto,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="orange") +
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(120)),
              lty = 1, col = "100",se = F)+
  labs(title="Natural Cubic Spline Knot 120")+
  xlab("HorsePower ")+
  ylab("mpg")
Auto_ncspline1

Karena plot yang dihasilkan dengan 1 knot lebih mulus, maka saya menggunakan 1 knot pada pemodelan natural cubic spline.

#Perbandingan Model
vp.layout <- function(x, y) viewport(layout.pos.row=x, layout.pos.col=y)
arrange <- function(..., nrow=NULL, ncol=NULL, as.table=FALSE) {
  dots <- list(...)
  n <- length(dots)
  if(is.null(nrow) & is.null(ncol)) { nrow = floor(n/2) ; ncol = ceiling(n/nrow)}
  if(is.null(nrow)) { nrow = ceiling(n/ncol)}
  if(is.null(ncol)) { ncol = ceiling(n/nrow)}
  ## NOTE see n2mfrow in grDevices for possible alternative
  grid.newpage()
  pushViewport(viewport(layout=grid.layout(nrow,ncol) ) )
  ii.p <- 1
  for(ii.row in seq(1, nrow)){
    ii.table.row <- ii.row
    if(as.table) {ii.table.row <- nrow - ii.table.row + 1}
    for(ii.col in seq(1, ncol)){
      ii.table <- ii.p
      if(ii.p > n) break
      print(dots[[ii.table]], vp=vp.layout(ii.table.row, ii.col))
      ii.p <- ii.p + 1
    }
  }
}

plot_Amrik <- arrange(lmAuto, Auto_poly, Auto_tangga, Auto_ncspline1)

# 1. Data Amerika
#________________

# Menampilkan data mpg dan horsepower berdasarkan mesin asal Amerika
data_amerika <-subset(Auto,origin == 1)

auto_amerika <- data_amerika[,c("mpg","horsepower")]

Data Auto_amerika ini terdiri dari 245 Observasi.

# ScatterPlot
lmAm <- ggplot(auto_amerika,aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.7, color="magenta") + 
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1,
              col = "brown",se = F)+
  labs(title="Non Linear Model Data Amerika")+
  xlab("HorsePower American")+
  ylab("mpg American")
lmAm

##Visually
##2nd degree polynomial regression

polinom1 <-  lm( mpg ~ poly(horsepower,2,raw = T),data= auto_amerika)

Am_poly <- ggplot(auto_amerika,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="magenta") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Polinomial Derajat 2")+
  xlab("HorsePower American")+
  ylab("mpg American")
Am_poly

#Visually
tangga2 = lm(mpg ~ cut(horsepower,9),data= auto_amerika)

Am_tangga <- ggplot(auto_amerika,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.7, color="magenta") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,9), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Fungsi Tangga Knot 8")+
  xlab("HorsePower American")+
  ylab("mpg American")
Am_tangga

#c. Natural Cubic Spline 
##Visually

spline1ns = lm(mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)),data = auto_amerika)

Am_spline <- ggplot(auto_amerika,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="magenta") +
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(80, 120, 160, 200)),
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Natural Spline Knot 80, 120, 160, 200")+
  xlab("HorsePower American")+
  ylab("mpg American")
Am_spline

#Perbandingan Model

plot_Amrik <- arrange(lmAm, Am_poly, Am_tangga, Am_spline)

#_______________________________________________________________________________

# 2. Data Eropa
#________________

# Menampilkan data mpg dan horsepower berdasarkan mesin asal Amerika
data_eropa <-subset(Auto,origin == 2)

auto_eropa <- data_eropa[,c("mpg","horsepower")]
# Terdiri dari 68 Observasi
# ScatterPlot
lmEr <- ggplot(auto_eropa, aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.55, color="coral") + 
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1,
              col = "brown",se = F)+
  labs(title="Linear Model Data eropa")+
  xlab("HorsePower European")+
  ylab("mpg European")
lmEr

#a. Polynomial regression ##2nd degree polynomial regression

# Visually
polinom2 <-  lm( mpg ~ poly(horsepower,2,raw = T),data= auto_eropa)

Er_poly <- ggplot(auto_eropa,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="coral") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,2,raw=T), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi POlinomial Derajat 2")+
  xlab("HorsePower European")+
  ylab("mpg European")
Er_poly

#Visually
tangga2 = lm(mpg ~ cut(horsepower,7),data= auto_eropa)

Er_tangga <- ggplot(auto_eropa,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.7, color="coral") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,7), 
              lty = 1, col = "100",se = F)+
  labs(title="Regresi Fungsi Tangga Knot 6")+
  xlab("HorsePower European")+
  ylab("mpg European")
Er_tangga

#c. Natural Cubic Spline

#nilai knots yang ditentukan oleh komputer
attr(ns(auto_eropa$horsepower, df=5),"knots")
##  20%  40%  60%  80% 
## 67.0 74.8 81.4 95.0
##Visually


spline2ns = lm(mpg ~ ns(horsepower, knots = c(70, 80, 100, 120)),data = auto_eropa)
summary(spline1ns)
## 
## Call:
## lm(formula = mpg ~ ns(horsepower, knots = c(80, 120, 160, 200)), 
##     data = auto_amerika)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4671  -2.1836  -0.3698   2.0508  13.2883 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                     35.039      1.813  19.322
## ns(horsepower, knots = c(80, 120, 160, 200))1  -18.270      1.748 -10.454
## ns(horsepower, knots = c(80, 120, 160, 200))2  -20.386      2.556  -7.975
## ns(horsepower, knots = c(80, 120, 160, 200))3  -20.800      2.173  -9.574
## ns(horsepower, knots = c(80, 120, 160, 200))4  -30.835      4.178  -7.380
## ns(horsepower, knots = c(80, 120, 160, 200))5  -14.921      1.967  -7.586
##                                               Pr(>|t|)    
## (Intercept)                                    < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))1  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))2 6.29e-14 ***
## ns(horsepower, knots = c(80, 120, 160, 200))3  < 2e-16 ***
## ns(horsepower, knots = c(80, 120, 160, 200))4 2.61e-12 ***
## ns(horsepower, knots = c(80, 120, 160, 200))5 7.33e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.819 on 239 degrees of freedom
## Multiple R-squared:  0.6557, Adjusted R-squared:  0.6485 
## F-statistic: 91.02 on 5 and 239 DF,  p-value: < 2.2e-16
Er_ncspline <- ggplot(auto_eropa,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="coral") +
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(70, 80, 100, 120)),
              lty = 1, col = "100",se = F)+
  labs(title="Natural Cubic Spline Knot 70,80,100,120")+
  xlab("HorsePower European")+
  ylab("mpg European")
Er_ncspline

#Perbandingan Model
plot_Eropa <- arrange(lmEr, Er_poly, Er_tangga, Er_ncspline)

#______________________________________________________________________________
# 3. Data Jepang
#________________

# Menampilkan data mpg dan horsepower berdasarkan mesin asal Amerika
data_jepang <-subset(Auto,origin == 3)

auto_jepang <- data_jepang[,c("mpg","horsepower")]

Data auto_jepang terdiri dari 79 data

lmJ <- ggplot(auto_jepang, aes(x=horsepower, y=mpg)) +
  geom_point(alpha=0.55, color="blue") + 
  stat_smooth(method = "lm", 
              formula = y~x,lty = 1,
              col = "brown",se = F)+
  labs(title="Linear Model Data Jepang")+
  xlab("HorsePower Japanese")+
  ylab("mpg Japanese")
lmJ

#a. Polynomial regression ##1st degree polynomial regression

# Visually
polinom3 <-  lm( mpg ~ poly(horsepower,1,raw = T),data= auto_jepang)
summary(polinom3)
## 
## Call:
## lm(formula = mpg ~ poly(horsepower, 1, raw = T), data = auto_jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.112  -2.758  -0.751   2.563  14.249 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   48.8162     2.3555  20.724  < 2e-16 ***
## poly(horsepower, 1, raw = T)  -0.2300     0.0288  -7.986 1.08e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.533 on 77 degrees of freedom
## Multiple R-squared:  0.4531, Adjusted R-squared:  0.446 
## F-statistic: 63.78 on 1 and 77 DF,  p-value: 1.08e-11
J_poly <- ggplot(auto_jepang,aes(x=horsepower, y=mpg)) + 
  geom_point(alpha=0.55, color="blue") +
  stat_smooth(method = "lm", 
              formula = y~poly(x,1,raw=T), 
              lty = 1, col = "orange",se = F)+
  labs(title="Regresi POlinomial Derajat 1")+
  xlab("HorsePower Japanese")+
  ylab("mpg Japanese")
J_poly

#Visually
tangga3 = lm(mpg ~ cut(horsepower,7),data= auto_jepang)
summary(tangga3)
## 
## Call:
## lm(formula = mpg ~ cut(horsepower, 7), data = auto_jepang)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5400 -2.7379 -0.8857  2.3143 11.9143 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    34.7900     1.3659  25.470  < 2e-16 ***
## cut(horsepower, 7)(63.4,74.9]  -0.1043     1.5913  -0.066 0.947929    
## cut(horsepower, 7)(74.9,86.3]  -3.7150     2.0489  -1.813 0.073976 .  
## cut(horsepower, 7)(86.3,97.7]  -9.2500     1.6162  -5.723 2.24e-07 ***
## cut(horsepower, 7)(97.7,109]   -9.5900     2.8434  -3.373 0.001201 ** 
## cut(horsepower, 7)(109,121]   -11.0900     2.8434  -3.900 0.000214 ***
## cut(horsepower, 7)(121,132]    -8.4400     3.3459  -2.523 0.013863 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.32 on 72 degrees of freedom
## Multiple R-squared:  0.5356, Adjusted R-squared:  0.4969 
## F-statistic: 13.84 on 6 and 72 DF,  p-value: 2.148e-10
J_tangga <- ggplot(auto_jepang,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.7, color="blue") +
  stat_smooth(method = "lm", 
              formula = y~cut(x,7), 
              lty = 1, col = "orange",se = F)+
  labs(title="Regresi Fungsi Tangga Knot 8")+
  xlab("HorsePower Japanese")+
  ylab("mpg Japanese")
J_tangga

#c. Natural Cubic Spline

##Visually

spline_ns <-  lm(mpg ~ ns(horsepower, knots = c(70,110)),data = auto_jepang)

J_ncspline <- ggplot(auto_jepang,aes(x=horsepower, y= mpg)) +
  geom_point(alpha=0.55, color="blue") +
  stat_smooth(method = "lm", 
              formula = y~ns(x, knots = c(70,110)),
              lty = 1, col = "orange",se = F)+
  labs(title="Regresi Natural Spline Knot 70, 110")+
  xlab("HorsePower Japanese")+
  ylab("mpg Japanese")
J_ncspline

#Perbandingan all model
plot_Eropa <- arrange(lmJ, J_poly, J_tangga, J_ncspline)