Non-linear Regression: Spline

Kelompok 4 PSD

2022-11-30

library(broom) #untuk merapikan tampilan data
library(glmnet) #metode seleksi LASSO dan RIDGE
library(glmnetUtils) #package tambahan dari glmnet yang memungkinkan syntax glmnet bisa dinput menggunakan object data.frame
library(leaps) #menggunakan fungsi regsubset sebagai metode seleksi peubah dengan bestforward
library(varbvs) #Data
library(ggplot2)
library(tidyverse)
library(readr)
library(caret)
library(repr)
library(MASS) #Data
library(MuMIn) #Model Averaging
library(purrr)
library(rsample)
library(splines)
library(readxl)

Input data

data <- read_excel("D:/Semester 5/NSC/Data Semifinal NSC (All) (1).xlsx", skip=1)
data$X3 <- as.numeric(data$X3)
data$X1 <- as.numeric(data$X1)
data$X2 <- as.numeric(data$X2)
datap <- data[-c(1,2)]
data21 <- data[which(data$Tahun==2021),]; data21 <- data21[-c(1:2)]
data21 <- na.omit(data21)
data21 <- as.data.frame(data21)
str(data21)
## 'data.frame':    514 obs. of  11 variables:
##  $ Y  : num  66.4 69.2 67.4 69.4 67.8 ...
##  $ X1 : num  19 20.4 13.2 13.4 14.4 ...
##  $ X2 : num  9.48 8.68 8.88 9.67 8.21 ...
##  $ X3 : num  7148 8776 8180 8030 8577 ...
##  $ X4 : num  65.3 67.4 64.4 68.2 68.7 ...
##  $ X5 : num  9.99 9.21 8.92 9.89 8.75 ...
##  $ X6 : num  71.6 69.6 62.6 62.7 66.7 ...
##  $ X7 : num  87.4 78.6 79.7 86.7 83.2 ...
##  $ X8 : num  5.71 8.36 6.46 6.43 7.13 ...
##  $ X9 : num  71.2 62.9 60.8 69.6 59.5 ...
##  $ X10: num  1648096 1780419 4345784 3487157 8433526 ...
data21 <- data21[c(1,3,5,11)]

colnames(data21) <- c("IPM","Rata-rata Lama Sekolah","Umur Harapan Hidup","PDRB atas harga konstan")
GGally::ggpairs(data21)

colnames(data21) <- c("Y","X1","X2","X3")
str(data21)
## 'data.frame':    514 obs. of  4 variables:
##  $ Y : num  66.4 69.2 67.4 69.4 67.8 ...
##  $ X1: num  9.48 8.68 8.88 9.67 8.21 ...
##  $ X2: num  65.3 67.4 64.4 68.2 68.7 ...
##  $ X3: num  1648096 1780419 4345784 3487157 8433526 ...

Eksplorasi

ggplot(data21,aes(x=X1, y=Y)) +
                 geom_point(alpha=0.55, color="black")+
      stat_smooth(method = "lm", 
                 formula = y~bs(x, knots = bs(data21$X1, df=6),"knots"), 
                 lty = 1, se=T) + theme_classic()+ labs(y="Indeks Pembangunan Manusia (IPM)", x="Rata-rata Lama Sekolah")

ggplot(data21,aes(x=X2, y=Y)) +
                 geom_point(alpha=0.55, color="black")+
      stat_smooth(method = "lm", 
                 formula = y~bs(x, knots = bs(data21$X2, df=6),"knots"), 
                 lty = 1, se=T) + theme_classic() + labs(y="Indeks Pembangunan Manusia (IPM)", x="Umur Harapan Hidup")

ggplot(data21,aes(x=X3, y=Y)) +
                 geom_point(alpha=0.55, color="black")+
      stat_smooth(method = "lm", 
                 formula = y~bs(x, knots = bs(data21$X3, df=6),"knots"), 
                 lty = 1, se=T)+ theme_classic()+ labs(y="Indeks Pembangunan Manusia (IPM)", x="PDRB atas Dasar harga konstan")

Spline

Spline Rata-rata lama sekolah

mod_spline1 = lm(Y ~ bs(X1, knots=attr(bs(data21$X1, df=6),"knots"))
                     ,data=data21)
summary(mod_spline1)
## 
## Call:
## lm(formula = Y ~ bs(X1, knots = attr(bs(data21$X1, df = 6), "knots")), 
##     data = data21)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0522  -1.8187   0.1209   2.1327   7.7550 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                             36.353      2.489
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))1   11.365      4.604
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))2   29.129      2.373
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))3   32.876      2.587
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))4   36.484      2.586
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))5   48.890      3.025
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))6   47.002      3.235
##                                                       t value Pr(>|t|)    
## (Intercept)                                            14.608   <2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))1   2.469   0.0139 *  
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))2  12.278   <2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))3  12.706   <2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))4  14.110   <2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))5  16.161   <2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))6  14.528   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.986 on 507 degrees of freedom
## Multiple R-squared:  0.7912, Adjusted R-squared:  0.7888 
## F-statistic: 320.3 on 6 and 507 DF,  p-value: < 2.2e-16

Spline Umur Harapan Hidup

mod_spline2 = lm(Y ~ bs(X2, knots=attr(bs(data21$X2, df=6),"knots"))
                 ,data=data21)
summary(mod_spline2)
## 
## Call:
## lm(formula = Y ~ bs(X2, knots = attr(bs(data21$X2, df = 6), "knots")), 
##     data = data21)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.1574  -2.2946   0.2324   3.0045  12.9395 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                             33.596      4.056
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))1   38.090      6.553
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))2   23.486      3.880
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))3   37.763      4.224
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))4   40.459      4.126
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))5   45.070      4.699
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))6   44.500      4.510
##                                                       t value Pr(>|t|)    
## (Intercept)                                             8.284 1.07e-15 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))1   5.812 1.09e-08 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))2   6.053 2.77e-09 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))3   8.940  < 2e-16 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))4   9.805  < 2e-16 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))5   9.592  < 2e-16 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))6   9.866  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.498 on 507 degrees of freedom
## Multiple R-squared:  0.5264, Adjusted R-squared:  0.5208 
## F-statistic: 93.91 on 6 and 507 DF,  p-value: < 2.2e-16

Spline PDRB atas Dasar Harga Konstan

mod_spline3 = lm(Y ~ bs(X3, knots=attr(bs(data21$X3, df=6),"knots"))
                     ,data=data21)
summary(mod_spline3)
## 
## Call:
## lm(formula = Y ~ bs(X3, knots = attr(bs(data21$X3, df = 6), "knots")), 
##     data = data21)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.0126  -3.1441  -0.4077   2.7474  16.2074 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                             48.794      1.958
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))1   16.711      2.782
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))2   22.061      1.893
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))3   22.228      2.068
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))4   33.253      3.680
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))5   29.557      7.061
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))6   34.925      4.207
##                                                       t value Pr(>|t|)    
## (Intercept)                                            24.914  < 2e-16 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))1   6.006 3.62e-09 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))2  11.652  < 2e-16 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))3  10.751  < 2e-16 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))4   9.035  < 2e-16 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))5   4.186 3.35e-05 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))6   8.302 9.38e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.873 on 507 degrees of freedom
## Multiple R-squared:  0.444,  Adjusted R-squared:  0.4374 
## F-statistic: 67.49 on 6 and 507 DF,  p-value: < 2.2e-16

Spline Rata-rata lama sekolah, Umur Harapan Hidup, dan PDRB atas Dasar Harga Konstan

mod_spline123 = lm(Y ~ bs(X1, knots=attr(bs(data21$X1, df=6),"knots"))+
                       bs(X2, knots=attr(bs(data21$X2, df=6),"knots"))+
                       bs(X3, knots=attr(bs(data21$X3, df=6),"knots"))
                     ,data=data21)
summary(mod_spline123)
## 
## Call:
## lm(formula = Y ~ bs(X1, knots = attr(bs(data21$X1, df = 6), "knots")) + 
##     bs(X2, knots = attr(bs(data21$X2, df = 6), "knots")) + bs(X3, 
##     knots = attr(bs(data21$X3, df = 6), "knots")), data = data21)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5191 -0.9903 -0.0339  1.0765  4.9299 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                            31.3889     1.5344
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))1  13.0087     3.2503
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))2  19.8961     1.6866
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))3  25.4753     1.9504
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))4  28.8773     1.8735
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))5  36.0650     2.1424
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))6  38.0093     2.1548
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))1   2.6330     3.1604
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))2   5.2988     1.8043
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))3   8.1963     2.1138
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))4   9.3846     1.9986
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))5  12.0766     2.2395
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))6  13.1677     2.1393
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))1   2.1091     0.9720
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))2   4.7923     0.7286
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))3   5.8150     0.7787
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))4   5.5232     1.2816
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))5   8.2573     2.3058
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))6   6.9560     1.4398
##                                                       t value Pr(>|t|)    
## (Intercept)                                            20.456  < 2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))1   4.002 7.23e-05 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))2  11.797  < 2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))3  13.062  < 2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))4  15.414  < 2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))5  16.834  < 2e-16 ***
## bs(X1, knots = attr(bs(data21$X1, df = 6), "knots"))6  17.639  < 2e-16 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))1   0.833 0.405192    
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))2   2.937 0.003471 ** 
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))3   3.878 0.000120 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))4   4.696 3.45e-06 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))5   5.392 1.08e-07 ***
## bs(X2, knots = attr(bs(data21$X2, df = 6), "knots"))6   6.155 1.55e-09 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))1   2.170 0.030498 *  
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))2   6.577 1.22e-10 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))3   7.468 3.71e-13 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))4   4.310 1.97e-05 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))5   3.581 0.000376 ***
## bs(X3, knots = attr(bs(data21$X3, df = 6), "knots"))6   4.831 1.81e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.562 on 495 degrees of freedom
## Multiple R-squared:  0.9442, Adjusted R-squared:  0.9422 
## F-statistic: 465.6 on 18 and 495 DF,  p-value: < 2.2e-16
sc_data21 <- data.frame(scale(data21))
sc_mod_spline123 = lm(Y ~ bs(X1, knots=attr(bs(sc_data21$X1, df=6),"knots"))+
                       bs(X2, knots=attr(bs(sc_data21$X2, df=6),"knots"))+
                       bs(X3, knots=attr(bs(sc_data21$X3, df=6),"knots"))
                     ,data=sc_data21)
summary(sc_mod_spline123)
## 
## Call:
## lm(formula = Y ~ bs(X1, knots = attr(bs(sc_data21$X1, df = 6), 
##     "knots")) + bs(X2, knots = attr(bs(sc_data21$X2, df = 6), 
##     "knots")) + bs(X3, knots = attr(bs(sc_data21$X3, df = 6), 
##     "knots")), data = sc_data21)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69556 -0.15242 -0.00521  0.16570  0.75879 
## 
## Coefficients:
##                                                          Estimate Std. Error
## (Intercept)                                               -5.9316     0.2362
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))1   2.0022     0.5003
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))2   3.0623     0.2596
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))3   3.9211     0.3002
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))4   4.4447     0.2884
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))5   5.5510     0.3297
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))6   5.8503     0.3317
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))1   0.4053     0.4864
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))2   0.8156     0.2777
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))3   1.2615     0.3253
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))4   1.4444     0.3076
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))5   1.8588     0.3447
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))6   2.0267     0.3293
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))1   0.3246     0.1496
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))2   0.7376     0.1121
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))3   0.8950     0.1199
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))4   0.8501     0.1973
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))5   1.2709     0.3549
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))6   1.0706     0.2216
##                                                          t value Pr(>|t|)    
## (Intercept)                                              -25.115  < 2e-16 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))1   4.002 7.23e-05 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))2  11.797  < 2e-16 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))3  13.062  < 2e-16 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))4  15.414  < 2e-16 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))5  16.834  < 2e-16 ***
## bs(X1, knots = attr(bs(sc_data21$X1, df = 6), "knots"))6  17.639  < 2e-16 ***
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))1   0.833 0.405192    
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))2   2.937 0.003471 ** 
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))3   3.878 0.000120 ***
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))4   4.696 3.45e-06 ***
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))5   5.392 1.08e-07 ***
## bs(X2, knots = attr(bs(sc_data21$X2, df = 6), "knots"))6   6.155 1.55e-09 ***
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))1   2.170 0.030498 *  
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))2   6.577 1.22e-10 ***
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))3   7.468 3.71e-13 ***
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))4   4.310 1.97e-05 ***
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))5   3.581 0.000376 ***
## bs(X3, knots = attr(bs(sc_data21$X3, df = 6), "knots"))6   4.831 1.81e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2404 on 495 degrees of freedom
## Multiple R-squared:  0.9442, Adjusted R-squared:  0.9422 
## F-statistic: 465.6 on 18 and 495 DF,  p-value: < 2.2e-16