library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(splines)
# Dataset
df_airquality <- datasets::airquality
# Cek apakah ada missing value
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0

Perbandingan Variabel Ozone dengan Temp

# Ganti NA pada kolom Ozone dengan median
airquality$Ozone[is.na(airquality$Ozone)] <- median(airquality$Ozone, na.rm = TRUE)

# Ganti NA pada kolom Temp dengan median
airquality$Temp[is.na(airquality$Temp)] <- median(airquality$Temp, na.rm = TRUE)

# Cek lagi apakah masih ada missing value
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##       0       7       0       0       0       0
ggplot(df_airquality,aes(x=Ozone, y=Temp)) +
                 geom_point(alpha=0.55, color="purple") + 
                 theme_bw() 
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Regresi linear

mod_linear = lm(Temp~Ozone,data=df_airquality)
summary(mod_linear)
## 
## Call:
## lm(formula = Temp ~ Ozone, data = df_airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.147  -4.858   1.828   4.342  12.328 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 69.41072    1.02971   67.41   <2e-16 ***
## Ozone        0.20081    0.01928   10.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.819 on 114 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.4877, Adjusted R-squared:  0.4832 
## F-statistic: 108.5 on 1 and 114 DF,  p-value: < 2.2e-16

Model regresi linear sederhana: Temp = β0 + β1*Ozone

ggplot(df_airquality,aes(x=Ozone, y=Temp)) +
                 geom_point(alpha=0.55, color="red") +
   stat_smooth(method = "lm", 
               formula = y~x,lty = 1, col = "lightgreen",se = F)+
  theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Dari grafik terlihat bahwa garis regresi linear cukup mewakili pola di tengah data. Namun, pada bagian ekor (nilai Ozone sangat rendah atau sangat tinggi), garis tidak pas mengikuti titik-titik data → ini menunjukkan adanya kemungkinan hubungan non-linear yang lebih kompleks.

Regresi Fungsi Tangga

mod_tangga = lm(Temp ~ cut(Ozone,5),data=df_airquality)
summary(mod_tangga)
## 
## Call:
## lm(formula = Temp ~ cut(Ozone, 5), data = df_airquality)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6935  -3.6935   0.6015   4.3065  12.3065 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               71.6935     0.7866  91.139  < 2e-16 ***
## cut(Ozone, 5)(34.4,67.8]   9.5478     1.3935   6.852 4.28e-10 ***
## cut(Ozone, 5)(67.8,101]   18.8620     1.6584  11.374  < 2e-16 ***
## cut(Ozone, 5)(101,135]    15.7065     2.8796   5.454 3.01e-07 ***
## cut(Ozone, 5)(135,168]    10.8065     4.4499   2.428   0.0168 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.194 on 111 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.5884, Adjusted R-squared:  0.5736 
## F-statistic: 39.67 on 4 and 111 DF,  p-value: < 2.2e-16

Model step function (cut(Ozone, 5)) membagi kadar Ozone menjadi 5 interval

ggplot(df_airquality,aes(x=Ozone, y=Temp)) +
                 geom_point(alpha=0.55, color="brown") +
  stat_smooth(method = "lm", 
               formula = y~cut(x,5), 
               lty = 1, col = "lightblue",se = F)+
  theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Regresi Spline

mod_spline3 = lm(Temp ~ bs(Ozone, knots = c(5, 10, 20, 30, 40)),data=df_airquality)
summary(mod_spline3)
## 
## Call:
## lm(formula = Temp ~ bs(Ozone, knots = c(5, 10, 20, 30, 40)), 
##     data = df_airquality)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6683  -3.6982   0.5521   4.0959  10.9132 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                59.035      6.003   9.834  < 2e-16
## bs(Ozone, knots = c(5, 10, 20, 30, 40))1  -13.769     14.721  -0.935  0.35172
## bs(Ozone, knots = c(5, 10, 20, 30, 40))2   14.367      7.941   1.809  0.07323
## bs(Ozone, knots = c(5, 10, 20, 30, 40))3    9.897      6.963   1.422  0.15807
## bs(Ozone, knots = c(5, 10, 20, 30, 40))4   13.948      6.608   2.111  0.03711
## bs(Ozone, knots = c(5, 10, 20, 30, 40))5   15.439      6.299   2.451  0.01586
## bs(Ozone, knots = c(5, 10, 20, 30, 40))6   38.311      7.923   4.836 4.47e-06
## bs(Ozone, knots = c(5, 10, 20, 30, 40))7   30.545      9.704   3.148  0.00213
## bs(Ozone, knots = c(5, 10, 20, 30, 40))8   20.788      8.367   2.485  0.01452
##                                             
## (Intercept)                              ***
## bs(Ozone, knots = c(5, 10, 20, 30, 40))1    
## bs(Ozone, knots = c(5, 10, 20, 30, 40))2 .  
## bs(Ozone, knots = c(5, 10, 20, 30, 40))3    
## bs(Ozone, knots = c(5, 10, 20, 30, 40))4 *  
## bs(Ozone, knots = c(5, 10, 20, 30, 40))5 *  
## bs(Ozone, knots = c(5, 10, 20, 30, 40))6 ***
## bs(Ozone, knots = c(5, 10, 20, 30, 40))7 ** 
## bs(Ozone, knots = c(5, 10, 20, 30, 40))8 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.003 on 107 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.6273, Adjusted R-squared:  0.5994 
## F-statistic: 22.51 on 8 and 107 DF,  p-value: < 2.2e-16
mod_spline3ns = lm(Temp ~ ns(Ozone, knots = c(5, 10, 20, 30, 40)),data=df_airquality)
summary(mod_spline3ns)
## 
## Call:
## lm(formula = Temp ~ ns(Ozone, knots = c(5, 10, 20, 30, 40)), 
##     data = df_airquality)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6502  -4.0877   0.7038   4.1132  10.8977 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                56.900      5.730   9.930  < 2e-16
## ns(Ozone, knots = c(5, 10, 20, 30, 40))1   13.549      6.242   2.171 0.032116
## ns(Ozone, knots = c(5, 10, 20, 30, 40))2   15.506      6.451   2.404 0.017913
## ns(Ozone, knots = c(5, 10, 20, 30, 40))3   17.585      5.931   2.965 0.003720
## ns(Ozone, knots = c(5, 10, 20, 30, 40))4   35.608      4.408   8.078 9.67e-13
## ns(Ozone, knots = c(5, 10, 20, 30, 40))5   42.658     12.076   3.532 0.000605
## ns(Ozone, knots = c(5, 10, 20, 30, 40))6   17.806      5.660   3.146 0.002134
##                                             
## (Intercept)                              ***
## ns(Ozone, knots = c(5, 10, 20, 30, 40))1 *  
## ns(Ozone, knots = c(5, 10, 20, 30, 40))2 *  
## ns(Ozone, knots = c(5, 10, 20, 30, 40))3 ** 
## ns(Ozone, knots = c(5, 10, 20, 30, 40))4 ***
## ns(Ozone, knots = c(5, 10, 20, 30, 40))5 ***
## ns(Ozone, knots = c(5, 10, 20, 30, 40))6 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.99 on 109 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.6221, Adjusted R-squared:  0.6013 
## F-statistic:  29.9 on 6 and 109 DF,  p-value: < 2.2e-16
ggplot(df_airquality,aes(x=Ozone, y=Temp)) +
                 geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~bs(x, knots = c(5, 10, 20, 30, 40)), 
               lty = 1, aes(col = "Cubic Spline"),se = F)+
    stat_smooth(method = "lm", 
               formula = y~ns(x, knots = c(5, 10, 20, 30, 40)), 
               lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="magenta","Cubic Spline"="green"))+theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Komparasi Model

# Fungsi MSE
MSE <- function(pred, actual) {
  mean((pred - actual)^2, na.rm = TRUE)
}

# Buat prediksi dari masing-masing model
pred_linear  <- predict(mod_linear)
pred_tangga  <- predict(mod_tangga)
pred_spline  <- predict(mod_spline3)
pred_nspline <- predict(mod_spline3ns)
compare_stats <- data.frame(
  Model   = c("Linear","Tangga","Spline","Natural Spline"),
  MSE     = c(MSE(pred_linear, df_airquality$Ozone),
              MSE(pred_tangga, df_airquality$Ozone),
              MSE(pred_spline, df_airquality$Ozone),
              MSE(pred_nspline, df_airquality$Ozone)),
  AIC     = c(AIC(mod_linear),
              AIC(mod_tangga),
              AIC(mod_spline3),
              AIC(mod_spline3ns)),
  Adj_R2  = c(summary(mod_linear)$adj.r.squared,
              summary(mod_tangga)$adj.r.squared,
              summary(mod_spline3)$adj.r.squared,
              summary(mod_spline3ns)$adj.r.squared)
)
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
compare_stats
##            Model      MSE      AIC    Adj_R2
## 1         Linear 2259.606 778.5467 0.4832134
## 2         Tangga 2230.637 759.1549 0.5735894
## 3         Spline 2191.380 755.6463 0.5994285
## 4 Natural Spline 2202.062 753.2652 0.6012520

Insight :

Kesimpulan: Model linear terlalu sederhana karena hanya menjelaskan 48% variasi data. Model tangga paling akurat (Adj R² ~92%, MSE terkecil) tapi cenderung terlalu rumit. Spline dan natural spline lebih seimbang. Natural spline paling direkomendasikan karena fleksibel, error cukup kecil, dan tidak terlalu kompleks.

Perbandingan Variabel Ozone dengan Wind

# Ganti NA pada kolom Ozone dengan median
airquality$Ozone[is.na(airquality$Ozone)] <- median(airquality$Ozone, na.rm = TRUE)

# Ganti NA pada kolom Temp dengan median
airquality$Wind[is.na(airquality$Wind)] <- median(airquality$Wind, na.rm = TRUE)

# Cek lagi apakah masih ada missing value
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##       0       7       0       0       0       0
ggplot(df_airquality,aes(x=Ozone, y=Wind)) +
                 geom_point(alpha=0.55, color="darkblue") + 
                 theme_bw() 
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Grafik menunjukkan Wind cenderung menurun saat Ozone meningkat, dengan pola melengkung. Artinya hubungan tidak sepenuhnya linear, sehingga model non-linear lebih sesuai.

Regresi linear

mod_linear = lm(Wind~Ozone,data=df_airquality)
summary(mod_linear)
## 
## Call:
## lm(formula = Wind ~ Ozone, data = df_airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2521 -2.2792 -0.2376  1.7474 10.5036 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.608430   0.433126   29.11  < 2e-16 ***
## Ozone       -0.065189   0.008108   -8.04 9.27e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.868 on 114 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.3619, Adjusted R-squared:  0.3563 
## F-statistic: 64.64 on 1 and 114 DF,  p-value: 9.272e-13

Model regresi linear sederhana: Wind = β0 + β1*Ozone

ggplot(df_airquality,aes(x=Ozone, y=Wind)) +
                 geom_point(alpha=0.55, color="brown") +
   stat_smooth(method = "lm", 
               formula = y~x,lty = 1, col = "lightgreen",se = F)+
  theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Regresi Fungsi Tangga

mod_tangga = lm(Wind ~ cut(Ozone,5),data=df_airquality)
summary(mod_tangga)
## 
## Call:
## lm(formula = Wind ~ cut(Ozone, 5), data = df_airquality)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.163 -1.962 -0.256  1.837 11.338 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               11.4629     0.3797  30.189  < 2e-16 ***
## cut(Ozone, 5)(34.4,67.8]  -2.1008     0.6726  -3.123 0.002281 ** 
## cut(Ozone, 5)(67.8,101]   -4.4462     0.8005  -5.554 1.93e-07 ***
## cut(Ozone, 5)(101,135]    -5.8629     1.3900  -4.218 5.05e-05 ***
## cut(Ozone, 5)(135,168]    -7.7129     2.1480  -3.591 0.000492 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.99 on 111 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.3248, Adjusted R-squared:  0.3005 
## F-statistic: 13.35 on 4 and 111 DF,  p-value: 6.476e-09
ggplot(df_airquality,aes(x=Ozone, y=Wind)) +
                 geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~cut(x,5), 
               lty = 1, col = "pink",se = F)+
  theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Intinya: ada indikasi bahwa kenaikan ozon berkaitan dengan penurunan kecepatan angin.

Regresi Spline

mod_spline3 = lm(Wind ~ bs(Ozone, knots = c(5, 10, 20, 30, 40)),data=df_airquality)
summary(mod_spline3)
## 
## Call:
## lm(formula = Wind ~ bs(Ozone, knots = c(5, 10, 20, 30, 40)), 
##     data = df_airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9514 -2.0566 -0.1276  1.1590 10.1440 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                9.6924     2.8862   3.358  0.00109
## bs(Ozone, knots = c(5, 10, 20, 30, 40))1  -4.5888     7.0775  -0.648  0.51814
## bs(Ozone, knots = c(5, 10, 20, 30, 40))2   6.2557     3.8179   1.639  0.10425
## bs(Ozone, knots = c(5, 10, 20, 30, 40))3   1.5135     3.3474   0.452  0.65208
## bs(Ozone, knots = c(5, 10, 20, 30, 40))4   0.8886     3.1769   0.280  0.78023
## bs(Ozone, knots = c(5, 10, 20, 30, 40))5   1.6743     3.0283   0.553  0.58149
## bs(Ozone, knots = c(5, 10, 20, 30, 40))6  -4.0437     3.8090  -1.062  0.29081
## bs(Ozone, knots = c(5, 10, 20, 30, 40))7  -4.2013     4.6654  -0.901  0.36986
## bs(Ozone, knots = c(5, 10, 20, 30, 40))8  -6.6076     4.0226  -1.643  0.10340
##                                            
## (Intercept)                              **
## bs(Ozone, knots = c(5, 10, 20, 30, 40))1   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))2   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))3   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))4   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))5   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))6   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))7   
## bs(Ozone, knots = c(5, 10, 20, 30, 40))8   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.886 on 107 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.3935, Adjusted R-squared:  0.3481 
## F-statistic: 8.677 on 8 and 107 DF,  p-value: 4.556e-09
mod_spline3ns = lm(Wind ~ ns(Ozone, knots = c(5, 10, 20, 30, 40)),data=df_airquality)
summary(mod_spline3ns)
## 
## Call:
## lm(formula = Wind ~ ns(Ozone, knots = c(5, 10, 20, 30, 40)), 
##     data = df_airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6041 -2.1131 -0.2267  1.2119 10.1370 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                8.9246     2.7468   3.249  0.00154
## ns(Ozone, knots = c(5, 10, 20, 30, 40))1   2.8025     2.9921   0.937  0.35103
## ns(Ozone, knots = c(5, 10, 20, 30, 40))2   1.4868     3.0924   0.481  0.63162
## ns(Ozone, knots = c(5, 10, 20, 30, 40))3   2.3873     2.8432   0.840  0.40294
## ns(Ozone, knots = c(5, 10, 20, 30, 40))4  -5.2073     2.1133  -2.464  0.01530
## ns(Ozone, knots = c(5, 10, 20, 30, 40))5   0.6487     5.7891   0.112  0.91099
## ns(Ozone, knots = c(5, 10, 20, 30, 40))6  -7.9496     2.7133  -2.930  0.00413
##                                            
## (Intercept)                              **
## ns(Ozone, knots = c(5, 10, 20, 30, 40))1   
## ns(Ozone, knots = c(5, 10, 20, 30, 40))2   
## ns(Ozone, knots = c(5, 10, 20, 30, 40))3   
## ns(Ozone, knots = c(5, 10, 20, 30, 40))4 * 
## ns(Ozone, knots = c(5, 10, 20, 30, 40))5   
## ns(Ozone, knots = c(5, 10, 20, 30, 40))6 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.871 on 109 degrees of freedom
##   (37 observations deleted due to missingness)
## Multiple R-squared:  0.3885, Adjusted R-squared:  0.3548 
## F-statistic: 11.54 on 6 and 109 DF,  p-value: 5.72e-10

-Residual SE = 2.871, cukup mirip dengan model B-spline sebelumnya.

ggplot(df_airquality,aes(x=Ozone, y=Temp)) +
                 geom_point(alpha=0.55, color="black") +
  stat_smooth(method = "lm", 
               formula = y~bs(x, knots = c(5, 10, 20, 30, 40)), 
               lty = 1, aes(col = "Cubic Spline"),se = F)+
    stat_smooth(method = "lm", 
               formula = y~ns(x, knots = c(5, 10, 20, 30, 40)), 
               lty = 1, aes(col = "Natural Cubic Spline"),se = F)+labs(color="Tipe Spline")+
  scale_color_manual(values = c("Natural Cubic Spline"="orange","Cubic Spline"="green"))+theme_bw()
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 37 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

Natural spline lebih stabil, cubic spline lebih fleksibel.

Komparasi Model

# Fungsi MSE
MSE <- function(pred, actual) {
  mean((pred - actual)^2, na.rm = TRUE)
}

# Buat prediksi dari masing-masing model
pred_linear  <- predict(mod_linear)
pred_tangga  <- predict(mod_tangga)
pred_spline  <- predict(mod_spline3)
pred_nspline <- predict(mod_spline3ns)
compare_stats <- data.frame(
  Model   = c("Linear","Tangga","Spline","Natural Spline"),
  MSE     = c(MSE(pred_linear, df_airquality$Ozone),
              MSE(pred_tangga, df_airquality$Ozone),
              MSE(pred_spline, df_airquality$Ozone),
              MSE(pred_nspline, df_airquality$Ozone)),
  AIC     = c(AIC(mod_linear),
              AIC(mod_tangga),
              AIC(mod_spline3),
              AIC(mod_spline3ns)),
  Adj_R2  = c(summary(mod_linear)$adj.r.squared,
              summary(mod_tangga)$adj.r.squared,
              summary(mod_spline3)$adj.r.squared,
              summary(mod_spline3ns)$adj.r.squared)
)
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
## Warning in pred - actual: longer object length is not a multiple of shorter
## object length
compare_stats
##            Model      MSE      AIC    Adj_R2
## 1         Linear 2111.593 577.6334 0.3562605
## 2         Tangga 2116.099 590.1736 0.3005156
## 3         Spline 2104.394 585.7362 0.3481423
## 4 Natural Spline 2108.308 582.6867 0.3548380

Insight:

Kesimpulan : kalau tujuannya prediksi & interpretasi sederhana, pilih linear. Tapi jika ingin menangkap pola non-linear dengan lebih halus, natural spline lebih disarankan daripada spline biasa atau tangga.