# Dataset ----
# Load the built-in airquality data and replace the missing Ozone and Solar.R
# readings with the median of the observed values in the same column.
data("airquality")
airquality[c("Ozone", "Solar.R")] <- lapply(
  airquality[c("Ozone", "Solar.R")],
  function(v) replace(v, is.na(v), median(v, na.rm = TRUE))
)
# Working copy used by every model below.
df_airquality <- airquality
# Mean squared error between predictions and observed values.
# Pairs where either value is NA are dropped before averaging.
MSE <- function(pred, actual) {
  squared_error <- (pred - actual)^2
  mean(squared_error, na.rm = TRUE)
}
# Models of Ozone as a function of Temp ----
# bs() and ns() are called through the splines namespace so this section runs
# in a fresh session even when library(splines) has not been attached.

# Linear model
mod_linear_T <- lm(Ozone ~ Temp, data = df_airquality)

# Step-function model: Temp is binned into 5 equal-width intervals and each
# bin gets its own mean level.
df_airquality$Temp_cut <- cut(df_airquality$Temp, breaks = 5)
mod_tangga_T <- lm(Ozone ~ Temp_cut, data = df_airquality)

# Cubic B-spline with interior knots at the Temp quartiles
mod_spline_T <- lm(
  Ozone ~ splines::bs(Temp, knots = quantile(Temp, probs = c(0.25, 0.5, 0.75))),
  data = df_airquality
)

# Natural cubic spline with the same interior knots
mod_nspline_T <- lm(
  Ozone ~ splines::ns(Temp, knots = quantile(Temp, probs = c(0.25, 0.5, 0.75))),
  data = df_airquality
)
# Comparison of the four Temp models: in-sample MSE (fitted values vs. observed
# Ozone), AIC and adjusted R-squared, one row per model.
compare_T <- local({
  fits <- list(mod_linear_T, mod_tangga_T, mod_spline_T, mod_nspline_T)
  data.frame(
    Model = c("Linear", "Tangga", "Spline", "Natural Spline"),
    MSE = vapply(
      fits,
      function(fit) MSE(predict(fit), df_airquality$Ozone),
      numeric(1)
    ),
    AIC = vapply(fits, function(fit) AIC(fit), numeric(1)),
    Adj_R2 = vapply(
      fits,
      function(fit) summary(fit)$adj.r.squared,
      numeric(1)
    )
  )
})
compare_T
## Model MSE AIC Adj_R2
## 1 Linear 535.8932 1401.637 0.3567682
## 2 Tangga 493.3074 1394.968 0.3958816
## 3 Spline 471.8250 1392.156 0.4142743
## 4 Natural Spline 472.7660 1388.461 0.4210372
Model terbaik untuk Ozone vs Temp adalah Natural Spline, karena memiliki Adj R² tertinggi (0.42) dan AIC terendah, meskipun MSE sedikit lebih tinggi dibanding spline biasa.
# Models of Ozone as a function of Wind ----
# bs() and ns() are called through the splines namespace so this section runs
# in a fresh session even when library(splines) has not been attached.

# Linear model
mod_linear_W <- lm(Ozone ~ Wind, data = df_airquality)

# Step-function model: Wind is binned into 5 equal-width intervals and each
# bin gets its own mean level.
df_airquality$Wind_cut <- cut(df_airquality$Wind, breaks = 5)
mod_tangga_W <- lm(Ozone ~ Wind_cut, data = df_airquality)

# Cubic B-spline with interior knots at the Wind quartiles
mod_spline_W <- lm(
  Ozone ~ splines::bs(Wind, knots = quantile(Wind, probs = c(0.25, 0.5, 0.75))),
  data = df_airquality
)

# Natural cubic spline with the same interior knots
mod_nspline_W <- lm(
  Ozone ~ splines::ns(Wind, knots = quantile(Wind, probs = c(0.25, 0.5, 0.75))),
  data = df_airquality
)
# Comparison of the four Wind models: in-sample MSE (fitted values vs. observed
# Ozone), AIC and adjusted R-squared, one row per model.
compare_W <- local({
  fits <- list(mod_linear_W, mod_tangga_W, mod_spline_W, mod_nspline_W)
  data.frame(
    Model = c("Linear", "Tangga", "Spline", "Natural Spline"),
    MSE = vapply(
      fits,
      function(fit) MSE(predict(fit), df_airquality$Ozone),
      numeric(1)
    ),
    AIC = vapply(fits, function(fit) AIC(fit), numeric(1)),
    Adj_R2 = vapply(
      fits,
      function(fit) summary(fit)$adj.r.squared,
      numeric(1)
    )
  )
})
compare_W
## Model MSE AIC Adj_R2
## 1 Linear 601.3750 1419.276 0.2781705
## 2 Tangga 545.7385 1410.423 0.3316730
## 3 Spline 480.4891 1394.940 0.4035187
## 4 Natural Spline 513.2467 1401.031 0.3714633
#Interpretasi: Model terbaik untuk Ozone vs Wind adalah Spline, karena memberikan MSE terendah (480.5) dan Adj R² tertinggi (0.40) dibandingkan model lain.
Dari MSE (dihitung pada data latih): pada hubungan Ozone–Temp, model Spline (471.8) memiliki MSE paling rendah sehingga paling akurat; pada hubungan Ozone–Wind, model Spline (480.5) juga memiliki MSE paling rendah.
Dari AIC: pada Ozone–Temp, Natural Spline (1388.5) adalah yang terbaik (AIC paling kecil); pada Ozone–Wind, Spline (1394.9) adalah yang terbaik (AIC paling kecil).
Dari Adjusted R²: pada Ozone–Temp, Natural Spline (0.421) tertinggi sehingga paling mampu menjelaskan variasi Ozone; pada Ozone–Wind, Spline (0.404) tertinggi, lebih baik dibanding Natural Spline (0.371).
-Hubungan Ozone–Temp relatif lebih kuat dibanding Ozone–Wind. Hal ini terlihat dari nilai Adjusted R² model terbaik yang lebih tinggi (0.42 vs 0.40), dan selisihnya lebih jelas pada model linear (0.36 vs 0.28). Artinya, suhu (Temp) lebih berpengaruh terhadap variasi Ozone dibanding kecepatan angin (Wind).
-Spline dan Natural Spline konsisten memberikan hasil lebih baik daripada model linear sederhana, karena hubungan sebenarnya tidak sepenuhnya linear, sehingga model fleksibel lebih cocok.
-Pada kasus Temp, Natural Spline unggul karena bisa menangkap pola non-linear dengan kompleksitas optimal.
-Pada kasus Wind, Spline biasa lebih stabil dibanding natural spline, kemungkinan karena distribusi Wind lebih “rata” sehingga natural spline kurang efisien.
Secara praktis, pemilihan model tergantung tujuan: