# Load libraries
library(ggplot2)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.9-3. For overview type 'help("mgcv-package")'.
library(kableExtra)
library(nortest)
# Membaca data
data <- readxl::read_excel("data fix new.xlsx", sheet = "Sheet1")
colnames(data) <- c("y", "x1", "x2", "x3", "x4")
# 1. Statistika Deskriptif
cat("1. STATISTIKA DESKRIPTIF\n")
## 1. STATISTIKA DESKRIPTIF
desc_stats <- data.frame(
Variabel = colnames(data),
Mean = sapply(data, mean),
Median = sapply(data, median),
SD = sapply(data, sd),
Min = sapply(data, min),
Max = sapply(data, max)
)
print(kable(desc_stats, format = "simple", digits = 4))
##
##
## Variabel Mean Median SD Min Max
## --- --------- -------- -------- -------- -------- ---------
## y y 0.7353 0.7515 0.3471 0.1043 2.4040
## x1 x1 0.7353 0.7274 0.0732 0.5469 0.8974
## x2 x2 56.0294 46.6667 20.0227 33.3333 96.6667
## x3 x3 0.7300 0.7222 0.0907 0.7222 1.7803
## x4 x4 69.0508 81.8182 26.6570 27.2727 100.0000
# 2. Scatter plot untuk variabel respon dengan setiap predictor
cat("\n2. SCATTER PLOT VARIABEL RESPON DENGAN PREDIKTOR\n")
##
## 2. SCATTER PLOT VARIABEL RESPON DENGAN PREDIKTOR
# Fungsi untuk membuat scatter plot
create_scatter <- function(predictor) {
ggplot(data, aes(x = .data[[predictor]], y = y)) +
geom_point() +
geom_smooth(method = "loess", se = FALSE, color = "red") +
labs(title = paste("Scatter Plot y vs", predictor),
x = predictor, y = "y") +
theme_minimal()
}
# Buat scatter plot untuk setiap predictor
scatter_plots <- lapply(colnames(data)[-1], create_scatter)
print(scatter_plots)
## [[1]]
## `geom_smooth()` using formula = 'y ~ x'

##
## [[2]]
## `geom_smooth()` using formula = 'y ~ x'

##
## [[3]]
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 0.71691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 2.799e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 0.71691
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.0052905
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1.1308
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group -1.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)

##
## [[4]]
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 100.36
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 57.939
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 2.9246e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1551.9

# 3. Regresi Nonparametrik Spline Linear dengan 1 titik knot
cat("\n3. REGRESI NONPARAMETRIK SPLINE LINEAR 1 TITIK KNOT\n")
##
## 3. REGRESI NONPARAMETRIK SPLINE LINEAR 1 TITIK KNOT
# Pilih predictor yang paling berkorelasi dengan y
correlations <- sapply(data[-1], function(x) cor(x, data$y))
best_predictor <- names(which.max(abs(correlations)))
cat("Predictor terpilih:", best_predictor, "\n")
## Predictor terpilih: x2
# Fungsi untuk spline linear dengan knot tertentu
linear_spline <- function(x, knots) {
basis <- matrix(x, ncol = 1)
for (k in knots) {
basis <- cbind(basis, pmax(0, x - k))
}
return(basis)
}
# 1 knot pada median
knot1 <- median(data[[best_predictor]])
X_spline1 <- linear_spline(data[[best_predictor]], knot1)
model_spline1 <- lm(y ~ X_spline1, data = data)
# Ringkasan model
cat("Model Spline Linear dengan 1 Knot (pada median):\n")
## Model Spline Linear dengan 1 Knot (pada median):
print(summary(model_spline1))
##
## Call:
## lm(formula = y ~ X_spline1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64340 -0.19160 0.00166 0.14187 1.67251
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.180461 0.251574 -0.717 0.47443
## X_spline11 0.019541 0.005879 3.324 0.00115 **
## X_spline12 -0.014676 0.006730 -2.181 0.03097 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3138 on 133 degrees of freedom
## Multiple R-squared: 0.1948, Adjusted R-squared: 0.1827
## F-statistic: 16.09 on 2 and 133 DF, p-value: 5.528e-07
# Plot hasil
plot_data1 <- data.frame(x = data[[best_predictor]], y = data$y)
plot_data1$predicted <- predict(model_spline1)
ggplot(plot_data1, aes(x = x)) +
geom_point(aes(y = y)) +
geom_line(aes(y = predicted), color = "blue", size = 1) +
geom_vline(xintercept = knot1, linetype = "dashed", color = "red") +
labs(title = "Spline Linear dengan 1 Knot",
x = best_predictor, y = "y") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# 4. Regresi Nonparametrik Spline Linear dengan 2 titik knot
cat("\n4. REGRESI NONPARAMETRIK SPLINE LINEAR 2 TITIK KNOT\n")
##
## 4. REGRESI NONPARAMETRIK SPLINE LINEAR 2 TITIK KNOT
# 2 knots pada kuartil 1 dan 3
knots2 <- quantile(data[[best_predictor]], probs = c(0.33, 0.67))
X_spline2 <- linear_spline(data[[best_predictor]], knots2)
model_spline2 <- lm(y ~ X_spline2, data = data)
# Ringkasan model
cat("Model Spline Linear dengan 2 Knots (pada kuartil 1 dan 3):\n")
## Model Spline Linear dengan 2 Knots (pada kuartil 1 dan 3):
print(summary(model_spline2))
##
## Call:
## lm(formula = y ~ X_spline2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64870 -0.18188 -0.01642 0.10855 1.58947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.431202 0.259128 -1.664 0.098475 .
## X_spline21 0.026694 0.006214 4.296 3.35e-05 ***
## X_spline22 -0.092345 0.027245 -3.389 0.000924 ***
## X_spline23 0.075021 0.025545 2.937 0.003914 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3052 on 132 degrees of freedom
## Multiple R-squared: 0.2442, Adjusted R-squared: 0.227
## F-statistic: 14.22 on 3 and 132 DF, p-value: 4.402e-08
# Plot hasil
plot_data2 <- data.frame(x = data[[best_predictor]], y = data$y)
plot_data2$predicted <- predict(model_spline2)
ggplot(plot_data2, aes(x = x)) +
geom_point(aes(y = y)) +
geom_line(aes(y = predicted), color = "blue", size = 1) +
geom_vline(xintercept = knots2, linetype = "dashed", color = "red") +
labs(title = "Spline Linear dengan 2 Knots",
x = best_predictor, y = "y") +
theme_minimal()

# 5. Regresi Nonparametrik Spline Linear dengan 3 titik knot
cat("\n5. REGRESI NONPARAMETRIK SPLINE LINEAR 3 TITIK KNOT\n")
##
## 5. REGRESI NONPARAMETRIK SPLINE LINEAR 3 TITIK KNOT
# 3 knots pada persentil 25, 50, 75
knots3 <- quantile(data[[best_predictor]], probs = c(0.25, 0.5, 0.75))
X_spline3 <- linear_spline(data[[best_predictor]], knots3)
model_spline3 <- lm(y ~ X_spline3, data = data)
# Ringkasan model
cat("Model Spline Linear dengan 3 Knots (pada persentil 25, 50, 75):\n")
## Model Spline Linear dengan 3 Knots (pada persentil 25, 50, 75):
print(summary(model_spline3))
##
## Call:
## lm(formula = y ~ X_spline3, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62754 -0.16373 -0.04877 0.09408 1.69998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.497899 1.110602 -3.150 0.002026 **
## X_spline31 0.114170 0.031433 3.632 0.000402 ***
## X_spline32 -0.450414 0.145366 -3.098 0.002381 **
## X_spline33 0.344593 0.114791 3.002 0.003213 **
## X_spline34 -0.020046 0.009419 -2.128 0.035188 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3007 on 131 degrees of freedom
## Multiple R-squared: 0.272, Adjusted R-squared: 0.2498
## F-statistic: 12.24 on 4 and 131 DF, p-value: 1.753e-08
# Plot hasil
plot_data3 <- data.frame(x = data[[best_predictor]], y = data$y)
plot_data3$predicted <- predict(model_spline3)
ggplot(plot_data3, aes(x = x)) +
geom_point(aes(y = y)) +
geom_line(aes(y = predicted), color = "blue", size = 1) +
geom_vline(xintercept = knots3, linetype = "dashed", color = "red") +
labs(title = "Spline Linear dengan 3 Knots",
x = best_predictor, y = "y") +
theme_minimal()

# 6. Regresi Nonparametrik Spline Linear dengan kombinasi knot
cat("\n6. REGRESI NONPARAMETRIK SPLINE LINEAR DENGAN KOMBINASI KNOT\n")
##
## 6. REGRESI NONPARAMETRIK SPLINE LINEAR DENGAN KOMBINASI KNOT
# Coba beberapa kombinasi knot dan pilih yang terbaik berdasarkan AIC
knot_combinations <- list(
c(0.3, 0.6),
c(0.2, 0.5, 0.8),
c(0.4, 0.7),
c(0.25, 0.5, 0.75)
)
best_aic <- Inf
best_model <- NULL
best_knots <- NULL
for (knots in knot_combinations) {
actual_knots <- quantile(data[[best_predictor]], probs = knots)
X_spline <- linear_spline(data[[best_predictor]], actual_knots)
model <- lm(y ~ X_spline, data = data)
current_aic <- AIC(model)
if (current_aic < best_aic) {
best_aic <- current_aic
best_model <- model
best_knots <- actual_knots
}
}
# Ringkasan model terbaik
cat("Model Spline Terbaik berdasarkan AIC dengan Knots pada:\n")
## Model Spline Terbaik berdasarkan AIC dengan Knots pada:
print(best_knots)
## 20% 50% 80%
## 36.66667 46.66667 80.00000
cat("\nRingkasan Model Terbaik:\n")
##
## Ringkasan Model Terbaik:
print(summary(best_model))
##
## Call:
## lm(formula = y ~ X_spline, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62754 -0.16373 -0.04877 0.09408 1.69998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.497899 1.110602 -3.150 0.002026 **
## X_spline1 0.114170 0.031433 3.632 0.000402 ***
## X_spline2 -0.112604 0.036342 -3.098 0.002381 **
## X_spline3 0.006783 0.008757 0.774 0.440039
## X_spline4 -0.020046 0.009419 -2.128 0.035188 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3007 on 131 degrees of freedom
## Multiple R-squared: 0.272, Adjusted R-squared: 0.2498
## F-statistic: 12.24 on 4 and 131 DF, p-value: 1.753e-08
# Plot hasil
plot_data_best <- data.frame(x = data[[best_predictor]], y = data$y)
plot_data_best$predicted <- predict(best_model)
ggplot(plot_data_best, aes(x = x)) +
geom_point(aes(y = y)) +
geom_line(aes(y = predicted), color = "blue", size = 1) +
geom_vline(xintercept = best_knots, linetype = "dashed", color = "red") +
labs(title = "Spline Linear dengan Kombinasi Knot Terbaik",
x = best_predictor, y = "y") +
theme_minimal()

# 7. Model Spline Terbaik
cat("\n7. MODEL SPLINE TERBAIK\n")
##
## 7. MODEL SPLINE TERBAIK
# Bandingkan semua model spline yang telah dibuat
models <- list(
"1 Knot" = model_spline1,
"2 Knots" = model_spline2,
"3 Knots" = model_spline3,
"Best Combination" = best_model
)
# Buat tabel perbandingan
comparison <- data.frame(
Model = names(models),
AIC = sapply(models, AIC),
BIC = sapply(models, BIC),
R_squared = sapply(models, function(m) summary(m)$r.squared),
Adj_R_squared = sapply(models, function(m) summary(m)$adj.r.squared)
)
print(kable(comparison, format = "simple", digits = 4))
##
##
## Model AIC BIC R_squared Adj_R_squared
## ----------------- ----------------- -------- -------- ---------- --------------
## 1 Knot 1 Knot 75.7017 87.3523 0.1948 0.1827
## 2 Knots 2 Knots 69.0939 83.6571 0.2442 0.2270
## 3 Knots 3 Knots 65.9931 83.4690 0.2720 0.2498
## Best Combination Best Combination 65.9931 83.4690 0.2720 0.2498
# Pilih model terbaik berdasarkan AIC
best_spline_model <- models[[which.min(comparison$AIC)]]
cat("\nModel Spline Terbaik adalah:", names(models)[which.min(comparison$AIC)], "\n")
##
## Model Spline Terbaik adalah: Best Combination
print(summary(best_spline_model))
##
## Call:
## lm(formula = y ~ X_spline, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62754 -0.16373 -0.04877 0.09408 1.69998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.497899 1.110602 -3.150 0.002026 **
## X_spline1 0.114170 0.031433 3.632 0.000402 ***
## X_spline2 -0.112604 0.036342 -3.098 0.002381 **
## X_spline3 0.006783 0.008757 0.774 0.440039
## X_spline4 -0.020046 0.009419 -2.128 0.035188 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3007 on 131 degrees of freedom
## Multiple R-squared: 0.272, Adjusted R-squared: 0.2498
## F-statistic: 12.24 on 4 and 131 DF, p-value: 1.753e-08
# 8. Uji Normalitas Residual
cat("\n8. UJI NORMALITAS RESIDUAL\n")
##
## 8. UJI NORMALITAS RESIDUAL
residuals <- resid(best_spline_model)
# Shapiro-Wilk test
shapiro_test <- shapiro.test(residuals)
cat("Shapiro-Wilk Test for Normality:\n")
## Shapiro-Wilk Test for Normality:
print(shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.88961, p-value = 1.282e-08
# Anderson-Darling test
ad_test <- ad.test(residuals)
cat("\nAnderson-Darling Test for Normality:\n")
##
## Anderson-Darling Test for Normality:
print(ad_test)
##
## Anderson-Darling normality test
##
## data: residuals
## A = 2.8066, p-value = 4.248e-07
# QQ Plot
qqnorm(residuals)
qqline(residuals, col = "red")
title("QQ Plot of Residuals")

# Histogram residuals
hist(residuals, main = "Histogram of Residuals", xlab = "Residuals", col = "lightblue")

# 9. Uji Serentak
cat("\n9. UJI SERENTAK\n")
##
## 9. UJI SERENTAK
# Uji F untuk signifikansi model
cat("Uji F untuk Signifikansi Model:\n")
## Uji F untuk Signifikansi Model:
print(anova(best_spline_model))
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## X_spline 4 4.4252 1.10631 12.237 1.753e-08 ***
## Residuals 131 11.8436 0.09041
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 10. Uji Parsial
cat("\n10. UJI PARSIAL\n")
##
## 10. UJI PARSIAL
# Uji t untuk setiap koefisien
cat("Uji t untuk Koefisien Model:\n")
## Uji t untuk Koefisien Model:
print(summary(best_spline_model)$coefficients)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.497899020 1.110601978 -3.1495523 0.0020259809
## X_spline1 0.114169919 0.031433119 3.6321537 0.0004021378
## X_spline2 -0.112603560 0.036341603 -3.0984753 0.0023808660
## X_spline3 0.006782523 0.008757446 0.7744864 0.4400387869
## X_spline4 -0.020046256 0.009419099 -2.1282562 0.0351883359