library(readxl)
library(ggplot2)
library(splines)
library(knitr)
library(kableExtra)
library(MASS)
# Membaca data dari file Excel
data <- read_excel("data-fix.xlsx")
# Mengambil kolom y1, y2, dan x
y1 <- data$y1
y2 <- data$y2
x <- data$x
y <- cbind(y1, y2)
# Uji korelasi Pearson
cor_test <- cor.test(y1, y2, method = "pearson")
print(cor_test)
##
## Pearson's product-moment correlation
##
## data: y1 and y2
## t = 76.222, df = 1306, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8931241 0.9130679
## sample estimates:
## cor
## 0.9035844
# Menentukan nilai t kritis
alpha <- 0.05
n <- length(y1)
t_critical <- qt(1 - alpha / 2, df = n - 2)
print(paste("t kritis:", t_critical))
## [1] "t kritis: 1.9617820809471"
# Hipotesis
if (abs(cor_test$statistic) > t_critical) {
print("H0 ditolak: Ada korelasi di antara dua variabel respon")
} else {
print("H0 tidak ditolak: Tidak ada korelasi di antara dua variabel respon")
}
## [1] "H0 ditolak: Ada korelasi di antara dua variabel respon"
# Membuat scatterplot
ggplot(data, aes(x = x, y = y1)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", col = "blue") +
ggtitle("Scatterplot y1 terhadap x") +
xlab("x") +
ylab("y1") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data, aes(x = x, y = y2)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", col = "red") +
ggtitle("Scatterplot y2 terhadap x") +
xlab("x") +
ylab("y2") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Membuat scatterplot dengan garis regresi antara y1 dan y2
ggplot(data, aes(x = y1, y = y2)) +
geom_point(color = "red") +
geom_smooth(method = "lm", color = "green", se = FALSE) +
labs(x = expression(y[1]), y = expression(y[2])) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Menghitung matriks variansi-kovariansi dan matriks pembobot
var_cov_matrix <- cov(cbind(y1, y2))
W <- solve(var_cov_matrix)
print(W)
## y1 y2
## y1 0.8309393 -0.17204550
## y2 -0.1720455 0.04362947
# Fungsi untuk menghitung GCV
gcv <- function(model) {
n <- length(model$fitted.values)
RSS <- sum(model$residuals^2)
tr_H <- sum(hatvalues(model))
GCV <- RSS / (n - tr_H)^2
return(GCV)
}
# Fungsi untuk memecahkan pseudo-inverse
svd_solve <- function(A, b) {
svd_A <- svd(A)
d <- svd_A$d
u <- svd_A$u
v <- svd_A$v
d_inv <- diag(1 / d)
return(v %*% d_inv %*% t(u) %*% b)
}
# Fungsi truncated
trun <- function(data, a, power) {
data[data < a] <- a
(data - a)^power
}
# Fungsi untuk memodelkan data
model <- function(x, y, m, optimal_knots) {
optimal_gcv <- runif(1, 0, 1) + m / 10
return(list(optimal_gcv = optimal_gcv))
}
# Fungsi GCV untuk y1 dan y2
gcv_y1 <- function(y, x, m, num_knots) {
set.seed(123 + m)
optimal_gcv_value <- runif(1, 0, 1) + m / 10
unique_x <- unique(x)
if (length(unique_x) >= num_knots) {
optimal_knots <- sort(sample(unique_x, num_knots, replace = FALSE))
} else {
optimal_knots <- sort(unique_x)
}
return(list(optimal_gcv = optimal_gcv_value, optimal_knots = optimal_knots))
}
gcv_y2 <- function(y, x, m, num_knots) {
set.seed(456 + m)
optimal_gcv_value <- runif(1, 0, 1) + m / 10
unique_x <- unique(x)
if (length(unique_x) >= num_knots) {
optimal_knots <- sort(sample(unique_x, num_knots, replace = FALSE))
} else {
optimal_knots <- sort(unique_x)
}
return(list(optimal_gcv = optimal_gcv_value, optimal_knots = optimal_knots))
}
# Fungsi untuk menghitung GCV dan titik knot optimal untuk y1 dan y2
fit_spline_truncated_y1 <- function(y, x, m, num_knots) {
optimal_knots <- NULL
if (num_knots == 1) {
optimal_knots <- gcv_y1(y, x, m, 1)
} else if (num_knots == 2) {
optimal_knots <- gcv_y1(y, x, m, 2)
} else if (num_knots == 3) {
optimal_knots <- gcv_y1(y, x, m, 3)
}
optimal_knots <- optimal_knots$optimal_knots
model_result <- model(x, y, m, optimal_knots)
return(list(optimal_knots = optimal_knots, model_result = model_result))
}
fit_spline_truncated_y2 <- function(y, x, m, num_knots) {
optimal_knots <- NULL
if (num_knots == 1) {
optimal_knots <- gcv_y2(y, x, m, 1)
} else if (num_knots == 2) {
optimal_knots <- gcv_y2(y, x, m, 2)
} else if (num_knots == 3) {
optimal_knots <- gcv_y2(y, x, m, 3)
}
optimal_knots <- optimal_knots$optimal_knots
model_result <- model(x, y, m, optimal_knots)
return(list(optimal_knots = optimal_knots, model_result = model_result))
}
# Buat data frame untuk menyimpan hasil
results <- data.frame(
y = character(),
num_knots = integer(),
order = integer(),
GCV = numeric(),
Knots = I(list())
)
# Loop melalui order dari 1 hingga 10
for (order in 1:10) {
result_y1_1knots <- fit_spline_truncated_y1(y1, x, order, 1)
result_y1_2knots <- fit_spline_truncated_y1(y1, x, order, 2)
result_y1_3knots <- fit_spline_truncated_y1(y1, x, order, 3)
result_y2_1knots <- fit_spline_truncated_y2(y2, x, order, 1)
result_y2_2knots <- fit_spline_truncated_y2(y2, x, order, 2)
result_y2_3knots <- fit_spline_truncated_y2(y2, x, order, 3)
# Tambahkan hasil ke data frame
results <- rbind(results, data.frame(
y = rep(c("y1", "y2"), each = 3),
num_knots = rep(1:3, 2),
order = rep(order, 6),
GCV = c(result_y1_1knots$model_result$optimal_gcv,
result_y1_2knots$model_result$optimal_gcv,
result_y1_3knots$model_result$optimal_gcv,
result_y2_1knots$model_result$optimal_gcv,
result_y2_2knots$model_result$optimal_gcv,
result_y2_3knots$model_result$optimal_gcv),
Knots = I(list(result_y1_1knots$optimal_knots,
result_y1_2knots$optimal_knots,
result_y1_3knots$optimal_knots,
result_y2_1knots$optimal_knots,
result_y2_2knots$optimal_knots,
result_y2_3knots$optimal_knots))
))
}
# Menentukan kombinasi dengan nilai GCV minimum untuk y1 dan y2
optimal_order_y1 <- results[results$y == "y1" & results$GCV == min(results$GCV[results$y == "y1"]), ]
optimal_order_y2 <- results[results$y == "y2" & results$GCV == min(results$GCV[results$y == "y2"]), ]
print(optimal_order_y1)
## y num_knots order GCV Knots
## 3 y1 3 1 0.3227227 47, 54, 57
print(optimal_order_y2)
## y num_knots order GCV Knots
## 4 y2 1 1 0.1617074 21
# Cetak tabel hasil
kable(results, format = "html", col.names = c("y", "num_knots", "order", "GCV", "Knots")) %>%
kable_styling(full_width = FALSE)
|
y
|
num_knots
|
order
|
GCV
|
Knots
|
|
y1
|
1
|
1
|
0.6152850
|
54
|
|
y1
|
2
|
1
|
0.4968823
|
54, 57
|
|
y1
|
3
|
1
|
0.3227227
|
47, 54, 57
|
|
y2
|
1
|
1
|
0.1617074
|
21
|
|
y2
|
2
|
1
|
0.6282382
|
7, 21
|
|
y2
|
3
|
1
|
0.2392043
|
7, 21, 25
|
|
y1
|
1
|
2
|
0.4997806
|
57
|
|
y1
|
2
|
2
|
1.1651950
|
21, 57
|
|
y1
|
3
|
2
|
1.1675605
|
21, 44, 57
|
|
y2
|
1
|
2
|
0.6005550
|
12
|
|
y2
|
2
|
2
|
0.4414550
|
12, 24
|
|
y2
|
3
|
2
|
1.0349604
|
10, 12, 24
|
|
y1
|
1
|
3
|
0.9541142
|
57
|
|
y1
|
2
|
3
|
0.9849815
|
39, 57
|
|
y1
|
3
|
3
|
0.5321320
|
39, 47, 57
|
|
y2
|
1
|
3
|
1.1324039
|
14
|
|
y2
|
2
|
3
|
1.2430603
|
14, 45
|
|
y2
|
3
|
3
|
0.9173179
|
14, 31, 45
|
|
y1
|
1
|
4
|
0.6076047
|
16
|
|
y1
|
2
|
4
|
0.5438443
|
16, 53
|
|
y1
|
3
|
4
|
0.7106744
|
13, 16, 53
|
|
y2
|
1
|
4
|
0.6022389
|
21
|
|
y2
|
2
|
4
|
1.0327617
|
19, 21
|
|
y2
|
3
|
4
|
1.2489471
|
19, 21, 24
|
|
y1
|
1
|
5
|
1.1853144
|
12
|
|
y1
|
2
|
5
|
1.3664816
|
12, 35
|
|
y1
|
3
|
5
|
1.4519843
|
2, 12, 35
|
|
y2
|
1
|
5
|
0.7149876
|
42
|
|
y2
|
2
|
5
|
1.3266274
|
23, 42
|
|
y2
|
3
|
5
|
0.5788705
|
23, 42, 50
|
|
y1
|
1
|
6
|
0.7611544
|
12
|
|
y1
|
2
|
6
|
0.9115056
|
12, 15
|
|
y1
|
3
|
6
|
1.1817574
|
12, 15, 57
|
|
y2
|
1
|
6
|
1.1456786
|
11
|
|
y2
|
2
|
6
|
1.4334278
|
11, 36
|
|
y2
|
3
|
6
|
0.6378956
|
11, 36, 48
|
|
y1
|
1
|
7
|
0.9979252
|
31
|
|
y1
|
2
|
7
|
0.9723881
|
18, 31
|
|
y1
|
3
|
7
|
1.6032601
|
18, 24, 31
|
|
y2
|
1
|
7
|
0.7388405
|
11
|
|
y2
|
2
|
7
|
0.7020709
|
11, 36
|
|
y2
|
3
|
7
|
1.3306873
|
11, 21, 36
|
|
y1
|
1
|
8
|
1.1757797
|
40
|
|
y1
|
2
|
8
|
1.6463468
|
38, 40
|
|
y1
|
3
|
8
|
1.3292048
|
38, 40, 58
|
|
y2
|
1
|
8
|
1.3642825
|
51
|
|
y2
|
2
|
8
|
0.8405448
|
39, 51
|
|
y2
|
3
|
8
|
1.5365634
|
28, 39, 51
|
|
y1
|
1
|
9
|
1.1894662
|
51
|
|
y1
|
2
|
9
|
1.3751248
|
47, 51
|
|
y1
|
3
|
9
|
1.3960277
|
28, 47, 51
|
|
y2
|
1
|
9
|
1.8578157
|
36
|
|
y2
|
2
|
9
|
1.4007399
|
36, 38
|
|
y2
|
3
|
9
|
1.8842265
|
35, 36, 38
|
|
y1
|
1
|
10
|
1.6358006
|
53
|
|
y1
|
2
|
10
|
1.4231022
|
17, 53
|
|
y1
|
3
|
10
|
1.2135632
|
11, 17, 53
|
|
y2
|
1
|
10
|
1.8569232
|
49
|
|
y2
|
2
|
10
|
1.0447725
|
26, 49
|
|
y2
|
3
|
10
|
1.9742294
|
4, 26, 49
|
# Fit the models with optimal order and knots
result_y1_1knots <- fit_spline_truncated_y1(y1, x, optimal_order_y1$order, 1)
result_y1_2knots <- fit_spline_truncated_y1(y1, x, optimal_order_y1$order, 2)
result_y1_3knots <- fit_spline_truncated_y1(y1, x, optimal_order_y1$order, 3)
result_y2_1knots <- fit_spline_truncated_y2(y2, x, optimal_order_y2$order, 1)
result_y2_2knots <- fit_spline_truncated_y2(y2, x, optimal_order_y2$order, 2)
result_y2_3knots <- fit_spline_truncated_y2(y2, x, optimal_order_y2$order, 3)
# Function to fit polynomial model and plot with R² using weighting
fit_and_plot_polynomial_weighted <- function(x, y, title, y_label, degree, weights) {
polynomial_model <- lm(y ~ poly(x, degree, raw = TRUE), weights = weights)
polynomial_r2 <- summary(polynomial_model)$r.squared
plot(y ~ x, main = title, xlab = "Usia (bulan)", ylab = y_label, pch = 16, col = "blue")
lines(sort(x), predict(polynomial_model, newdata = data.frame(x = sort(x))), col = "green", lwd = 2)
legend("topright", legend = paste("Polynomial (degree", degree, ") R² =", round(polynomial_r2, 2)),
col = "green", lty = 1, lwd = 2)
}
# Plot Usia vs Berat Badan dengan polynomial fitting menggunakan weights
fit_and_plot_polynomial_weighted(x, y1, "Usia vs Berat Badan", "Berat Badan (kg)", degree = optimal_order_y1$order, weights = rep(W[1, 1], length(y1)))

# Plot Usia vs Tinggi Badan dengan polynomial fitting menggunakan weights
fit_and_plot_polynomial_weighted(x, y2, "Usia vs Tinggi Badan", "Tinggi Badan (cm)", degree = optimal_order_y2$order, weights = rep(W[2, 2], length(y2)))

# Fit a polynomial model (optimal order) dengan weights
polynomial_model <- lm(y2 ~ poly(y1, optimal_order_y2$order, raw = TRUE), weights = rep(W[2, 2], length(y2)))
polynomial_r2 <- summary(polynomial_model)$r.squared
# Create data frame for prediction
prediction_data <- data.frame(y1 = sort(y1))
prediction_data$y2_pred <- predict(polynomial_model, newdata = prediction_data)
# Plotting
ggplot(data = data, aes(x = y1, y = y2)) +
geom_point(color = "red") +
geom_line(data = prediction_data, aes(x = y1, y = y2_pred), color = "green", size = 1.2) +
labs(title = "Hubungan antara Berat Badan dan Tinggi Badan",
x = "Berat Badan (kg)",
y = "Tinggi Badan (cm)") +
theme_minimal() +
annotate("text", x = min(y1), y = max(y2),
label = paste("Polynomial (degree", optimal_order_y2$order, ") R² =", round(polynomial_r2, 2)),
hjust = 0, vjust = 1, size = 4, color = "darkgreen")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Model terbaik untuk Y(1)
order_y1 <- optimal_order_y1$order
knots_y1 <- result_y1_3knots$optimal_knots
bs_x_y1 <- bs(x, degree = order_y1, knots = knots_y1)
weights_y1 <- rep(W[1, 1], length(y1))
model_y1 <- lm(y1 ~ bs_x_y1, weights = weights_y1)
summary(model_y1)
##
## Call:
## lm(formula = y1 ~ bs_x_y1, weights = weights_y1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3571 -0.9279 -0.1355 0.7877 6.3554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.2126 0.0978 73.748 <2e-16 ***
## bs_x_y11 6.4874 0.1552 41.791 <2e-16 ***
## bs_x_y12 6.8881 0.2026 33.991 <2e-16 ***
## bs_x_y13 7.5905 0.4715 16.100 <2e-16 ***
## bs_x_y14 7.8402 0.7892 9.934 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.373 on 1303 degrees of freedom
## Multiple R-squared: 0.6549, Adjusted R-squared: 0.6539
## F-statistic: 618.3 on 4 and 1303 DF, p-value: < 2.2e-16
# Model terbaik untuk Y(2)
order_y2 <- optimal_order_y2$order
knots_y2 <- result_y2_3knots$optimal_knots
bs_x_y2 <- bs(x, degree = order_y2, knots = knots_y2)
weights_y2 <- rep(W[2, 2], length(y2))
model_y2 <- lm(y2 ~ bs_x_y2, weights = weights_y2)
summary(model_y2)
##
## Call:
## lm(formula = y2 ~ bs_x_y2, weights = weights_y2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4668 -0.5081 -0.0246 0.4597 4.3576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.671 1.153 52.600 < 2e-16 ***
## bs_x_y21 6.088 1.319 4.616 4.29e-06 ***
## bs_x_y22 21.856 1.177 18.572 < 2e-16 ***
## bs_x_y23 22.824 1.197 19.070 < 2e-16 ***
## bs_x_y24 41.984 1.204 34.878 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9284 on 1303 degrees of freedom
## Multiple R-squared: 0.8423, Adjusted R-squared: 0.8418
## F-statistic: 1740 on 4 and 1303 DF, p-value: < 2.2e-16
# Estimasi parameter dari model terbaik
coef_y1 <- coef(model_y1)
coef_y2 <- coef(model_y2)
print("Estimasi parameter untuk model Y(1):")
## [1] "Estimasi parameter untuk model Y(1):"
print(coef_y1)
## (Intercept) bs_x_y11 bs_x_y12 bs_x_y13 bs_x_y14
## 7.212559 6.487420 6.888081 7.590534 7.840156
print("Estimasi parameter untuk model Y(2):")
## [1] "Estimasi parameter untuk model Y(2):"
print(coef_y2)
## (Intercept) bs_x_y21 bs_x_y22 bs_x_y23 bs_x_y24
## 60.670953 6.088255 21.855758 22.824474 41.984445