# Load necessary libraries
library(splines)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Define the function to calculate GCV and get the top 10 smallest GCV values
calculate_top_10_gcv <- function(file_path) {
# Load the dataset
data <- read_excel(file_path)
# Define the GCV function
gcv_function <- function(data, knot_x1, knot_x2, knot_x3) {
n <- nrow(data)
y <- data$y
# Fit the model using a natural spline with 1 knot for each predictor
fit <- lm(y ~ ns(x1, knots = knot_x1) + ns(x2, knots = knot_x2) + ns(x3, knots = knot_x3), data = data)
# Calculate the mean squared error (MSE)
mse <- mean(residuals(fit)^2)
# Calculate the hat matrix
hat_matrix <- hatvalues(fit)
# Calculate GCV
gcv <- mse / ((1 - mean(hat_matrix))^2)
return(gcv)
}
# Define possible knot points (e.g., unique values of x1, x2, and x3)
knot_points_x1 <- unique(data$x1)
knot_points_x2 <- unique(data$x2)
knot_points_x3 <- unique(data$x3)
# Initialize a dataframe to store GCV results
gcv_results <- data.frame(knot_x1 = numeric(), knot_x2 = numeric(), knot_x3 = numeric(), gcv = numeric())
# Calculate GCV for each combination of knot points
for (knot_x1 in knot_points_x1) {
for (knot_x2 in knot_points_x2) {
for (knot_x3 in knot_points_x3) {
gcv_value <- tryCatch({
gcv_function(data, knot_x1, knot_x2, knot_x3)
}, error = function(e) {
NA # Return NA if there's an error
})
gcv_results <- rbind(gcv_results, data.frame(knot_x1 = knot_x1, knot_x2 = knot_x2, knot_x3 = knot_x3, gcv = gcv_value))
}
}
}
# Remove NA values
gcv_results <- na.omit(gcv_results)
# Save all the GCV results to a CSV file
write.csv(gcv_results, "gcv_results.csv", row.names = FALSE)
# Sort by GCV value and get the 10 smallest GCV values
top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
# Return the results
return(top_10_gcv)
}
# Define the function to calculate GCV and get the top 10 smallest GCV values for two knot points
calculate_top_10_gcv_two_knots <- function(file_path) {
# Load the dataset
data <- read_excel(file_path)
# Define the GCV function for two knot points
gcv_function <- function(data, knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3) {
n <- nrow(data)
y <- data$y
# Fit the model using a natural spline with 2 knots for each predictor
fit <- lm(y ~ ns(x1, knots = c(knot1_x1, knot2_x1)) + ns(x2, knots = c(knot1_x2, knot2_x2)) + ns(x3, knots = c(knot1_x3, knot2_x3)), data = data)
# Calculate the mean squared error (MSE)
mse <- mean(residuals(fit)^2)
# Calculate the hat matrix
hat_matrix <- hatvalues(fit)
# Calculate GCV
gcv <- mse / ((1 - mean(hat_matrix))^2)
return(gcv)
}
# Define possible knot points (e.g., unique values of x1, x2, and x3)
knot_points_x1 <- unique(data$x1)
knot_points_x2 <- unique(data$x2)
knot_points_x3 <- unique(data$x3)
# Initialize a dataframe to store GCV results
gcv_results <- data.frame(knot1_x1 = numeric(), knot2_x1 = numeric(), knot1_x2 = numeric(), knot2_x2 = numeric(), knot1_x3 = numeric(), knot2_x3 = numeric(), gcv = numeric())
# Calculate GCV for each combination of two knot points
for (knot1_x1 in knot_points_x1) {
for (knot2_x1 in knot_points_x1[knot_points_x1 > knot1_x1]) {
for (knot1_x2 in knot_points_x2) {
for (knot2_x2 in knot_points_x2[knot_points_x2 > knot1_x2]) {
for (knot1_x3 in knot_points_x3) {
for (knot2_x3 in knot_points_x3[knot_points_x3 > knot1_x3]) {
gcv_value <- tryCatch({
gcv_function(data, knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3)
}, error = function(e) {
NA # Return NA if there's an error
})
gcv_results <- rbind(gcv_results, data.frame(knot1_x1 = knot1_x1, knot2_x1 = knot2_x1, knot1_x2 = knot1_x2, knot2_x2 = knot2_x2, knot1_x3 = knot1_x3, knot2_x3 = knot2_x3, gcv = gcv_value))
}
}
}
}
}
}
# Remove NA values
gcv_results <- na.omit(gcv_results)
# Save all the GCV results to a CSV file
write.csv(gcv_results, "gcv_results_two_knots.csv", row.names = FALSE)
# Sort by GCV value and get the 10 smallest GCV values
top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
# Return the results
return(top_10_gcv)
}
# Define the function to calculate GCV and get the top 10 smallest GCV values for three knot points
calculate_top_10_gcv_three_knots <- function(file_path, sample_size = 10) {
# Load the dataset
data <- read_excel(file_path)
# Define the GCV function for three knot points
gcv_function <- function(data, knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_x3, knot2_x3, knot3_x3) {
n <- nrow(data)
y <- data$y
# Fit the model using a natural spline with 3 knots for each predictor
fit <- lm(y ~ ns(x1, knots = c(knot1_x1, knot2_x1, knot3_x1)) + ns(x2, knots = c(knot1_x2, knot2_x2, knot3_x2)) + ns(x3, knots = c(knot1_x3, knot2_x3, knot3_x3)), data = data)
# Calculate the mean squared error (MSE)
mse <- mean(residuals(fit)^2)
# Calculate the hat matrix
hat_matrix <- hatvalues(fit)
# Calculate GCV
gcv <- mse / ((1 - mean(hat_matrix))^2)
return(gcv)
}
# Define possible knot points (e.g., unique values of x1, x2, and x3)
set.seed(123) # For reproducibility
knot_points_x1 <- sample(unique(data$x1), min(sample_size, length(unique(data$x1))))
knot_points_x2 <- sample(unique(data$x2), min(sample_size, length(unique(data$x2))))
knot_points_x3 <- sample(unique(data$x3), min(sample_size, length(unique(data$x3))))
# Initialize a dataframe to store GCV results
gcv_results <- data.frame(knot1_x1 = numeric(), knot2_x1 = numeric(), knot3_x1 = numeric(),
knot1_x2 = numeric(), knot2_x2 = numeric(), knot3_x2 = numeric(),
knot1_x3 = numeric(), knot2_x3 = numeric(), knot3_x3 = numeric(), gcv = numeric())
# Calculate GCV for each combination of three knot points
for (knot1_x1 in knot_points_x1) {
for (knot2_x1 in knot_points_x1[knot_points_x1 > knot1_x1]) {
for (knot3_x1 in knot_points_x1[knot_points_x1 > knot2_x1]) {
for (knot1_x2 in knot_points_x2) {
for (knot2_x2 in knot_points_x2[knot_points_x2 > knot1_x2]) {
for (knot3_x2 in knot_points_x2[knot_points_x2 > knot2_x2]) {
for (knot1_x3 in knot_points_x3) {
for (knot2_x3 in knot_points_x3[knot_points_x3 > knot1_x3]) {
for (knot3_x3 in knot_points_x3[knot_points_x3 > knot2_x3]) {
gcv_value <- tryCatch({
gcv_function(data, knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_x3, knot2_x3, knot3_x3)
}, error = function(e) {
NA # Return NA if there's an error
})
gcv_results <- rbind(gcv_results, data.frame(knot1_x1 = knot1_x1, knot2_x1 = knot2_x1, knot3_x1 = knot3_x1,
knot1_x2 = knot1_x2, knot2_x2 = knot2_x2, knot3_x2 = knot3_x2,
knot1_x3 = knot1_x3, knot2_x3 = knot2_x3, knot3_x3 = knot3_x3, gcv = gcv_value))
}
}
}
}
}
}
}
}
}
# Remove NA values
gcv_results <- na.omit(gcv_results)
# Save all the GCV results to a CSV file
write.csv(gcv_results, "gcv_results_three_knots.csv", row.names = FALSE)
# Sort by GCV value and get the 10 smallest GCV values
top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
# Return the results
return(top_10_gcv)
}
# Define the function to read the CSV files and display the top 10 GCV values
display_top_10_gcv <- function(file_path) {
# Read the CSV file
data <- read.csv(file_path)
# Get the top 10 smallest GCV values
top_10_gcv <- data %>% arrange(gcv) %>% head(10)
# Print the top 10 GCV values
print(top_10_gcv)
}
# Define the function to estimate parameters
estimate_parameters <- function(data_analisis, knot_data, para) {
data <- data_analisis
data <- as.matrix(data)
p <- nrow(data)
q <- ncol(data)
F <- matrix(0, nrow = p, ncol = p)
diag(F) <- 1
min_gcv_index <- which.min(knot_data$gcv)
min_knot <- as.numeric(knot_data[min_gcv_index, 1:3])
aa <- rep(1, p)
m <- ncol(data) - para - 1
data1 <- matrix(ncol = length(min_knot), nrow = p)
data2 <- data[, (para + 2):q]
for (i in 1:length(min_knot)) {
for (k in 1:p) {
if (data2[k, ceiling(i/3)] < min_knot[i]) {
data1[k, i] <- 0
} else {
data1[k, i] <- data2[k, ceiling(i/3)] - min_knot[i]
}
}
}
mx <- cbind(aa, data[, 2:q], data1)
mx <- as.matrix(mx)
C <- MASS::ginv(t(mx) %*% mx)
B <- C %*% (t(mx) %*% data[, 1])
return(list(B = B, mx = mx, C = C))
}
# Define the function for simultaneous F-test
uji_simultan <- function(data_analisis, knot_data, para) {
estimasi <- estimate_parameters(data_analisis, knot_data, para)
B <- estimasi$B
mx <- estimasi$mx
data <- data_analisis
data <- as.matrix(data)
p <- nrow(data)
y <- data[, 1]
yhat <- mx %*% B
SSR <- sum((yhat - mean(y))^2)
SSE <- sum((y - yhat)^2)
MSR <- SSR / (length(B) - 1)
MSE <- SSE / (p - length(B))
F_stat <- MSR / MSE
df1 <- length(B) - 1
df2 <- p - length(B)
p_value <- 1 - pf(F_stat, df1, df2)
return(list(SSR = SSR, SSE = SSE, MSR = MSR, MSE = MSE, F_stat = F_stat, p_value = p_value, df1 = df1, df2 = df2))
}
# Define the function for partial t-test
uji_parsial <- function(data_analisis, knot_data, para, alpha = 0.05) {
estimasi <- estimate_parameters(data_analisis, knot_data, para)
B <- estimasi$B
C <- estimasi$C
data <- data_analisis
data <- as.matrix(data)
p <- nrow(data)
q <- ncol(data)
mx <- estimasi$mx
y <- data[, 1]
yhat <- mx %*% B
SSE <- sum((y - yhat)^2)
MSE <- SSE / (p - length(B))
SE <- sqrt(diag(MSE * C))
t_stat <- B / SE
df <- p - length(B)
p_values <- 2 * (1 - pt(abs(t_stat), df))
signifikan <- ifelse(p_values < alpha, "Signifikan", "Tidak Signifikan")
return(data.frame(Parameter = B, SE = SE, t_stat = t_stat, p_value = p_values, Signifikan = signifikan))
}
# File paths
file_path <- "fileku.xlsx"
# Call the GCV functions and display the results
top_10_gcv_results <- calculate_top_10_gcv(file_path)
print(top_10_gcv_results)
## knot_x1 knot_x2 knot_x3 gcv
## 1 0.17 2.8 1.2 8.561401
## 2 0.17 3.2 1.2 8.821678
## 3 0.21 2.8 1.2 8.916781
## 4 0.20 2.8 1.2 8.916844
## 5 0.19 2.8 1.2 8.919435
## 6 0.18 2.8 1.2 8.923331
## 7 0.24 2.8 1.2 8.926404
## 8 0.25 2.8 1.2 8.930552
## 9 0.27 2.8 1.2 8.938791
## 10 0.28 2.8 1.2 8.944000
top_10_gcv_results_two_knots <- calculate_top_10_gcv_two_knots(file_path)
print(top_10_gcv_results_two_knots)
## knot1_x1 knot2_x1 knot1_x2 knot2_x2 knot1_x3 knot2_x3 gcv
## 1 0.17 0.20 3.2 3.4 1.2 2.4 8.716636
## 2 0.17 0.19 3.2 3.4 1.2 2.4 8.717171
## 3 0.17 0.21 3.2 3.4 1.2 2.4 8.718834
## 4 0.17 0.18 3.2 3.4 1.2 2.4 8.719118
## 5 0.17 0.20 3.2 3.4 1.2 3.6 8.725173
## 6 0.17 0.19 3.2 3.4 1.2 3.6 8.725660
## 7 0.17 0.21 3.2 3.4 1.2 3.6 8.727389
## 8 0.17 0.18 3.2 3.4 1.2 3.6 8.727560
## 9 0.17 0.20 3.2 3.4 1.2 4.8 8.735309
## 10 0.17 0.19 3.2 3.4 1.2 4.8 8.735684
top_10_gcv_results_three_knots <- calculate_top_10_gcv_three_knots(file_path)
print(top_10_gcv_results_three_knots)
## knot1_x1 knot2_x1 knot3_x1 knot1_x2 knot2_x2 knot3_x2 knot1_x3 knot2_x3
## 1 0.18 0.2 0.24 2.7 3.2 3.4 1.2 4.8
## 2 0.18 0.2 0.24 2.7 3.2 3.4 1.2 3.6
## 3 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 4 0.18 0.2 0.24 2.7 3.2 3.4 3.6 4.8
## 5 0.18 0.2 0.28 2.7 3.2 3.4 1.2 4.8
## 6 0.18 0.2 0.24 2.7 3.2 3.4 1.2 3.6
## 7 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 8 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 9 0.18 0.2 0.24 2.7 3.2 3.4 2.4 4.8
## 10 0.18 0.2 0.28 2.7 3.2 3.4 1.2 3.6
## knot3_x3 gcv
## 1 6.0 8.826686
## 2 6.0 9.267963
## 3 6.0 9.406265
## 4 6.0 9.480160
## 5 6.0 9.554743
## 6 4.8 9.568576
## 7 4.8 9.607318
## 8 3.6 9.664246
## 9 6.0 9.718161
## 10 6.0 9.936421
# Display the top 10 GCV values for each file
file_paths <- c("gcv_results.csv", "gcv_results_two_knots.csv", "gcv_results_three_knots.csv")
for (file_path in file_paths) {
cat("\nTop 10 GCV values for:", file_path, "\n")
display_top_10_gcv(file_path)
}
##
## Top 10 GCV values for: gcv_results.csv
## knot_x1 knot_x2 knot_x3 gcv
## 1 0.17 2.8 1.2 8.561401
## 2 0.17 3.2 1.2 8.821678
## 3 0.21 2.8 1.2 8.916781
## 4 0.20 2.8 1.2 8.916844
## 5 0.19 2.8 1.2 8.919435
## 6 0.18 2.8 1.2 8.923331
## 7 0.24 2.8 1.2 8.926404
## 8 0.25 2.8 1.2 8.930552
## 9 0.27 2.8 1.2 8.938791
## 10 0.28 2.8 1.2 8.944000
##
## Top 10 GCV values for: gcv_results_two_knots.csv
## knot1_x1 knot2_x1 knot1_x2 knot2_x2 knot1_x3 knot2_x3 gcv
## 1 0.17 0.20 3.2 3.4 1.2 2.4 8.716636
## 2 0.17 0.19 3.2 3.4 1.2 2.4 8.717171
## 3 0.17 0.21 3.2 3.4 1.2 2.4 8.718834
## 4 0.17 0.18 3.2 3.4 1.2 2.4 8.719118
## 5 0.17 0.20 3.2 3.4 1.2 3.6 8.725173
## 6 0.17 0.19 3.2 3.4 1.2 3.6 8.725660
## 7 0.17 0.21 3.2 3.4 1.2 3.6 8.727389
## 8 0.17 0.18 3.2 3.4 1.2 3.6 8.727560
## 9 0.17 0.20 3.2 3.4 1.2 4.8 8.735309
## 10 0.17 0.19 3.2 3.4 1.2 4.8 8.735684
##
## Top 10 GCV values for: gcv_results_three_knots.csv
## knot1_x1 knot2_x1 knot3_x1 knot1_x2 knot2_x2 knot3_x2 knot1_x3 knot2_x3
## 1 0.18 0.2 0.24 2.7 3.2 3.4 1.2 4.8
## 2 0.18 0.2 0.24 2.7 3.2 3.4 1.2 3.6
## 3 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 4 0.18 0.2 0.24 2.7 3.2 3.4 3.6 4.8
## 5 0.18 0.2 0.28 2.7 3.2 3.4 1.2 4.8
## 6 0.18 0.2 0.24 2.7 3.2 3.4 1.2 3.6
## 7 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 8 0.18 0.2 0.24 2.7 3.2 3.4 1.2 2.4
## 9 0.18 0.2 0.24 2.7 3.2 3.4 2.4 4.8
## 10 0.18 0.2 0.28 2.7 3.2 3.4 1.2 3.6
## knot3_x3 gcv
## 1 6.0 8.826686
## 2 6.0 9.267963
## 3 6.0 9.406265
## 4 6.0 9.480160
## 5 6.0 9.554743
## 6 4.8 9.568576
## 7 4.8 9.607318
## 8 3.6 9.664246
## 9 6.0 9.718161
## 10 6.0 9.936421
# Load the data and run parameter estimation and tests
data_analisis <- read_excel("fileku.xlsx")
knot_data <- read.csv("gcv_results.csv")
# Example usage
para <- 1 # Set your value for para
result <- estimate_parameters(data_analisis, knot_data, para)
print(result$B)
## [,1]
## [1,] 76.21753492
## [2,] -11.49219917
## [3,] 17.92992821
## [4,] 0.09086878
## [5,] 4.97294728
## [6,] 54.14829813
## [7,] -73.53111369
print(result$mx)
## aa x1 x2 x3
## [1,] 1 0.27 3.6 3.6 3.43 0.8 2.4
## [2,] 1 0.36 3.6 6.0 3.43 0.8 2.4
## [3,] 1 0.34 3.4 2.4 3.23 0.6 2.2
## [4,] 1 0.44 3.6 1.2 3.43 0.8 2.4
## [5,] 1 0.28 3.6 4.8 3.43 0.8 2.4
## [6,] 1 0.36 3.6 2.4 3.43 0.8 2.4
## [7,] 1 0.35 3.6 7.2 3.43 0.8 2.4
## [8,] 1 0.35 2.7 1.2 2.53 0.0 1.5
## [9,] 1 0.32 3.6 3.6 3.43 0.8 2.4
## [10,] 1 0.21 2.8 2.4 2.63 0.0 1.6
## [11,] 1 0.19 3.6 6.0 3.43 0.8 2.4
## [12,] 1 0.34 3.6 3.6 3.43 0.8 2.4
## [13,] 1 0.31 3.6 2.4 3.43 0.8 2.4
## [14,] 1 0.39 3.2 4.8 3.03 0.4 2.0
## [15,] 1 0.36 3.6 2.4 3.43 0.8 2.4
## [16,] 1 0.30 3.4 1.2 3.23 0.6 2.2
## [17,] 1 0.17 3.6 3.6 3.43 0.8 2.4
## [18,] 1 0.21 3.6 4.8 3.43 0.8 2.4
## [19,] 1 0.24 3.6 1.2 3.43 0.8 2.4
## [20,] 1 0.18 3.6 3.6 3.43 0.8 2.4
## [21,] 1 0.20 3.6 2.4 3.43 0.8 2.4
## [22,] 1 0.35 3.6 1.2 3.43 0.8 2.4
## [23,] 1 0.32 3.6 3.6 3.43 0.8 2.4
## [24,] 1 0.25 3.6 1.2 3.43 0.8 2.4
## [25,] 1 0.35 2.8 4.8 2.63 0.0 1.6
## [26,] 1 0.19 3.6 4.8 3.43 0.8 2.4
## [27,] 1 0.21 3.6 2.4 3.43 0.8 2.4
## [28,] 1 0.30 3.6 2.4 3.43 0.8 2.4
print(result$C)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 238.1368799 -4.22391000 45.56577510 0.388179311 5.082505536
## [2,] -4.2239100 7.04110997 -1.14164363 0.019201235 -0.423578935
## [3,] 45.5657751 -1.14164363 8.74315059 0.067394182 0.996968832
## [4,] 0.3881793 0.01920123 0.06739418 0.014346745 0.001403699
## [5,] 5.0825055 -0.42357893 0.99696883 0.001403699 0.132942891
## [6,] 196.1645229 -2.14279862 37.39233931 0.334596233 4.044370441
## [7,] -240.1984808 3.92704837 -45.93577953 -0.398420991 -5.102037811
## [,6] [,7]
## [1,] 196.1645229 -240.198481
## [2,] -2.1427986 3.927048
## [3,] 37.3923393 -45.935780
## [4,] 0.3345962 -0.398421
## [5,] 4.0443704 -5.102038
## [6,] 162.8134217 -198.005088
## [7,] -198.0050881 242.302397
# Simultaneous F-test
simultan_result <- uji_simultan(data_analisis, knot_data, para)
print(simultan_result)
## $SSR
## [1] 41.72565
##
## $SSE
## [1] 170.8344
##
## $MSR
## [1] 6.954274
##
## $MSE
## [1] 8.134969
##
## $F_stat
## [1] 0.8548618
##
## $p_value
## [1] 0.5431029
##
## $df1
## [1] 6
##
## $df2
## [1] 21
# Partial t-test
parsial_result <- uji_parsial(data_analisis, knot_data, para)
print(parsial_result)
## Parameter SE t_stat p_value Signifikan
## 1 76.21753492 44.0140453 1.7316639 0.0979994247 Tidak Signifikan
## 2 -11.49219917 7.5683032 -1.5184644 0.1438096067 Tidak Signifikan
## 3 17.92992821 8.4335794 2.1260164 0.0455274212 Signifikan
## 4 0.09086878 0.3416289 0.2659868 0.7928433601 Tidak Signifikan
## 5 4.97294728 1.0399453 4.7819314 0.0001004632 Signifikan
## 6 54.14829813 36.3934358 1.4878589 0.1516511582 Tidak Signifikan
## 7 -73.53111369 44.3973258 -1.6562059 0.1125476091 Tidak Signifikan
data_analisis = read_excel("fileku.xlsx")
gcv_results = read.csv("gcv_results.csv")
para = 0
# Load necessary libraries
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(splines)
# Define the function to perform residual diagnostics for single knot
uji_residual_single_knot <- function(data_analisis, knot_data, para) {
# Pilih knot dengan GCV terkecil
min_gcv_index <- which.min(knot_data$gcv)
min_knots <- as.numeric(knot_data[min_gcv_index, 1:3])
# Buat model regresi spline dengan 1 titik knot
fit <- lm(y ~ ns(x1, knots = min_knots[1]) + ns(x2, knots = min_knots[2]) + ns(x3, knots = min_knots[3]),
data = data_analisis)
# Hitung residual
residuals <- residuals(fit)
# Uji Normalitas Residual (Shapiro-Wilk Test)
shapiro_test <- shapiro.test(residuals)
# Uji Homoskedastisitas (Breusch-Pagan Test)
bp_test <- bptest(fit)
# Uji Autokorelasi (Durbin-Watson Test)
dw_test <- dwtest(fit)
# Menyusun Kesimpulan
kesimpulan <- list()
# Kesimpulan Normalitas
if (shapiro_test$p.value > 0.05) {
kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
} else {
kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
}
# Kesimpulan Homoskedastisitas
if (bp_test$p.value > 0.05) {
kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
} else {
kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
}
# Kesimpulan Autokorelasi
if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
} else {
kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
}
# Return hasil uji residual dan kesimpulan
return(list(
residuals = residuals,
shapiro_test = shapiro_test,
bp_test = bp_test,
dw_test = dw_test,
kesimpulan = kesimpulan
))
}
# Contoh Penggunaan Fungsi Uji Residual untuk 1 Titik Knot
residual_result_single_knot <- uji_residual_single_knot(data_analisis, gcv_results, para = 1)
## Warning in dwtest(fit): imaginary parts of eigenvalues discarded
# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 1 Titik Knot:\n")
##
## Hasil Uji Residual dengan 1 Titik Knot:
cat("Shapiro-Wilk Normality Test:\n")
## Shapiro-Wilk Normality Test:
print(residual_result_single_knot$shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.9416, p-value = 0.1214
cat("\nBreusch-Pagan Homoskedasticity Test:\n")
##
## Breusch-Pagan Homoskedasticity Test:
print(residual_result_single_knot$bp_test)
##
## studentized Breusch-Pagan test
##
## data: fit
## BP = 1.2964, df = 4, p-value = 0.862
cat("\nDurbin-Watson Autocorrelation Test:\n")
##
## Durbin-Watson Autocorrelation Test:
print(residual_result_single_knot$dw_test)
##
## Durbin-Watson test
##
## data: fit
## DW = 1.58, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")
##
## Kesimpulan:
print(residual_result_single_knot$kesimpulan)
## $normalitas
## [1] "Residual berdistribusi normal (p > 0,05)."
##
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
##
## $autokorelasi
## [1] "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
gcv_results_two_knots = read.csv("gcv_results_two_knots.csv")
para = 0
# Load necessary libraries
library(lmtest)
library(splines)
# Define the function to perform residual diagnostics for two knots
uji_residual_two_knots <- function(data_analisis, knot_data, para) {
# Pilih knot dengan GCV terkecil
min_gcv_index <- which.min(knot_data$gcv)
min_knots <- as.numeric(knot_data[min_gcv_index, 1:6])
# Buat model regresi spline dengan 2 titik knot
fit <- lm(y ~ ns(x1, knots = min_knots[1:2]) + ns(x2, knots = min_knots[3:4]) + ns(x3, knots = min_knots[5:6]),
data = data_analisis)
# Hitung residual
residuals <- residuals(fit)
# Uji Normalitas Residual (Shapiro-Wilk Test)
shapiro_test <- shapiro.test(residuals)
# Uji Homoskedastisitas (Breusch-Pagan Test)
bp_test <- bptest(fit)
# Uji Autokorelasi (Durbin-Watson Test)
dw_test <- dwtest(fit)
# Menyusun Kesimpulan
kesimpulan <- list()
# Kesimpulan Normalitas
if (shapiro_test$p.value > 0.05) {
kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
} else {
kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
}
# Kesimpulan Homoskedastisitas
if (bp_test$p.value > 0.05) {
kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
} else {
kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
}
# Kesimpulan Autokorelasi
if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
} else {
kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
}
# Return hasil uji residual dan kesimpulan
return(list(
residuals = residuals,
shapiro_test = shapiro_test,
bp_test = bp_test,
dw_test = dw_test,
kesimpulan = kesimpulan
))
}
# Contoh Penggunaan Fungsi Uji Residual untuk 2 Titik Knot
residual_result_two_knots <- uji_residual_two_knots(data_analisis, gcv_results_two_knots, para = 1)
## Warning in dwtest(fit): imaginary parts of eigenvalues discarded
# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 2 Titik Knot:\n")
##
## Hasil Uji Residual dengan 2 Titik Knot:
cat("Shapiro-Wilk Normality Test:\n")
## Shapiro-Wilk Normality Test:
print(residual_result_two_knots$shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.91931, p-value = 0.03335
cat("\nBreusch-Pagan Homoskedasticity Test:\n")
##
## Breusch-Pagan Homoskedasticity Test:
print(residual_result_two_knots$bp_test)
##
## studentized Breusch-Pagan test
##
## data: fit
## BP = 3.4135, df = 7, p-value = 0.8443
cat("\nDurbin-Watson Autocorrelation Test:\n")
##
## Durbin-Watson Autocorrelation Test:
print(residual_result_two_knots$dw_test)
##
## Durbin-Watson test
##
## data: fit
## DW = 1.5391, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")
##
## Kesimpulan:
print(residual_result_two_knots$kesimpulan)
## $normalitas
## [1] "Residual tidak berdistribusi normal (p <= 0,05)."
##
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
##
## $autokorelasi
## [1] "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
gcv_results_three_knots = read.csv("gcv_results_three_knots.csv")
para = 0
# Load necessary libraries
library(lmtest)
library(splines)
# Define the function to perform residual diagnostics for three knots
uji_residual_three_knots <- function(data_analisis, knot_data, para) {
# Pilih knot dengan GCV terkecil
min_gcv_index <- which.min(knot_data$gcv)
min_knots <- as.numeric(knot_data[min_gcv_index, 1:9])
# Buat model regresi spline dengan 3 titik knot
fit <- lm(y ~ ns(x1, knots = min_knots[1:3]) + ns(x2, knots = min_knots[4:6]) + ns(x3, knots = min_knots[7:9]),
data = data_analisis)
# Hitung residual
residuals <- residuals(fit)
# Uji Normalitas Residual (Shapiro-Wilk Test)
shapiro_test <- shapiro.test(residuals)
# Uji Homoskedastisitas (Breusch-Pagan Test)
bp_test <- bptest(fit)
# Uji Autokorelasi (Durbin-Watson Test)
dw_test <- dwtest(fit)
# Menyusun Kesimpulan
kesimpulan <- list()
# Kesimpulan Normalitas
if (shapiro_test$p.value > 0.05) {
kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
} else {
kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
}
# Kesimpulan Homoskedastisitas
if (bp_test$p.value > 0.05) {
kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
} else {
kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
}
# Kesimpulan Autokorelasi
if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
} else {
kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
}
# Return hasil uji residual dan kesimpulan
return(list(
residuals = residuals,
shapiro_test = shapiro_test,
bp_test = bp_test,
dw_test = dw_test,
kesimpulan = kesimpulan
))
}
# Contoh Penggunaan Fungsi Uji Residual untuk 3 Titik Knot
residual_result_three_knots <- uji_residual_three_knots(data_analisis, gcv_results_three_knots, para = 1)
## Warning in dwtest(fit): imaginary parts of eigenvalues discarded
# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 3 Titik Knot:\n")
##
## Hasil Uji Residual dengan 3 Titik Knot:
cat("Shapiro-Wilk Normality Test:\n")
## Shapiro-Wilk Normality Test:
print(residual_result_three_knots$shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: residuals
## W = 0.934, p-value = 0.07782
cat("\nBreusch-Pagan Homoskedasticity Test:\n")
##
## Breusch-Pagan Homoskedasticity Test:
print(residual_result_three_knots$bp_test)
##
## studentized Breusch-Pagan test
##
## data: fit
## BP = 7.3192, df = 10, p-value = 0.695
cat("\nDurbin-Watson Autocorrelation Test:\n")
##
## Durbin-Watson Autocorrelation Test:
print(residual_result_three_knots$dw_test)
##
## Durbin-Watson test
##
## data: fit
## DW = 1.4733, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")
##
## Kesimpulan:
print(residual_result_three_knots$kesimpulan)
## $normalitas
## [1] "Residual berdistribusi normal (p > 0,05)."
##
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
##
## $autokorelasi
## [1] "Ada bukti autokorelasi (nilai DW jauh dari 2)."
# Load necessary libraries
library(ggplot2)
library(splines)
library(readr)
# Function to plot spline with 2 knots based on GCV data
plot_spline_from_gcv_two_knots <- function(file_path_data, file_path_gcv) {
# Load the dataset
data <- read_excel(file_path_data)
# Load the GCV results
gcv_data <- read_csv(file_path_gcv)
# Get the best knot points based on minimum GCV
best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
knot1_x <- best_gcv$knot1_x1
knot2_x <- best_gcv$knot2_x1
# Fit the model with the best knots
fit <- lm(y ~ ns(x1, knots = c(knot1_x, knot2_x)), data = data)
# Create a new data frame for plotting predictions
plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
plot_data$y_hat <- predict(fit, newdata = plot_data)
# Create the plot
ggplot(data, aes(x = x1, y = y)) +
geom_point(color = "red") + # original data points
geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
geom_vline(xintercept = c(knot1_x, knot2_x), linetype = "dashed", color = "grey") + # add vertical lines for knots
labs(title = "Spline dengan 2 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
theme_minimal()
}
# Example usage
plot_spline_from_gcv_two_knots("fileku.xlsx", "gcv_results_two_knots.csv")
## Rows: 7200 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3, gcv
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Function to plot spline with 1 knot based on GCV data
plot_spline_from_gcv_one_knot <- function(file_path_data, file_path_gcv) {
# Load the dataset
data <- read_excel(file_path_data)
# Load the GCV results
gcv_data <- read_csv(file_path_gcv)
# Get the best knot point based on minimum GCV
best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
knot_x <- best_gcv$knot_x1
# Fit the model with the best knot
fit <- lm(y ~ ns(x1, knots = knot_x), data = data)
# Create a new data frame for plotting predictions
plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
plot_data$y_hat <- predict(fit, newdata = plot_data)
# Create the plot
ggplot(data, aes(x = x1, y = y)) +
geom_point(color = "red") + # original data points
geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
geom_vline(xintercept = knot_x, linetype = "dashed", color = "grey") + # add vertical line for knot
labs(title = "Spline dengan 1 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
theme_minimal()
}
# Example usage
plot_spline_from_gcv_one_knot("fileku.xlsx", "gcv_results.csv")
## Rows: 320 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): knot_x1, knot_x2, knot_x3, gcv
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Function to plot spline with 3 knots based on GCV data
plot_spline_from_gcv_three_knots <- function(file_path_data, file_path_gcv) {
# Load the dataset
data <- read_excel(file_path_data)
# Load the GCV results
gcv_data <- read_csv(file_path_gcv)
# Get the best knot points based on minimum GCV
best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
knot1_x <- best_gcv$knot1_x1
knot2_x <- best_gcv$knot2_x1
knot3_x <- best_gcv$knot3_x1
# Fit the model with the best knots
fit <- lm(y ~ ns(x1, knots = c(knot1_x, knot2_x, knot3_x)), data = data)
# Create a new data frame for plotting predictions
plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
plot_data$y_hat <- predict(fit, newdata = plot_data)
# Create the plot
ggplot(data, aes(x = x1, y = y)) +
geom_point(color = "red") + # original data points
geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
geom_vline(xintercept = c(knot1_x, knot2_x, knot3_x), linetype = "dashed", color = "grey") + # add vertical lines for knots
labs(title = "Spline dengan 3 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
theme_minimal()
}
# Example usage
plot_spline_from_gcv_three_knots("fileku.xlsx", "gcv_results_three_knots.csv")
## Rows: 3360 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
