REGRESI NONPARAMETRIK SPLINE

# Load necessary libraries
library(splines)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

# Define the function to calculate GCV and get the top 10 smallest GCV values
calculate_top_10_gcv <- function(file_path) {
  # Load the dataset
  data <- read_excel(file_path)
  
  # Define the GCV function
  gcv_function <- function(data, knot_x1, knot_x2, knot_x3) {
    n <- nrow(data)
    y <- data$y
    
    # Fit the model using a natural spline with 1 knot for each predictor
    fit <- lm(y ~ ns(x1, knots = knot_x1) + ns(x2, knots = knot_x2) + ns(x3, knots = knot_x3), data = data)
    
    # Calculate the mean squared error (MSE)
    mse <- mean(residuals(fit)^2)
    
    # Calculate the hat matrix
    hat_matrix <- hatvalues(fit)
    
    # Calculate GCV
    gcv <- mse / ((1 - mean(hat_matrix))^2)
    return(gcv)
  }
  
  # Define possible knot points (e.g., unique values of x1, x2, and x3)
  knot_points_x1 <- unique(data$x1)
  knot_points_x2 <- unique(data$x2)
  knot_points_x3 <- unique(data$x3)
  
  # Initialize a dataframe to store GCV results
  gcv_results <- data.frame(knot_x1 = numeric(), knot_x2 = numeric(), knot_x3 = numeric(), gcv = numeric())
  
  # Calculate GCV for each combination of knot points
  for (knot_x1 in knot_points_x1) {
    for (knot_x2 in knot_points_x2) {
      for (knot_x3 in knot_points_x3) {
        gcv_value <- tryCatch({
          gcv_function(data, knot_x1, knot_x2, knot_x3)
        }, error = function(e) {
          NA  # Return NA if there's an error
        })
        gcv_results <- rbind(gcv_results, data.frame(knot_x1 = knot_x1, knot_x2 = knot_x2, knot_x3 = knot_x3, gcv = gcv_value))
      }
    }
  }
  
  # Remove NA values
  gcv_results <- na.omit(gcv_results)
  
  # Save all the GCV results to a CSV file
  write.csv(gcv_results, "gcv_results.csv", row.names = FALSE)
  
  # Sort by GCV value and get the 10 smallest GCV values
  top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
  
  # Return the results
  return(top_10_gcv)
}

# Define the function to calculate GCV and get the top 10 smallest GCV values for two knot points
calculate_top_10_gcv_two_knots <- function(file_path) {
  # Load the dataset
  data <- read_excel(file_path)
  
  # Define the GCV function for two knot points
  gcv_function <- function(data, knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3) {
    n <- nrow(data)
    y <- data$y
    
    # Fit the model using a natural spline with 2 knots for each predictor
    fit <- lm(y ~ ns(x1, knots = c(knot1_x1, knot2_x1)) + ns(x2, knots = c(knot1_x2, knot2_x2)) + ns(x3, knots = c(knot1_x3, knot2_x3)), data = data)
    
    # Calculate the mean squared error (MSE)
    mse <- mean(residuals(fit)^2)
    
    # Calculate the hat matrix
    hat_matrix <- hatvalues(fit)
    
    # Calculate GCV
    gcv <- mse / ((1 - mean(hat_matrix))^2)
    return(gcv)
  }
  
  # Define possible knot points (e.g., unique values of x1, x2, and x3)
  knot_points_x1 <- unique(data$x1)
  knot_points_x2 <- unique(data$x2)
  knot_points_x3 <- unique(data$x3)
  
  # Initialize a dataframe to store GCV results
  gcv_results <- data.frame(knot1_x1 = numeric(), knot2_x1 = numeric(), knot1_x2 = numeric(), knot2_x2 = numeric(), knot1_x3 = numeric(), knot2_x3 = numeric(), gcv = numeric())
  
  # Calculate GCV for each combination of two knot points
  for (knot1_x1 in knot_points_x1) {
    for (knot2_x1 in knot_points_x1[knot_points_x1 > knot1_x1]) {
      for (knot1_x2 in knot_points_x2) {
        for (knot2_x2 in knot_points_x2[knot_points_x2 > knot1_x2]) {
          for (knot1_x3 in knot_points_x3) {
            for (knot2_x3 in knot_points_x3[knot_points_x3 > knot1_x3]) {
              gcv_value <- tryCatch({
                gcv_function(data, knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3)
              }, error = function(e) {
                NA  # Return NA if there's an error
              })
              gcv_results <- rbind(gcv_results, data.frame(knot1_x1 = knot1_x1, knot2_x1 = knot2_x1, knot1_x2 = knot1_x2, knot2_x2 = knot2_x2, knot1_x3 = knot1_x3, knot2_x3 = knot2_x3, gcv = gcv_value))
            }
          }
        }
      }
    }
  }
  
  # Remove NA values
  gcv_results <- na.omit(gcv_results)
  
  # Save all the GCV results to a CSV file
  write.csv(gcv_results, "gcv_results_two_knots.csv", row.names = FALSE)
  
  # Sort by GCV value and get the 10 smallest GCV values
  top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
  
  # Return the results
  return(top_10_gcv)
}

# Define the function to calculate GCV and get the top 10 smallest GCV values for three knot points
calculate_top_10_gcv_three_knots <- function(file_path, sample_size = 10) {
  # Load the dataset
  data <- read_excel(file_path)
  
  # Define the GCV function for three knot points
  gcv_function <- function(data, knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_x3, knot2_x3, knot3_x3) {
    n <- nrow(data)
    y <- data$y
    
    # Fit the model using a natural spline with 3 knots for each predictor
    fit <- lm(y ~ ns(x1, knots = c(knot1_x1, knot2_x1, knot3_x1)) + ns(x2, knots = c(knot1_x2, knot2_x2, knot3_x2)) + ns(x3, knots = c(knot1_x3, knot2_x3, knot3_x3)), data = data)
    
    # Calculate the mean squared error (MSE)
    mse <- mean(residuals(fit)^2)
    
    # Calculate the hat matrix
    hat_matrix <- hatvalues(fit)
    
    # Calculate GCV
    gcv <- mse / ((1 - mean(hat_matrix))^2)
    return(gcv)
  }
  
  # Define possible knot points (e.g., unique values of x1, x2, and x3)
  set.seed(123)  # For reproducibility
  knot_points_x1 <- sample(unique(data$x1), min(sample_size, length(unique(data$x1))))
  knot_points_x2 <- sample(unique(data$x2), min(sample_size, length(unique(data$x2))))
  knot_points_x3 <- sample(unique(data$x3), min(sample_size, length(unique(data$x3))))
  
  # Initialize a dataframe to store GCV results
  gcv_results <- data.frame(knot1_x1 = numeric(), knot2_x1 = numeric(), knot3_x1 = numeric(), 
                            knot1_x2 = numeric(), knot2_x2 = numeric(), knot3_x2 = numeric(), 
                            knot1_x3 = numeric(), knot2_x3 = numeric(), knot3_x3 = numeric(), gcv = numeric())
  
  # Calculate GCV for each combination of three knot points
  for (knot1_x1 in knot_points_x1) {
    for (knot2_x1 in knot_points_x1[knot_points_x1 > knot1_x1]) {
      for (knot3_x1 in knot_points_x1[knot_points_x1 > knot2_x1]) {
        for (knot1_x2 in knot_points_x2) {
          for (knot2_x2 in knot_points_x2[knot_points_x2 > knot1_x2]) {
            for (knot3_x2 in knot_points_x2[knot_points_x2 > knot2_x2]) {
              for (knot1_x3 in knot_points_x3) {
                for (knot2_x3 in knot_points_x3[knot_points_x3 > knot1_x3]) {
                  for (knot3_x3 in knot_points_x3[knot_points_x3 > knot2_x3]) {
                    gcv_value <- tryCatch({
                      gcv_function(data, knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_x3, knot2_x3, knot3_x3)
                    }, error = function(e) {
                      NA  # Return NA if there's an error
                    })
                    gcv_results <- rbind(gcv_results, data.frame(knot1_x1 = knot1_x1, knot2_x1 = knot2_x1, knot3_x1 = knot3_x1,
                                                                 knot1_x2 = knot1_x2, knot2_x2 = knot2_x2, knot3_x2 = knot3_x2,
                                                                 knot1_x3 = knot1_x3, knot2_x3 = knot2_x3, knot3_x3 = knot3_x3, gcv = gcv_value))
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  
  # Remove NA values
  gcv_results <- na.omit(gcv_results)
  
  # Save all the GCV results to a CSV file
  write.csv(gcv_results, "gcv_results_three_knots.csv", row.names = FALSE)
  
  # Sort by GCV value and get the 10 smallest GCV values
  top_10_gcv <- gcv_results %>% arrange(gcv) %>% head(10)
  
  # Return the results
  return(top_10_gcv)
}

# Define the function to read the CSV files and display the top 10 GCV values
display_top_10_gcv <- function(file_path) {
  # Read the CSV file
  data <- read.csv(file_path)
  
  # Get the top 10 smallest GCV values
  top_10_gcv <- data %>% arrange(gcv) %>% head(10)
  
  # Print the top 10 GCV values
  print(top_10_gcv)
}

# Define the function to estimate parameters
estimate_parameters <- function(data_analisis, knot_data, para) {
  data <- data_analisis
  data <- as.matrix(data)
  
  p <- nrow(data)
  q <- ncol(data)
  F <- matrix(0, nrow = p, ncol = p)
  diag(F) <- 1
  
  min_gcv_index <- which.min(knot_data$gcv)
  min_knot <- as.numeric(knot_data[min_gcv_index, 1:3])
  
  aa <- rep(1, p)
  m <- ncol(data) - para - 1
  data1 <- matrix(ncol = length(min_knot), nrow = p)
  data2 <- data[, (para + 2):q]
  
  for (i in 1:length(min_knot)) {
    for (k in 1:p) {
      if (data2[k, ceiling(i/3)] < min_knot[i]) {
        data1[k, i] <- 0
      } else {
        data1[k, i] <- data2[k, ceiling(i/3)] - min_knot[i]
      }
    }
  }
  
  mx <- cbind(aa, data[, 2:q], data1)
  mx <- as.matrix(mx)
  C <- MASS::ginv(t(mx) %*% mx)
  B <- C %*% (t(mx) %*% data[, 1])
  
  return(list(B = B, mx = mx, C = C))
}

# Define the function for simultaneous F-test
uji_simultan <- function(data_analisis, knot_data, para) {
  estimasi <- estimate_parameters(data_analisis, knot_data, para)
  B <- estimasi$B
  mx <- estimasi$mx
  data <- data_analisis
  data <- as.matrix(data)
  
  p <- nrow(data)
  y <- data[, 1]
  yhat <- mx %*% B
  
  SSR <- sum((yhat - mean(y))^2)
  SSE <- sum((y - yhat)^2)
  MSR <- SSR / (length(B) - 1)
  MSE <- SSE / (p - length(B))
  
  F_stat <- MSR / MSE
  df1 <- length(B) - 1
  df2 <- p - length(B)
  
  p_value <- 1 - pf(F_stat, df1, df2)
  
  return(list(SSR = SSR, SSE = SSE, MSR = MSR, MSE = MSE, F_stat = F_stat, p_value = p_value, df1 = df1, df2 = df2))
}

# Define the function for partial t-test
uji_parsial <- function(data_analisis, knot_data, para, alpha = 0.05) {
  estimasi <- estimate_parameters(data_analisis, knot_data, para)
  B <- estimasi$B
  C <- estimasi$C
  data <- data_analisis
  data <- as.matrix(data)
  
  p <- nrow(data)
  q <- ncol(data)
  mx <- estimasi$mx
  y <- data[, 1]
  yhat <- mx %*% B
  SSE <- sum((y - yhat)^2)
  MSE <- SSE / (p - length(B))
  SE <- sqrt(diag(MSE * C))
  
  t_stat <- B / SE
  df <- p - length(B)
  p_values <- 2 * (1 - pt(abs(t_stat), df))
  signifikan <- ifelse(p_values < alpha, "Signifikan", "Tidak Signifikan")
  
  return(data.frame(Parameter = B, SE = SE, t_stat = t_stat, p_value = p_values, Signifikan = signifikan))
}

# File paths
file_path <- "fileku.xlsx"

# Call the GCV functions and display the results
top_10_gcv_results <- calculate_top_10_gcv(file_path)
print(top_10_gcv_results)

##    knot_x1 knot_x2 knot_x3      gcv
## 1     0.17     2.8     1.2 8.561401
## 2     0.17     3.2     1.2 8.821678
## 3     0.21     2.8     1.2 8.916781
## 4     0.20     2.8     1.2 8.916844
## 5     0.19     2.8     1.2 8.919435
## 6     0.18     2.8     1.2 8.923331
## 7     0.24     2.8     1.2 8.926404
## 8     0.25     2.8     1.2 8.930552
## 9     0.27     2.8     1.2 8.938791
## 10    0.28     2.8     1.2 8.944000

top_10_gcv_results_two_knots <- calculate_top_10_gcv_two_knots(file_path)
print(top_10_gcv_results_two_knots)

##    knot1_x1 knot2_x1 knot1_x2 knot2_x2 knot1_x3 knot2_x3      gcv
## 1      0.17     0.20      3.2      3.4      1.2      2.4 8.716636
## 2      0.17     0.19      3.2      3.4      1.2      2.4 8.717171
## 3      0.17     0.21      3.2      3.4      1.2      2.4 8.718834
## 4      0.17     0.18      3.2      3.4      1.2      2.4 8.719118
## 5      0.17     0.20      3.2      3.4      1.2      3.6 8.725173
## 6      0.17     0.19      3.2      3.4      1.2      3.6 8.725660
## 7      0.17     0.21      3.2      3.4      1.2      3.6 8.727389
## 8      0.17     0.18      3.2      3.4      1.2      3.6 8.727560
## 9      0.17     0.20      3.2      3.4      1.2      4.8 8.735309
## 10     0.17     0.19      3.2      3.4      1.2      4.8 8.735684

top_10_gcv_results_three_knots <- calculate_top_10_gcv_three_knots(file_path)
print(top_10_gcv_results_three_knots)

##    knot1_x1 knot2_x1 knot3_x1 knot1_x2 knot2_x2 knot3_x2 knot1_x3 knot2_x3
## 1      0.18      0.2     0.24      2.7      3.2      3.4      1.2      4.8
## 2      0.18      0.2     0.24      2.7      3.2      3.4      1.2      3.6
## 3      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 4      0.18      0.2     0.24      2.7      3.2      3.4      3.6      4.8
## 5      0.18      0.2     0.28      2.7      3.2      3.4      1.2      4.8
## 6      0.18      0.2     0.24      2.7      3.2      3.4      1.2      3.6
## 7      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 8      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 9      0.18      0.2     0.24      2.7      3.2      3.4      2.4      4.8
## 10     0.18      0.2     0.28      2.7      3.2      3.4      1.2      3.6
##    knot3_x3      gcv
## 1       6.0 8.826686
## 2       6.0 9.267963
## 3       6.0 9.406265
## 4       6.0 9.480160
## 5       6.0 9.554743
## 6       4.8 9.568576
## 7       4.8 9.607318
## 8       3.6 9.664246
## 9       6.0 9.718161
## 10      6.0 9.936421

# Display the top 10 GCV values for each file
file_paths <- c("gcv_results.csv", "gcv_results_two_knots.csv", "gcv_results_three_knots.csv")
for (file_path in file_paths) {
  cat("\nTop 10 GCV values for:", file_path, "\n")
  display_top_10_gcv(file_path)
}

## 
## Top 10 GCV values for: gcv_results.csv 
##    knot_x1 knot_x2 knot_x3      gcv
## 1     0.17     2.8     1.2 8.561401
## 2     0.17     3.2     1.2 8.821678
## 3     0.21     2.8     1.2 8.916781
## 4     0.20     2.8     1.2 8.916844
## 5     0.19     2.8     1.2 8.919435
## 6     0.18     2.8     1.2 8.923331
## 7     0.24     2.8     1.2 8.926404
## 8     0.25     2.8     1.2 8.930552
## 9     0.27     2.8     1.2 8.938791
## 10    0.28     2.8     1.2 8.944000
## 
## Top 10 GCV values for: gcv_results_two_knots.csv 
##    knot1_x1 knot2_x1 knot1_x2 knot2_x2 knot1_x3 knot2_x3      gcv
## 1      0.17     0.20      3.2      3.4      1.2      2.4 8.716636
## 2      0.17     0.19      3.2      3.4      1.2      2.4 8.717171
## 3      0.17     0.21      3.2      3.4      1.2      2.4 8.718834
## 4      0.17     0.18      3.2      3.4      1.2      2.4 8.719118
## 5      0.17     0.20      3.2      3.4      1.2      3.6 8.725173
## 6      0.17     0.19      3.2      3.4      1.2      3.6 8.725660
## 7      0.17     0.21      3.2      3.4      1.2      3.6 8.727389
## 8      0.17     0.18      3.2      3.4      1.2      3.6 8.727560
## 9      0.17     0.20      3.2      3.4      1.2      4.8 8.735309
## 10     0.17     0.19      3.2      3.4      1.2      4.8 8.735684
## 
## Top 10 GCV values for: gcv_results_three_knots.csv 
##    knot1_x1 knot2_x1 knot3_x1 knot1_x2 knot2_x2 knot3_x2 knot1_x3 knot2_x3
## 1      0.18      0.2     0.24      2.7      3.2      3.4      1.2      4.8
## 2      0.18      0.2     0.24      2.7      3.2      3.4      1.2      3.6
## 3      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 4      0.18      0.2     0.24      2.7      3.2      3.4      3.6      4.8
## 5      0.18      0.2     0.28      2.7      3.2      3.4      1.2      4.8
## 6      0.18      0.2     0.24      2.7      3.2      3.4      1.2      3.6
## 7      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 8      0.18      0.2     0.24      2.7      3.2      3.4      1.2      2.4
## 9      0.18      0.2     0.24      2.7      3.2      3.4      2.4      4.8
## 10     0.18      0.2     0.28      2.7      3.2      3.4      1.2      3.6
##    knot3_x3      gcv
## 1       6.0 8.826686
## 2       6.0 9.267963
## 3       6.0 9.406265
## 4       6.0 9.480160
## 5       6.0 9.554743
## 6       4.8 9.568576
## 7       4.8 9.607318
## 8       3.6 9.664246
## 9       6.0 9.718161
## 10      6.0 9.936421

# Load the data and run parameter estimation and tests
data_analisis <- read_excel("fileku.xlsx")
knot_data <- read.csv("gcv_results.csv")

# Example usage
para <- 1  # Set your value for para
result <- estimate_parameters(data_analisis, knot_data, para)
print(result$B)

##              [,1]
## [1,]  76.21753492
## [2,] -11.49219917
## [3,]  17.92992821
## [4,]   0.09086878
## [5,]   4.97294728
## [6,]  54.14829813
## [7,] -73.53111369

print(result$mx)

##       aa   x1  x2  x3             
##  [1,]  1 0.27 3.6 3.6 3.43 0.8 2.4
##  [2,]  1 0.36 3.6 6.0 3.43 0.8 2.4
##  [3,]  1 0.34 3.4 2.4 3.23 0.6 2.2
##  [4,]  1 0.44 3.6 1.2 3.43 0.8 2.4
##  [5,]  1 0.28 3.6 4.8 3.43 0.8 2.4
##  [6,]  1 0.36 3.6 2.4 3.43 0.8 2.4
##  [7,]  1 0.35 3.6 7.2 3.43 0.8 2.4
##  [8,]  1 0.35 2.7 1.2 2.53 0.0 1.5
##  [9,]  1 0.32 3.6 3.6 3.43 0.8 2.4
## [10,]  1 0.21 2.8 2.4 2.63 0.0 1.6
## [11,]  1 0.19 3.6 6.0 3.43 0.8 2.4
## [12,]  1 0.34 3.6 3.6 3.43 0.8 2.4
## [13,]  1 0.31 3.6 2.4 3.43 0.8 2.4
## [14,]  1 0.39 3.2 4.8 3.03 0.4 2.0
## [15,]  1 0.36 3.6 2.4 3.43 0.8 2.4
## [16,]  1 0.30 3.4 1.2 3.23 0.6 2.2
## [17,]  1 0.17 3.6 3.6 3.43 0.8 2.4
## [18,]  1 0.21 3.6 4.8 3.43 0.8 2.4
## [19,]  1 0.24 3.6 1.2 3.43 0.8 2.4
## [20,]  1 0.18 3.6 3.6 3.43 0.8 2.4
## [21,]  1 0.20 3.6 2.4 3.43 0.8 2.4
## [22,]  1 0.35 3.6 1.2 3.43 0.8 2.4
## [23,]  1 0.32 3.6 3.6 3.43 0.8 2.4
## [24,]  1 0.25 3.6 1.2 3.43 0.8 2.4
## [25,]  1 0.35 2.8 4.8 2.63 0.0 1.6
## [26,]  1 0.19 3.6 4.8 3.43 0.8 2.4
## [27,]  1 0.21 3.6 2.4 3.43 0.8 2.4
## [28,]  1 0.30 3.6 2.4 3.43 0.8 2.4

print(result$C)

##              [,1]        [,2]         [,3]         [,4]         [,5]
## [1,]  238.1368799 -4.22391000  45.56577510  0.388179311  5.082505536
## [2,]   -4.2239100  7.04110997  -1.14164363  0.019201235 -0.423578935
## [3,]   45.5657751 -1.14164363   8.74315059  0.067394182  0.996968832
## [4,]    0.3881793  0.01920123   0.06739418  0.014346745  0.001403699
## [5,]    5.0825055 -0.42357893   0.99696883  0.001403699  0.132942891
## [6,]  196.1645229 -2.14279862  37.39233931  0.334596233  4.044370441
## [7,] -240.1984808  3.92704837 -45.93577953 -0.398420991 -5.102037811
##              [,6]        [,7]
## [1,]  196.1645229 -240.198481
## [2,]   -2.1427986    3.927048
## [3,]   37.3923393  -45.935780
## [4,]    0.3345962   -0.398421
## [5,]    4.0443704   -5.102038
## [6,]  162.8134217 -198.005088
## [7,] -198.0050881  242.302397

# Simultaneous F-test
simultan_result <- uji_simultan(data_analisis, knot_data, para)
print(simultan_result)

## $SSR
## [1] 41.72565
## 
## $SSE
## [1] 170.8344
## 
## $MSR
## [1] 6.954274
## 
## $MSE
## [1] 8.134969
## 
## $F_stat
## [1] 0.8548618
## 
## $p_value
## [1] 0.5431029
## 
## $df1
## [1] 6
## 
## $df2
## [1] 21

# Partial t-test
parsial_result <- uji_parsial(data_analisis, knot_data, para)
print(parsial_result)

##      Parameter         SE     t_stat      p_value       Signifikan
## 1  76.21753492 44.0140453  1.7316639 0.0979994247 Tidak Signifikan
## 2 -11.49219917  7.5683032 -1.5184644 0.1438096067 Tidak Signifikan
## 3  17.92992821  8.4335794  2.1260164 0.0455274212       Signifikan
## 4   0.09086878  0.3416289  0.2659868 0.7928433601 Tidak Signifikan
## 5   4.97294728  1.0399453  4.7819314 0.0001004632       Signifikan
## 6  54.14829813 36.3934358  1.4878589 0.1516511582 Tidak Signifikan
## 7 -73.53111369 44.3973258 -1.6562059 0.1125476091 Tidak Signifikan

data_analisis = read_excel("fileku.xlsx")
gcv_results = read.csv("gcv_results.csv")
para = 0


# Load necessary libraries
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(splines)

# Define the function to perform residual diagnostics for single knot
uji_residual_single_knot <- function(data_analisis, knot_data, para) {
  # Pilih knot dengan GCV terkecil
  min_gcv_index <- which.min(knot_data$gcv)
  min_knots <- as.numeric(knot_data[min_gcv_index, 1:3])
  
  # Buat model regresi spline dengan 1 titik knot
  fit <- lm(y ~ ns(x1, knots = min_knots[1]) + ns(x2, knots = min_knots[2]) + ns(x3, knots = min_knots[3]), 
            data = data_analisis)
  
  # Hitung residual
  residuals <- residuals(fit)
  
  # Uji Normalitas Residual (Shapiro-Wilk Test)
  shapiro_test <- shapiro.test(residuals)
  
  # Uji Homoskedastisitas (Breusch-Pagan Test)
  bp_test <- bptest(fit)
  
  # Uji Autokorelasi (Durbin-Watson Test)
  dw_test <- dwtest(fit)
  
  # Menyusun Kesimpulan
  kesimpulan <- list()
  
  # Kesimpulan Normalitas
  if (shapiro_test$p.value > 0.05) {
    kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
  } else {
    kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
  }
  
  # Kesimpulan Homoskedastisitas
  if (bp_test$p.value > 0.05) {
    kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
  } else {
    kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
  }
  
  # Kesimpulan Autokorelasi
  if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
    kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
  } else {
    kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
  }
  
  # Return hasil uji residual dan kesimpulan
  return(list(
    residuals = residuals,
    shapiro_test = shapiro_test,
    bp_test = bp_test,
    dw_test = dw_test,
    kesimpulan = kesimpulan
  ))
}

# Contoh Penggunaan Fungsi Uji Residual untuk 1 Titik Knot
residual_result_single_knot <- uji_residual_single_knot(data_analisis, gcv_results, para = 1)

## Warning in dwtest(fit): imaginary parts of eigenvalues discarded

# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 1 Titik Knot:\n")

## 
## Hasil Uji Residual dengan 1 Titik Knot:

cat("Shapiro-Wilk Normality Test:\n")

## Shapiro-Wilk Normality Test:

print(residual_result_single_knot$shapiro_test)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.9416, p-value = 0.1214

cat("\nBreusch-Pagan Homoskedasticity Test:\n")

## 
## Breusch-Pagan Homoskedasticity Test:

print(residual_result_single_knot$bp_test)

## 
##  studentized Breusch-Pagan test
## 
## data:  fit
## BP = 1.2964, df = 4, p-value = 0.862

cat("\nDurbin-Watson Autocorrelation Test:\n")

## 
## Durbin-Watson Autocorrelation Test:

print(residual_result_single_knot$dw_test)

## 
##  Durbin-Watson test
## 
## data:  fit
## DW = 1.58, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")

## 
## Kesimpulan:

print(residual_result_single_knot$kesimpulan)

## $normalitas
## [1] "Residual berdistribusi normal (p > 0,05)."
## 
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
## 
## $autokorelasi
## [1] "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."

gcv_results_two_knots = read.csv("gcv_results_two_knots.csv")
para = 0

# Load necessary libraries
library(lmtest)
library(splines)

# Define the function to perform residual diagnostics for two knots
uji_residual_two_knots <- function(data_analisis, knot_data, para) {
  # Pilih knot dengan GCV terkecil
  min_gcv_index <- which.min(knot_data$gcv)
  min_knots <- as.numeric(knot_data[min_gcv_index, 1:6])
  
  # Buat model regresi spline dengan 2 titik knot
  fit <- lm(y ~ ns(x1, knots = min_knots[1:2]) + ns(x2, knots = min_knots[3:4]) + ns(x3, knots = min_knots[5:6]), 
            data = data_analisis)
  
  # Hitung residual
  residuals <- residuals(fit)
  
  # Uji Normalitas Residual (Shapiro-Wilk Test)
  shapiro_test <- shapiro.test(residuals)
  
  # Uji Homoskedastisitas (Breusch-Pagan Test)
  bp_test <- bptest(fit)
  
  # Uji Autokorelasi (Durbin-Watson Test)
  dw_test <- dwtest(fit)
  
  # Menyusun Kesimpulan
  kesimpulan <- list()
  
  # Kesimpulan Normalitas
  if (shapiro_test$p.value > 0.05) {
    kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
  } else {
    kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
  }
  
  # Kesimpulan Homoskedastisitas
  if (bp_test$p.value > 0.05) {
    kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
  } else {
    kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
  }
  
  # Kesimpulan Autokorelasi
  if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
    kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
  } else {
    kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
  }
  
  # Return hasil uji residual dan kesimpulan
  return(list(
    residuals = residuals,
    shapiro_test = shapiro_test,
    bp_test = bp_test,
    dw_test = dw_test,
    kesimpulan = kesimpulan
  ))
}

# Contoh Penggunaan Fungsi Uji Residual untuk 2 Titik Knot
residual_result_two_knots <- uji_residual_two_knots(data_analisis, gcv_results_two_knots, para = 1)

## Warning in dwtest(fit): imaginary parts of eigenvalues discarded

# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 2 Titik Knot:\n")

## 
## Hasil Uji Residual dengan 2 Titik Knot:

cat("Shapiro-Wilk Normality Test:\n")

## Shapiro-Wilk Normality Test:

print(residual_result_two_knots$shapiro_test)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.91931, p-value = 0.03335

cat("\nBreusch-Pagan Homoskedasticity Test:\n")

## 
## Breusch-Pagan Homoskedasticity Test:

print(residual_result_two_knots$bp_test)

## 
##  studentized Breusch-Pagan test
## 
## data:  fit
## BP = 3.4135, df = 7, p-value = 0.8443

cat("\nDurbin-Watson Autocorrelation Test:\n")

## 
## Durbin-Watson Autocorrelation Test:

print(residual_result_two_knots$dw_test)

## 
##  Durbin-Watson test
## 
## data:  fit
## DW = 1.5391, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")

## 
## Kesimpulan:

print(residual_result_two_knots$kesimpulan)

## $normalitas
## [1] "Residual tidak berdistribusi normal (p <= 0,05)."
## 
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
## 
## $autokorelasi
## [1] "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."

gcv_results_three_knots = read.csv("gcv_results_three_knots.csv")
para = 0

# Load necessary libraries
library(lmtest)
library(splines)

# Define the function to perform residual diagnostics for three knots
uji_residual_three_knots <- function(data_analisis, knot_data, para) {
  # Pilih knot dengan GCV terkecil
  min_gcv_index <- which.min(knot_data$gcv)
  min_knots <- as.numeric(knot_data[min_gcv_index, 1:9])
  
  # Buat model regresi spline dengan 3 titik knot
  fit <- lm(y ~ ns(x1, knots = min_knots[1:3]) + ns(x2, knots = min_knots[4:6]) + ns(x3, knots = min_knots[7:9]), 
            data = data_analisis)
  
  # Hitung residual
  residuals <- residuals(fit)
  
  # Uji Normalitas Residual (Shapiro-Wilk Test)
  shapiro_test <- shapiro.test(residuals)
  
  # Uji Homoskedastisitas (Breusch-Pagan Test)
  bp_test <- bptest(fit)
  
  # Uji Autokorelasi (Durbin-Watson Test)
  dw_test <- dwtest(fit)
  
  # Menyusun Kesimpulan
  kesimpulan <- list()
  
  # Kesimpulan Normalitas
  if (shapiro_test$p.value > 0.05) {
    kesimpulan$normalitas <- "Residual berdistribusi normal (p > 0,05)."
  } else {
    kesimpulan$normalitas <- "Residual tidak berdistribusi normal (p <= 0,05)."
  }
  
  # Kesimpulan Homoskedastisitas
  if (bp_test$p.value > 0.05) {
    kesimpulan$homoskedastisitas <- "Tidak ada bukti heteroskedastisitas (p > 0,05)."
  } else {
    kesimpulan$homoskedastisitas <- "Ada bukti heteroskedastisitas (p <= 0,05)."
  }
  
  # Kesimpulan Autokorelasi
  if (dw_test$statistic > 1.5 && dw_test$statistic < 2.5) {
    kesimpulan$autokorelasi <- "Tidak ada bukti autokorelasi (nilai DW mendekati 2)."
  } else {
    kesimpulan$autokorelasi <- "Ada bukti autokorelasi (nilai DW jauh dari 2)."
  }
  
  # Return hasil uji residual dan kesimpulan
  return(list(
    residuals = residuals,
    shapiro_test = shapiro_test,
    bp_test = bp_test,
    dw_test = dw_test,
    kesimpulan = kesimpulan
  ))
}

# Contoh Penggunaan Fungsi Uji Residual untuk 3 Titik Knot
residual_result_three_knots <- uji_residual_three_knots(data_analisis, gcv_results_three_knots, para = 1)

## Warning in dwtest(fit): imaginary parts of eigenvalues discarded

# Tampilkan Hasil Uji Residual
cat("\nHasil Uji Residual dengan 3 Titik Knot:\n")

## 
## Hasil Uji Residual dengan 3 Titik Knot:

cat("Shapiro-Wilk Normality Test:\n")

## Shapiro-Wilk Normality Test:

print(residual_result_three_knots$shapiro_test)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.934, p-value = 0.07782

cat("\nBreusch-Pagan Homoskedasticity Test:\n")

## 
## Breusch-Pagan Homoskedasticity Test:

print(residual_result_three_knots$bp_test)

## 
##  studentized Breusch-Pagan test
## 
## data:  fit
## BP = 7.3192, df = 10, p-value = 0.695

cat("\nDurbin-Watson Autocorrelation Test:\n")

## 
## Durbin-Watson Autocorrelation Test:

print(residual_result_three_knots$dw_test)

## 
##  Durbin-Watson test
## 
## data:  fit
## DW = 1.4733, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

# Tampilkan Kesimpulan
cat("\nKesimpulan:\n")

## 
## Kesimpulan:

print(residual_result_three_knots$kesimpulan)

## $normalitas
## [1] "Residual berdistribusi normal (p > 0,05)."
## 
## $homoskedastisitas
## [1] "Tidak ada bukti heteroskedastisitas (p > 0,05)."
## 
## $autokorelasi
## [1] "Ada bukti autokorelasi (nilai DW jauh dari 2)."

# Load necessary libraries
library(ggplot2)
library(splines)
library(readr)

# Function to plot spline with 2 knots based on GCV data
plot_spline_from_gcv_two_knots <- function(file_path_data, file_path_gcv) {
  # Load the dataset
  data <- read_excel(file_path_data)
  
  # Load the GCV results
  gcv_data <- read_csv(file_path_gcv)
  
  # Get the best knot points based on minimum GCV
  best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
  knot1_x <- best_gcv$knot1_x1
  knot2_x <- best_gcv$knot2_x1
  
  # Fit the model with the best knots
  fit <- lm(y ~ ns(x1, knots = c(knot1_x, knot2_x)), data = data)
  
  # Create a new data frame for plotting predictions
  plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
  plot_data$y_hat <- predict(fit, newdata = plot_data)
  
  # Create the plot
  ggplot(data, aes(x = x1, y = y)) +
    geom_point(color = "red") + # original data points
    geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
    geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
    geom_vline(xintercept = c(knot1_x, knot2_x), linetype = "dashed", color = "grey") + # add vertical lines for knots
    labs(title = "Spline dengan 2 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
    theme_minimal()
}

# Example usage
plot_spline_from_gcv_two_knots("fileku.xlsx", "gcv_results_two_knots.csv")

## Rows: 7200 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): knot1_x1, knot2_x1, knot1_x2, knot2_x2, knot1_x3, knot2_x3, gcv
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Function to plot spline with 1 knot based on GCV data
plot_spline_from_gcv_one_knot <- function(file_path_data, file_path_gcv) {
  # Load the dataset
  data <- read_excel(file_path_data)
  
  # Load the GCV results
  gcv_data <- read_csv(file_path_gcv)
  
  # Get the best knot point based on minimum GCV
  best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
  knot_x <- best_gcv$knot_x1
  
  # Fit the model with the best knot
  fit <- lm(y ~ ns(x1, knots = knot_x), data = data)
  
  # Create a new data frame for plotting predictions
  plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
  plot_data$y_hat <- predict(fit, newdata = plot_data)
  
  # Create the plot
  ggplot(data, aes(x = x1, y = y)) +
    geom_point(color = "red") + # original data points
    geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
    geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
    geom_vline(xintercept = knot_x, linetype = "dashed", color = "grey") + # add vertical line for knot
    labs(title = "Spline dengan 1 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
    theme_minimal()
}

# Example usage
plot_spline_from_gcv_one_knot("fileku.xlsx", "gcv_results.csv")

## Rows: 320 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): knot_x1, knot_x2, knot_x3, gcv
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Function to plot spline with 3 knots based on GCV data
plot_spline_from_gcv_three_knots <- function(file_path_data, file_path_gcv) {
  # Load the dataset
  data <- read_excel(file_path_data)
  
  # Load the GCV results
  gcv_data <- read_csv(file_path_gcv)
  
  # Get the best knot points based on minimum GCV
  best_gcv <- gcv_data[which.min(gcv_data$gcv), ]
  knot1_x <- best_gcv$knot1_x1
  knot2_x <- best_gcv$knot2_x1
  knot3_x <- best_gcv$knot3_x1
  
  # Fit the model with the best knots
  fit <- lm(y ~ ns(x1, knots = c(knot1_x, knot2_x, knot3_x)), data = data)
  
  # Create a new data frame for plotting predictions
  plot_data <- data.frame(x1 = seq(min(data$x1), max(data$x1), length.out = 100))
  plot_data$y_hat <- predict(fit, newdata = plot_data)
  
  # Create the plot
  ggplot(data, aes(x = x1, y = y)) +
    geom_point(color = "red") + # original data points
    geom_line(data = plot_data, aes(x = x1, y = y_hat), color = "blue", size = 1) + # spline line
    geom_text(aes(label = round(y, 2)), vjust = -0.5, color = "red", size = 3) + # add text labels for the points
    geom_vline(xintercept = c(knot1_x, knot2_x, knot3_x), linetype = "dashed", color = "grey") + # add vertical lines for knots
    labs(title = "Spline dengan 3 Titik Knot Berdasarkan GCV", x = "x1", y = "Y") + # add GCV label
    theme_minimal()
}

# Example usage
plot_spline_from_gcv_three_knots("fileku.xlsx", "gcv_results_three_knots.csv")

## Rows: 3360 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): knot1_x1, knot2_x1, knot3_x1, knot1_x2, knot2_x2, knot3_x2, knot1_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

REGRESI NONPARAMETRIK SPLINE

taufiq hidayah abdullah

2024-07-30