Pendahuluan

Dokumen ini dibuat sebagai latihan penggunaan R Markdown. Analisis yang digunakan adalah contoh syntax R mengenai pemodelan harga penutupan saham BBNI menggunakan metode Feed Forward Neural Network (FFNN).

Tahapan analisis meliputi import data, preprocessing, eksplorasi data, pembagian data training dan testing, pembentukan variabel lag, pelatihan model neural network, evaluasi model, serta visualisasi hasil prediksi.

Load Package

library(readxl)
library(dplyr)
library(ggplot2)
library(lubridate)
library(neuralnet)
library(forecast)

seed_nrp <- 001
set.seed(seed_nrp)

Import dan Preprocessing Data

data <- read_excel("C:/Users/LENOVO/Downloads/BBNI.xlsx")
names(data) <- trimws(names(data))

if (!("Date" %in% names(data))) names(data)[1] <- "Date"
if (!("Close" %in% names(data))) names(data)[2] <- "Close"

if (inherits(data$Date, "Date")) {
  data$Date <- data$Date
  
} else if (inherits(data$Date, "POSIXct") | inherits(data$Date, "POSIXt")) {
  data$Date <- as.Date(data$Date)
  
} else if (is.numeric(data$Date)) {
  data$Date <- as.Date(data$Date, origin = "1899-12-30")
  
} else {
  data$Date <- as.Date(data$Date, tryFormats = c(
    "%Y-%m-%d",
    "%d/%m/%Y",
    "%m/%d/%Y",
    "%d-%m-%Y",
    "%Y/%m/%d"
  ))
}

data$Close <- as.numeric(gsub(",", "", as.character(data$Close)))

data <- data %>%
  select(Date, Close) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  na.omit()

cat("Jumlah data setelah preprocessing:", nrow(data), "\n")
## Jumlah data setelah preprocessing: 712

Statistik Deskriptif

stat_deskriptif <- data %>%
  summarise(
    Jumlah_Data = n(),
    Mean = mean(Close),
    Median = median(Close),
    Minimum = min(Close),
    Maksimum = max(Close),
    Standar_Deviasi = sd(Close)
  )

print(stat_deskriptif)
## # A tibble: 1 × 6
##   Jumlah_Data  Mean Median Minimum Maksimum Standar_Deviasi
##         <int> <dbl>  <dbl>   <dbl>    <dbl>           <dbl>
## 1         712 4747.   4590    3720     6225            512.

Plot Time Series

ggplot(data, aes(x = Date, y = Close)) +
  geom_line() +
  labs(
    title = "Plot Time Series Harga Penutupan BBNI",
    x = "Tanggal",
    y = "Close"
  ) +
  theme_minimal()

Pembagian Data Training dan Testing

n <- nrow(data)
n_train <- floor(0.8 * n)

train_data <- data[1:n_train, ]
test_data  <- data[(n_train + 1):n, ]

cat("Jumlah data training:", nrow(train_data), "\n")
## Jumlah data training: 569
cat("Jumlah data testing :", nrow(test_data), "\n")
## Jumlah data testing : 143

Normalisasi dan Differencing

train_min <- min(train_data$Close)
train_max <- max(train_data$Close)

data$Close_norm <- (data$Close - train_min) / (train_max - train_min)

d_close <- diff(data$Close_norm)
train_diff <- d_close[1:(n_train - 1)]

ACF dan PACF

acf(train_diff, main = "ACF Data Differencing - Training")

pacf(train_diff, main = "PACF Data Differencing - Training")

lag_max <- min(20, length(train_diff) - 1)
batas_sig <- 1.96 / sqrt(length(train_diff))

acf_obj <- acf(train_diff, lag.max = lag_max, plot = FALSE)
pacf_obj <- pacf(train_diff, lag.max = lag_max, plot = FALSE)

acf_values <- as.numeric(acf_obj$acf)[-1]
pacf_values <- as.numeric(pacf_obj$acf)

lag_acf <- 1:length(acf_values)
lag_pacf <- 1:length(pacf_values)

sig_acf <- lag_acf[abs(acf_values) > batas_sig]
sig_pacf <- lag_pacf[abs(pacf_values) > batas_sig]

sig_lags <- sort(unique(c(sig_acf, sig_pacf)))

if (length(sig_lags) == 0) {
  sig_lags <- 1
}

sig_lags <- head(sig_lags, 5)

cat("Lag signifikan yang digunakan:", sig_lags, "\n")
## Lag signifikan yang digunakan: 1 2 7

Pembentukan Data Lag

make_lag_data <- function(diff_series, lags, tanggal) {
  max_lag <- max(lags)
  idx <- (max_lag + 1):length(diff_series)
  
  lag_data <- data.frame(
    idx_diff = idx,
    Date = tanggal[idx + 1],
    y = diff_series[idx]
  )
  
  for (l in lags) {
    lag_data[[paste0("lag", l)]] <- diff_series[idx - l]
  }
  
  return(lag_data)
}

lag_data <- make_lag_data(d_close, sig_lags, data$Date)

train_nn <- lag_data %>% filter(idx_diff <= (n_train - 1))
test_nn <- lag_data %>% filter(idx_diff >= n_train)

xvars <- paste0("lag", sig_lags)
formula_nn <- as.formula(paste("y ~", paste(xvars, collapse = " + ")))

cat("Formula model FFNN:\n")
## Formula model FFNN:
print(formula_nn)
## y ~ lag1 + lag2 + lag7

Fungsi Evaluasi Model

akurasi <- function(actual, pred) {
  data.frame(
    RMSE = sqrt(mean((actual - pred)^2, na.rm = TRUE)),
    MAE = mean(abs(actual - pred), na.rm = TRUE),
    MAPE = mean(abs((actual - pred) / ifelse(actual == 0, NA, actual)), na.rm = TRUE) * 100
  )
}

pred_detail <- function(model, dataset) {
  pred_diff <- as.vector(compute(model, as.data.frame(dataset[, xvars]))$net.result)
  
  idx <- dataset$idx_diff
  
  pred_norm_close <- data$Close_norm[idx] + pred_diff
  actual_norm_close <- data$Close_norm[idx + 1]
  
  pred_close <- pred_norm_close * (train_max - train_min) + train_min
  actual_close <- actual_norm_close * (train_max - train_min) + train_min
  
  hasil_pred <- data.frame(
    Date = dataset$Date,
    Actual_Close = actual_close,
    Pred_Close = pred_close
  )
  
  return(hasil_pred)
}

Pelatihan Model FFNN

hidden_list <- list(
  HL1_2   = c(2),
  HL1_3   = c(3),
  HL1_4   = c(4),
  HL1_5   = c(5),
  HL2_2_1 = c(2, 1),
  HL2_3_2 = c(3, 2),
  HL2_4_2 = c(4, 2),
  HL2_5_3 = c(5, 3)
)

models <- list()
hasil_eval <- data.frame()

i <- 1

for (nm in names(hidden_list)) {
  
  set.seed(seed_nrp + i)
  hidden_arch <- hidden_list[[nm]]
  
  cat("\nTraining model:", nm, "dengan hidden =", hidden_arch, "\n")
  
  model_nn <- tryCatch(
    neuralnet(
      formula_nn,
      data = as.data.frame(train_nn[, c("y", xvars)]),
      hidden = hidden_arch,
      linear.output = TRUE,
      stepmax = 1e6
    ),
    error = function(e) {
      cat("Model gagal:", nm, "-", e$message, "\n")
      return(NULL)
    }
  )
  
  if (!is.null(model_nn)) {
    
    pred_train <- pred_detail(model_nn, train_nn)
    pred_test  <- pred_detail(model_nn, test_nn)
    
    met_train <- akurasi(pred_train$Actual_Close, pred_train$Pred_Close)
    met_test  <- akurasi(pred_test$Actual_Close, pred_test$Pred_Close)
    
    hasil_eval <- rbind(
      hasil_eval,
      data.frame(
        Model = nm,
        Layer = ifelse(length(hidden_arch) == 1, "1 hidden layer", "2 hidden layer"),
        Hidden = paste(hidden_arch, collapse = "-"),
        RMSE_Train = met_train$RMSE,
        MAE_Train = met_train$MAE,
        MAPE_Train = met_train$MAPE,
        RMSE_Test = met_test$RMSE,
        MAE_Test = met_test$MAE,
        MAPE_Test = met_test$MAPE
      )
    )
    
    models[[nm]] <- model_nn
  }
  
  i <- i + 1
}
## 
## Training model: HL1_2 dengan hidden = 2 
## 
## Training model: HL1_3 dengan hidden = 3 
## 
## Training model: HL1_4 dengan hidden = 4 
## 
## Training model: HL1_5 dengan hidden = 5 
## 
## Training model: HL2_2_1 dengan hidden = 2 1 
## 
## Training model: HL2_3_2 dengan hidden = 3 2 
## 
## Training model: HL2_4_2 dengan hidden = 4 2 
## 
## Training model: HL2_5_3 dengan hidden = 5 3

Hasil Evaluasi Model

hasil_eval <- hasil_eval %>%
  arrange(RMSE_Test)

print(hasil_eval)
##     Model          Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1   HL1_5 1 hidden layer      5   90.94647  66.97712   1.388173  79.51824
## 2 HL2_3_2 2 hidden layer    3-2   91.00788  67.01766   1.389110  79.53050
## 3   HL1_3 1 hidden layer      3   91.01309  67.03886   1.389473  79.54409
## 4   HL1_4 1 hidden layer      4   91.00928  67.04918   1.389693  79.57517
## 5   HL1_2 1 hidden layer      2   91.00402  67.05284   1.389752  79.57533
## 6 HL2_5_3 2 hidden layer    5-3   91.00757  67.05436   1.389783  79.57884
## 7 HL2_2_1 2 hidden layer    2-1   90.99895  67.05826   1.389864  79.58791
## 8 HL2_4_2 2 hidden layer    4-2   91.00115  67.06325   1.390040  79.59320
##   MAE_Test MAPE_Test
## 1 58.54045  1.383494
## 2 58.34596  1.378902
## 3 58.34335  1.378831
## 4 58.33932  1.378756
## 5 58.37289  1.379547
## 6 58.38456  1.379825
## 7 58.41477  1.380565
## 8 58.44082  1.381162
best_1hl <- hasil_eval %>%
  filter(Layer == "1 hidden layer") %>%
  slice_min(RMSE_Test, n = 1)

best_2hl <- hasil_eval %>%
  filter(Layer == "2 hidden layer") %>%
  slice_min(RMSE_Test, n = 1)

banding_model_terbaik <- rbind(best_1hl, best_2hl) %>%
  arrange(RMSE_Test)

cat("\nModel terbaik 1 hidden layer:\n")
## 
## Model terbaik 1 hidden layer:
print(best_1hl)
##   Model          Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL1_5 1 hidden layer      5   90.94647  66.97712   1.388173  79.51824
##   MAE_Test MAPE_Test
## 1 58.54045  1.383494
cat("\nModel terbaik 2 hidden layer:\n")
## 
## Model terbaik 2 hidden layer:
print(best_2hl)
##     Model          Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL2_3_2 2 hidden layer    3-2   91.00788  67.01766    1.38911   79.5305
##   MAE_Test MAPE_Test
## 1 58.34596  1.378902
cat("\nPerbandingan dua model terbaik:\n")
## 
## Perbandingan dua model terbaik:
print(banding_model_terbaik)
##     Model          Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1   HL1_5 1 hidden layer      5   90.94647  66.97712   1.388173  79.51824
## 2 HL2_3_2 2 hidden layer    3-2   91.00788  67.01766   1.389110  79.53050
##   MAE_Test MAPE_Test
## 1 58.54045  1.383494
## 2 58.34596  1.378902

Visualisasi Aktual dan Prediksi

best_model_name <- hasil_eval$Model[1]
best_model <- models[[best_model_name]]

pred_best_test <- pred_detail(best_model, test_nn)

ggplot(pred_best_test, aes(x = Date)) +
  geom_line(aes(y = Actual_Close, linetype = "Aktual")) +
  geom_line(aes(y = Pred_Close, linetype = "Prediksi")) +
  labs(
    title = paste("Aktual vs Prediksi Testing - Model", best_model_name),
    x = "Tanggal",
    y = "Close",
    linetype = "Keterangan"
  ) +
  theme_minimal()

Kesimpulan

Berdasarkan hasil evaluasi, model terbaik dipilih dari arsitektur dengan nilai RMSE testing paling kecil. Nilai RMSE, MAE, dan MAPE digunakan untuk menilai seberapa besar kesalahan prediksi model terhadap data aktual.

Secara umum, apabila garis prediksi mengikuti pola garis aktual, maka model FFNN dapat dikatakan cukup baik dalam menangkap pola pergerakan harga penutupan saham BBNI.