Dokumen ini dibuat sebagai latihan penggunaan R Markdown. Analisis yang digunakan adalah contoh syntax R mengenai pemodelan harga penutupan saham BBNI menggunakan metode Feed Forward Neural Network (FFNN).
Tahapan analisis meliputi import data, preprocessing, eksplorasi data, pembagian data training dan testing, pembentukan variabel lag, pelatihan model neural network, evaluasi model, serta visualisasi hasil prediksi.
library(readxl)
library(dplyr)
library(ggplot2)
library(lubridate)
library(neuralnet)
library(forecast)
seed_nrp <- 001
set.seed(seed_nrp)
data <- read_excel("C:/Users/LENOVO/Downloads/BBNI.xlsx")
names(data) <- trimws(names(data))
if (!("Date" %in% names(data))) names(data)[1] <- "Date"
if (!("Close" %in% names(data))) names(data)[2] <- "Close"
if (inherits(data$Date, "Date")) {
data$Date <- data$Date
} else if (inherits(data$Date, "POSIXct") | inherits(data$Date, "POSIXt")) {
data$Date <- as.Date(data$Date)
} else if (is.numeric(data$Date)) {
data$Date <- as.Date(data$Date, origin = "1899-12-30")
} else {
data$Date <- as.Date(data$Date, tryFormats = c(
"%Y-%m-%d",
"%d/%m/%Y",
"%m/%d/%Y",
"%d-%m-%Y",
"%Y/%m/%d"
))
}
data$Close <- as.numeric(gsub(",", "", as.character(data$Close)))
data <- data %>%
select(Date, Close) %>%
arrange(Date) %>%
distinct(Date, .keep_all = TRUE) %>%
na.omit()
cat("Jumlah data setelah preprocessing:", nrow(data), "\n")
## Jumlah data setelah preprocessing: 712
stat_deskriptif <- data %>%
summarise(
Jumlah_Data = n(),
Mean = mean(Close),
Median = median(Close),
Minimum = min(Close),
Maksimum = max(Close),
Standar_Deviasi = sd(Close)
)
print(stat_deskriptif)
## # A tibble: 1 × 6
## Jumlah_Data Mean Median Minimum Maksimum Standar_Deviasi
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 712 4747. 4590 3720 6225 512.
ggplot(data, aes(x = Date, y = Close)) +
geom_line() +
labs(
title = "Plot Time Series Harga Penutupan BBNI",
x = "Tanggal",
y = "Close"
) +
theme_minimal()
n <- nrow(data)
n_train <- floor(0.8 * n)
train_data <- data[1:n_train, ]
test_data <- data[(n_train + 1):n, ]
cat("Jumlah data training:", nrow(train_data), "\n")
## Jumlah data training: 569
cat("Jumlah data testing :", nrow(test_data), "\n")
## Jumlah data testing : 143
train_min <- min(train_data$Close)
train_max <- max(train_data$Close)
data$Close_norm <- (data$Close - train_min) / (train_max - train_min)
d_close <- diff(data$Close_norm)
train_diff <- d_close[1:(n_train - 1)]
acf(train_diff, main = "ACF Data Differencing - Training")
pacf(train_diff, main = "PACF Data Differencing - Training")
lag_max <- min(20, length(train_diff) - 1)
batas_sig <- 1.96 / sqrt(length(train_diff))
acf_obj <- acf(train_diff, lag.max = lag_max, plot = FALSE)
pacf_obj <- pacf(train_diff, lag.max = lag_max, plot = FALSE)
acf_values <- as.numeric(acf_obj$acf)[-1]
pacf_values <- as.numeric(pacf_obj$acf)
lag_acf <- 1:length(acf_values)
lag_pacf <- 1:length(pacf_values)
sig_acf <- lag_acf[abs(acf_values) > batas_sig]
sig_pacf <- lag_pacf[abs(pacf_values) > batas_sig]
sig_lags <- sort(unique(c(sig_acf, sig_pacf)))
if (length(sig_lags) == 0) {
sig_lags <- 1
}
sig_lags <- head(sig_lags, 5)
cat("Lag signifikan yang digunakan:", sig_lags, "\n")
## Lag signifikan yang digunakan: 1 2 7
make_lag_data <- function(diff_series, lags, tanggal) {
max_lag <- max(lags)
idx <- (max_lag + 1):length(diff_series)
lag_data <- data.frame(
idx_diff = idx,
Date = tanggal[idx + 1],
y = diff_series[idx]
)
for (l in lags) {
lag_data[[paste0("lag", l)]] <- diff_series[idx - l]
}
return(lag_data)
}
lag_data <- make_lag_data(d_close, sig_lags, data$Date)
train_nn <- lag_data %>% filter(idx_diff <= (n_train - 1))
test_nn <- lag_data %>% filter(idx_diff >= n_train)
xvars <- paste0("lag", sig_lags)
formula_nn <- as.formula(paste("y ~", paste(xvars, collapse = " + ")))
cat("Formula model FFNN:\n")
## Formula model FFNN:
print(formula_nn)
## y ~ lag1 + lag2 + lag7
akurasi <- function(actual, pred) {
data.frame(
RMSE = sqrt(mean((actual - pred)^2, na.rm = TRUE)),
MAE = mean(abs(actual - pred), na.rm = TRUE),
MAPE = mean(abs((actual - pred) / ifelse(actual == 0, NA, actual)), na.rm = TRUE) * 100
)
}
pred_detail <- function(model, dataset) {
pred_diff <- as.vector(compute(model, as.data.frame(dataset[, xvars]))$net.result)
idx <- dataset$idx_diff
pred_norm_close <- data$Close_norm[idx] + pred_diff
actual_norm_close <- data$Close_norm[idx + 1]
pred_close <- pred_norm_close * (train_max - train_min) + train_min
actual_close <- actual_norm_close * (train_max - train_min) + train_min
hasil_pred <- data.frame(
Date = dataset$Date,
Actual_Close = actual_close,
Pred_Close = pred_close
)
return(hasil_pred)
}
hidden_list <- list(
HL1_2 = c(2),
HL1_3 = c(3),
HL1_4 = c(4),
HL1_5 = c(5),
HL2_2_1 = c(2, 1),
HL2_3_2 = c(3, 2),
HL2_4_2 = c(4, 2),
HL2_5_3 = c(5, 3)
)
models <- list()
hasil_eval <- data.frame()
i <- 1
for (nm in names(hidden_list)) {
set.seed(seed_nrp + i)
hidden_arch <- hidden_list[[nm]]
cat("\nTraining model:", nm, "dengan hidden =", hidden_arch, "\n")
model_nn <- tryCatch(
neuralnet(
formula_nn,
data = as.data.frame(train_nn[, c("y", xvars)]),
hidden = hidden_arch,
linear.output = TRUE,
stepmax = 1e6
),
error = function(e) {
cat("Model gagal:", nm, "-", e$message, "\n")
return(NULL)
}
)
if (!is.null(model_nn)) {
pred_train <- pred_detail(model_nn, train_nn)
pred_test <- pred_detail(model_nn, test_nn)
met_train <- akurasi(pred_train$Actual_Close, pred_train$Pred_Close)
met_test <- akurasi(pred_test$Actual_Close, pred_test$Pred_Close)
hasil_eval <- rbind(
hasil_eval,
data.frame(
Model = nm,
Layer = ifelse(length(hidden_arch) == 1, "1 hidden layer", "2 hidden layer"),
Hidden = paste(hidden_arch, collapse = "-"),
RMSE_Train = met_train$RMSE,
MAE_Train = met_train$MAE,
MAPE_Train = met_train$MAPE,
RMSE_Test = met_test$RMSE,
MAE_Test = met_test$MAE,
MAPE_Test = met_test$MAPE
)
)
models[[nm]] <- model_nn
}
i <- i + 1
}
##
## Training model: HL1_2 dengan hidden = 2
##
## Training model: HL1_3 dengan hidden = 3
##
## Training model: HL1_4 dengan hidden = 4
##
## Training model: HL1_5 dengan hidden = 5
##
## Training model: HL2_2_1 dengan hidden = 2 1
##
## Training model: HL2_3_2 dengan hidden = 3 2
##
## Training model: HL2_4_2 dengan hidden = 4 2
##
## Training model: HL2_5_3 dengan hidden = 5 3
hasil_eval <- hasil_eval %>%
arrange(RMSE_Test)
print(hasil_eval)
## Model Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL1_5 1 hidden layer 5 90.94647 66.97712 1.388173 79.51824
## 2 HL2_3_2 2 hidden layer 3-2 91.00788 67.01766 1.389110 79.53050
## 3 HL1_3 1 hidden layer 3 91.01309 67.03886 1.389473 79.54409
## 4 HL1_4 1 hidden layer 4 91.00928 67.04918 1.389693 79.57517
## 5 HL1_2 1 hidden layer 2 91.00402 67.05284 1.389752 79.57533
## 6 HL2_5_3 2 hidden layer 5-3 91.00757 67.05436 1.389783 79.57884
## 7 HL2_2_1 2 hidden layer 2-1 90.99895 67.05826 1.389864 79.58791
## 8 HL2_4_2 2 hidden layer 4-2 91.00115 67.06325 1.390040 79.59320
## MAE_Test MAPE_Test
## 1 58.54045 1.383494
## 2 58.34596 1.378902
## 3 58.34335 1.378831
## 4 58.33932 1.378756
## 5 58.37289 1.379547
## 6 58.38456 1.379825
## 7 58.41477 1.380565
## 8 58.44082 1.381162
best_1hl <- hasil_eval %>%
filter(Layer == "1 hidden layer") %>%
slice_min(RMSE_Test, n = 1)
best_2hl <- hasil_eval %>%
filter(Layer == "2 hidden layer") %>%
slice_min(RMSE_Test, n = 1)
banding_model_terbaik <- rbind(best_1hl, best_2hl) %>%
arrange(RMSE_Test)
cat("\nModel terbaik 1 hidden layer:\n")
##
## Model terbaik 1 hidden layer:
print(best_1hl)
## Model Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL1_5 1 hidden layer 5 90.94647 66.97712 1.388173 79.51824
## MAE_Test MAPE_Test
## 1 58.54045 1.383494
cat("\nModel terbaik 2 hidden layer:\n")
##
## Model terbaik 2 hidden layer:
print(best_2hl)
## Model Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL2_3_2 2 hidden layer 3-2 91.00788 67.01766 1.38911 79.5305
## MAE_Test MAPE_Test
## 1 58.34596 1.378902
cat("\nPerbandingan dua model terbaik:\n")
##
## Perbandingan dua model terbaik:
print(banding_model_terbaik)
## Model Layer Hidden RMSE_Train MAE_Train MAPE_Train RMSE_Test
## 1 HL1_5 1 hidden layer 5 90.94647 66.97712 1.388173 79.51824
## 2 HL2_3_2 2 hidden layer 3-2 91.00788 67.01766 1.389110 79.53050
## MAE_Test MAPE_Test
## 1 58.54045 1.383494
## 2 58.34596 1.378902
best_model_name <- hasil_eval$Model[1]
best_model <- models[[best_model_name]]
pred_best_test <- pred_detail(best_model, test_nn)
ggplot(pred_best_test, aes(x = Date)) +
geom_line(aes(y = Actual_Close, linetype = "Aktual")) +
geom_line(aes(y = Pred_Close, linetype = "Prediksi")) +
labs(
title = paste("Aktual vs Prediksi Testing - Model", best_model_name),
x = "Tanggal",
y = "Close",
linetype = "Keterangan"
) +
theme_minimal()
Berdasarkan hasil evaluasi, model terbaik dipilih dari arsitektur dengan nilai RMSE testing paling kecil. Nilai RMSE, MAE, dan MAPE digunakan untuk menilai seberapa besar kesalahan prediksi model terhadap data aktual.
Secara umum, apabila garis prediksi mengikuti pola garis aktual, maka model FFNN dapat dikatakan cukup baik dalam menangkap pola pergerakan harga penutupan saham BBNI.