Library
library(readxl)
library(TSA)
library(forecast)
library(tseries)
library(nortest)
library(ggplot2)
library(Metrics)
Penentuan Data
# Import data
df <- read_excel("C:/Users/Lenovo/Documents/ARW/Data tubes arw.xlsx")
# Penamaan kolom
names(df) <- c("Bulan", "Tanggal", "Nitrogen_Dioksida")
# Bersihkan dan ubah ke numerik
df$Nitrogen_Dioksida <- gsub("[^0-9.]", "", df$Nitrogen_Dioksida)
df$Nitrogen_Dioksida[df$Nitrogen_Dioksida == ""] <- NA
df$Nitrogen_Dioksida <- as.numeric(df$Nitrogen_Dioksida)
# Tambah kolom waktu
df$Waktu <- as.Date(paste("2024", df$Bulan, df$Tanggal, sep = "-"), format = "%Y-%m-%d")
# Hapus baris NA
df <- df[!is.na(df$Nitrogen_Dioksida), ]
# Buat time series
index <- ts(df$Nitrogen_Dioksida, frequency = 366, start = c(2024, 1))
Penentuan Pola
cat("=== Statistik Deskriptif Keseluruhan ===\n")
## === Statistik Deskriptif Keseluruhan ===
print(summary(df$Nitrogen_Dioksida))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 16.00 23.00 25.79 35.00 98.00
par(mfrow = c(1, 2))
boxplot(df$Nitrogen_Dioksida,
main = "Boxplot Keseluruhan Data NO2",
ylab = "Kadar Nitrogen Dioksida",
col = "skyblue", border = "darkblue")
boxplot(Nitrogen_Dioksida ~ Bulan, data = df,
main = "Boxplot NO2 per Bulan",
xlab = "Bulan", ylab = "Kadar NO2",
col = "lightgreen", border = "darkgreen", las = 2)

par(mfrow = c(1, 1))
Uji Stasioneritas
index <- ts(df$Nitrogen_Dioksida, frequency = 366, start = c(2024, 1))
plot(index, xlab = "Waktu", ylab = "Index NO2",
main = "Time Series NO2 (Tanpa Tren & Musiman)",
col = "blue", lwd = 2)

cat("\n=== Uji ADF (Stasioneritas) ===\n")
##
## === Uji ADF (Stasioneritas) ===
print(adf.test(index))
##
## Augmented Dickey-Fuller Test
##
## data: index
## Dickey-Fuller = -3.8791, Lag order = 7, p-value = 0.01532
## alternative hypothesis: stationary
tsdisplay(index, main = "Plot, ACF, dan PACF Data NO2")

Split Data
n <- length(index)
test_size <- 31
train_size <- n - test_size
train <- index[1:train_size]
test <- index[(train_size + 1):n]
cat("Jumlah data training:", length(train), "\n")
## Jumlah data training: 333
cat("Jumlah data testing :", length(test), "\n")
## Jumlah data testing : 31
Permodelan Simple Eksponensial Smoothing (SES)
# Buat model SES dari data training
fit_ses <- ses(train)
# (Opsional) Lihat ringkasan model
summary(fit_ses)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = train)
##
## Smoothing parameters:
## alpha = 0.6122
##
## Initial states:
## l = 13.2466
##
## sigma: 8.4888
##
## AIC AICc BIC
## 3362.514 3362.587 3373.938
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03244154 8.463304 5.617118 -7.951971 24.93111 0.944723
## ACF1
## Training set 0.03966716
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 334 6.632769 -4.246111 17.51165 -10.00504 23.27058
## 335 6.632769 -6.122981 19.38852 -12.87547 26.14100
## 336 6.632769 -7.757099 21.02264 -15.37463 28.64017
## 337 6.632769 -9.223695 22.48923 -17.61760 30.88314
## 338 6.632769 -10.565678 23.83122 -19.66999 32.93552
## 339 6.632769 -11.810271 25.07581 -21.57343 34.83896
## 340 6.632769 -12.976027 26.24156 -23.35630 36.62183
## 341 6.632769 -14.076263 27.34180 -25.03896 38.30450
## 342 6.632769 -15.120924 28.38646 -26.63663 39.90217
## 343 6.632769 -16.117666 29.38320 -28.16102 41.42656
test_ts <- ts(test, start = end(train)[1] + 1/frequency(train), frequency = frequency(train))
autoplot(fit_ses) +
autolayer(test_ts, series = "Aktual", color = "red") +
labs(title = "Peramalan Simple Exponential Smoothing (SES)",
x = "Waktu", y = "Kadar NO2") +
theme_minimal()

Simple Moving Average
k <- 5 # jumlah periode rata-rata bergerak (bisa disesuaikan)
sma_values <- stats::filter(train, rep(1/k, k), sides = 2)
plot(train, type = "l", col = "gray", lwd = 2,
main = paste("Simple Moving Average (k =", k, ")"),
xlab = "Waktu", ylab = "Kadar NO2")
lines(sma_values, col = "blue", lwd = 2)
legend("topleft", legend = c("Data Asli", "SMA"),
col = c("gray", "blue"), lwd = 2, bty = "n")

Permodelan ARMA
# Permodelan ARMA
# Karena data stasioner, tidak perlu differencing (d=0)
tsdisplay(train, main = "ACF dan PACF Data Training")

# Coba beberapa model ARMA
arma1 <- Arima(train, order = c(1, 0, 1))
arma2 <- Arima(train, order = c(2, 0, 2))
arma3 <- Arima(train, order = c(1, 0, 2))
# Bandingkan AIC
aic_table <- data.frame(
Model = c("ARMA(1,0,1)", "ARMA(2,0,2)", "ARMA(1,0,2)"),
AIC = c(AIC(arma1), AIC(arma2), AIC(arma3))
)
print(aic_table)
## Model AIC
## 1 ARMA(1,0,1) 2360.737
## 2 ARMA(2,0,2) 2363.897
## 3 ARMA(1,0,2) 2362.018
# Pilih model terbaik
fit_arma <- arma1
summary(fit_arma)
## Series: train
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.8864 -0.2909 24.5620
## s.e. 0.0326 0.0701 2.7732
##
## sigma^2 = 68.95: log likelihood = -1176.37
## AIC=2360.74 AICc=2360.86 BIC=2375.97
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06087184 8.265995 5.702939 -12.26756 27.58552 0.9591568
## ACF1
## Training set 0.0122096
# Peramalan menggunakan model ARMA terbaik
forecast_arma <- forecast(fit_arma, h = length(test))
Diagnostik Model
## Diagnostik Model
checkresiduals(fit_arma)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1) with non-zero mean
## Q* = 12.68, df = 8, p-value = 0.1233
##
## Model df: 2. Total lags used: 10
adf.test(fit_arma$residuals)
## Warning in adf.test(fit_arma$residuals): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: fit_arma$residuals
## Dickey-Fuller = -6.2379, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
shapiro.test(fit_arma$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit_arma$residuals
## W = 0.90716, p-value = 1.941e-13
qqnorm(fit_arma$residuals); qqline(fit_arma$residuals, col = "blue")

Forecasting
## Forecasting
test_ts <- ts(test,
start = end(train)[1] + 1/frequency(train),
frequency = frequency(train))
autoplot(forecast_arma) +
autolayer(test_ts, series = "Aktual", color = "red") +
labs(title = "Peramalan Model ARMA",
x = "Waktu", y = "Kadar NO2") +
theme_minimal()

Evaluasi
# Sesuaikan panjang prediksi dengan data aktual
pred_ses <- as.numeric(fit_ses$mean)
pred_arma <- as.numeric(forecast_arma$mean)
# Samakan panjangnya
len <- min(length(test), length(pred_arma), length(pred_ses))
test <- test[1:len]
pred_ses <- pred_ses[1:len]
pred_arma <- pred_arma[1:len]
# Hitung akurasi
mape_ses <- mape(test, pred_ses)
rmse_ses <- rmse(test, pred_ses)
mape_arma <- mape(test, pred_arma)
rmse_arma <- rmse(test, pred_arma)
akurasi_df <- data.frame(
Model = c("SES", "SMA", "ARMA"),
MAPE = c(mape_ses, NA, mape_arma),
RMSE = c(rmse_ses, NA, rmse_arma)
)
cat("\n=== Evaluasi Akurasi Model ===\n")
##
## === Evaluasi Akurasi Model ===
print(akurasi_df)
## Model MAPE RMSE
## 1 SES 0.7467642 40.74764
## 2 SMA NA NA
## 3 ARMA 0.4740272 33.81107