Library

library(readxl)
library(TSA)
library(forecast)
library(tseries)
library(nortest)
library(ggplot2)
library(Metrics)

Penentuan Data

# Import data
df <- read_excel("C:/Users/Lenovo/Documents/ARW/Data tubes arw.xlsx")

# Penamaan kolom
names(df) <- c("Bulan", "Tanggal", "Nitrogen_Dioksida")

# Bersihkan dan ubah ke numerik
df$Nitrogen_Dioksida <- gsub("[^0-9.]", "", df$Nitrogen_Dioksida)
df$Nitrogen_Dioksida[df$Nitrogen_Dioksida == ""] <- NA
df$Nitrogen_Dioksida <- as.numeric(df$Nitrogen_Dioksida)

# Tambah kolom waktu
df$Waktu <- as.Date(paste("2024", df$Bulan, df$Tanggal, sep = "-"), format = "%Y-%m-%d")

# Hapus baris NA
df <- df[!is.na(df$Nitrogen_Dioksida), ]

# Buat time series
index <- ts(df$Nitrogen_Dioksida, frequency = 366, start = c(2024, 1))

Penentuan Pola

cat("=== Statistik Deskriptif Keseluruhan ===\n")
## === Statistik Deskriptif Keseluruhan ===
print(summary(df$Nitrogen_Dioksida))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   16.00   23.00   25.79   35.00   98.00
par(mfrow = c(1, 2))
boxplot(df$Nitrogen_Dioksida,
        main = "Boxplot Keseluruhan Data NO2",
        ylab = "Kadar Nitrogen Dioksida",
        col = "skyblue", border = "darkblue")
boxplot(Nitrogen_Dioksida ~ Bulan, data = df,
        main = "Boxplot NO2 per Bulan",
        xlab = "Bulan", ylab = "Kadar NO2",
        col = "lightgreen", border = "darkgreen", las = 2)

par(mfrow = c(1, 1))

Uji Stasioneritas

index <- ts(df$Nitrogen_Dioksida, frequency = 366, start = c(2024, 1))

plot(index, xlab = "Waktu", ylab = "Index NO2",
     main = "Time Series NO2 (Tanpa Tren & Musiman)",
     col = "blue", lwd = 2)

cat("\n=== Uji ADF (Stasioneritas) ===\n")
## 
## === Uji ADF (Stasioneritas) ===
print(adf.test(index))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  index
## Dickey-Fuller = -3.8791, Lag order = 7, p-value = 0.01532
## alternative hypothesis: stationary
tsdisplay(index, main = "Plot, ACF, dan PACF Data NO2")

Split Data

n <- length(index)
test_size <- 31
train_size <- n - test_size
train <- index[1:train_size]
test  <- index[(train_size + 1):n]

cat("Jumlah data training:", length(train), "\n")
## Jumlah data training: 333
cat("Jumlah data testing :", length(test), "\n")
## Jumlah data testing : 31

Permodelan Simple Eksponensial Smoothing (SES)

# Buat model SES dari data training
fit_ses <- ses(train)

# (Opsional) Lihat ringkasan model
summary(fit_ses)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
## ses(y = train)
## 
##   Smoothing parameters:
##     alpha = 0.6122 
## 
##   Initial states:
##     l = 13.2466 
## 
##   sigma:  8.4888
## 
##      AIC     AICc      BIC 
## 3362.514 3362.587 3373.938 
## 
## Error measures:
##                       ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -0.03244154 8.463304 5.617118 -7.951971 24.93111 0.944723
##                    ACF1
## Training set 0.03966716
## 
## Forecasts:
##     Point Forecast      Lo 80    Hi 80     Lo 95    Hi 95
## 334       6.632769  -4.246111 17.51165 -10.00504 23.27058
## 335       6.632769  -6.122981 19.38852 -12.87547 26.14100
## 336       6.632769  -7.757099 21.02264 -15.37463 28.64017
## 337       6.632769  -9.223695 22.48923 -17.61760 30.88314
## 338       6.632769 -10.565678 23.83122 -19.66999 32.93552
## 339       6.632769 -11.810271 25.07581 -21.57343 34.83896
## 340       6.632769 -12.976027 26.24156 -23.35630 36.62183
## 341       6.632769 -14.076263 27.34180 -25.03896 38.30450
## 342       6.632769 -15.120924 28.38646 -26.63663 39.90217
## 343       6.632769 -16.117666 29.38320 -28.16102 41.42656
test_ts <- ts(test, start = end(train)[1] + 1/frequency(train), frequency = frequency(train))

autoplot(fit_ses) +
  autolayer(test_ts, series = "Aktual", color = "red") +
  labs(title = "Peramalan Simple Exponential Smoothing (SES)",
       x = "Waktu", y = "Kadar NO2") +
  theme_minimal()

Simple Moving Average

k <- 5  # jumlah periode rata-rata bergerak (bisa disesuaikan)
sma_values <- stats::filter(train, rep(1/k, k), sides = 2)

plot(train, type = "l", col = "gray", lwd = 2,
     main = paste("Simple Moving Average (k =", k, ")"),
     xlab = "Waktu", ylab = "Kadar NO2")
lines(sma_values, col = "blue", lwd = 2)
legend("topleft", legend = c("Data Asli", "SMA"),
       col = c("gray", "blue"), lwd = 2, bty = "n")

Permodelan ARMA

# Permodelan ARMA
# Karena data stasioner, tidak perlu differencing (d=0)
tsdisplay(train, main = "ACF dan PACF Data Training")

# Coba beberapa model ARMA
arma1 <- Arima(train, order = c(1, 0, 1))
arma2 <- Arima(train, order = c(2, 0, 2))
arma3 <- Arima(train, order = c(1, 0, 2))

# Bandingkan AIC
aic_table <- data.frame(
  Model = c("ARMA(1,0,1)", "ARMA(2,0,2)", "ARMA(1,0,2)"),
  AIC = c(AIC(arma1), AIC(arma2), AIC(arma3))
)
print(aic_table)
##         Model      AIC
## 1 ARMA(1,0,1) 2360.737
## 2 ARMA(2,0,2) 2363.897
## 3 ARMA(1,0,2) 2362.018
# Pilih model terbaik
fit_arma <- arma1
summary(fit_arma)
## Series: train 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ma1     mean
##       0.8864  -0.2909  24.5620
## s.e.  0.0326   0.0701   2.7732
## 
## sigma^2 = 68.95:  log likelihood = -1176.37
## AIC=2360.74   AICc=2360.86   BIC=2375.97
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.06087184 8.265995 5.702939 -12.26756 27.58552 0.9591568
##                   ACF1
## Training set 0.0122096
# Peramalan menggunakan model ARMA terbaik
forecast_arma <- forecast(fit_arma, h = length(test))

Diagnostik Model

## Diagnostik Model
checkresiduals(fit_arma)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,1) with non-zero mean
## Q* = 12.68, df = 8, p-value = 0.1233
## 
## Model df: 2.   Total lags used: 10
adf.test(fit_arma$residuals)
## Warning in adf.test(fit_arma$residuals): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  fit_arma$residuals
## Dickey-Fuller = -6.2379, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
shapiro.test(fit_arma$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit_arma$residuals
## W = 0.90716, p-value = 1.941e-13
qqnorm(fit_arma$residuals); qqline(fit_arma$residuals, col = "blue")

Forecasting

## Forecasting
test_ts <- ts(test,
              start = end(train)[1] + 1/frequency(train),
              frequency = frequency(train))

autoplot(forecast_arma) +
  autolayer(test_ts, series = "Aktual", color = "red") +
  labs(title = "Peramalan Model ARMA",
       x = "Waktu", y = "Kadar NO2") +
  theme_minimal()

Evaluasi

# Sesuaikan panjang prediksi dengan data aktual
pred_ses  <- as.numeric(fit_ses$mean)
pred_arma <- as.numeric(forecast_arma$mean)

# Samakan panjangnya
len <- min(length(test), length(pred_arma), length(pred_ses))
test      <- test[1:len]
pred_ses  <- pred_ses[1:len]
pred_arma <- pred_arma[1:len]

# Hitung akurasi
mape_ses  <- mape(test, pred_ses)
rmse_ses  <- rmse(test, pred_ses)
mape_arma <- mape(test, pred_arma)
rmse_arma <- rmse(test, pred_arma)

akurasi_df <- data.frame(
  Model = c("SES", "SMA", "ARMA"),
  MAPE  = c(mape_ses, NA, mape_arma),
  RMSE  = c(rmse_ses, NA, rmse_arma)
)

cat("\n=== Evaluasi Akurasi Model ===\n")
## 
## === Evaluasi Akurasi Model ===
print(akurasi_df)
##   Model      MAPE     RMSE
## 1   SES 0.7467642 40.74764
## 2   SMA        NA       NA
## 3  ARMA 0.4740272 33.81107