Library

library(readxl)
library(TSA)
library(forecast)
library(tseries)
library(nortest)
library(ggplot2)

Penentuan Data

df <- read_excel("C:/Users/Lenovo/Documents/ARW/Data ARW.xlsx")
names(df) <- c("Kategori", "Bulan", "Tanggal", "Index")

df$Index <- as.numeric(df$Index)

df$Waktu <- as.Date(paste("2024", df$Bulan, df$Tanggal, sep = "-"), format = "%Y-%m-%d")

head(df)
## # A tibble: 6 × 5
##   Kategori    Bulan Tanggal Index Waktu     
##   <chr>       <chr> <chr>   <dbl> <date>    
## 1 TIDAK SEHAT 10    1          17 2024-10-01
## 2 SEDANG      10    2          16 2024-10-02
## 3 SEDANG      10    3          17 2024-10-03
## 4 SEDANG      10    4          16 2024-10-04
## 5 SEDANG      10    5          18 2024-10-05
## 6 SEDANG      10    6          15 2024-10-06

Penentuan Pola

Statistik Deskriptif

cat("=== Statistik Deskriptif Keseluruhan ===\n")
## === Statistik Deskriptif Keseluruhan ===
summary(df$Index)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   13.00   16.50   20.41   19.25   98.00
cat("\n=== Statistik Deskriptif per Bulan ===\n")
## 
## === Statistik Deskriptif per Bulan ===
stat_perbulan <- aggregate(Index ~ Bulan, data = df,
FUN = function(x) c(
mean = mean(x, na.rm = TRUE),
median = median(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE),
min = min(x, na.rm = TRUE),
max = max(x, na.rm = TRUE)
))
stat_perbulan <- do.call(data.frame, stat_perbulan)
print(stat_perbulan)
##   Bulan Index.mean Index.median  Index.sd Index.min Index.max
## 1    10   14.93548           16  2.682480        10        20
## 2    11   14.10000           15  4.188243         5        19
## 3    12   32.00000           30 18.647609        10        98

Visualisasi Pola Data

par(mfrow = c(1, 2))

# Boxplot keseluruhan

boxplot(df$Index,
main = "Boxplot Keseluruhan Data NO₂",
ylab = "Kadar Nitrogen Dioksida",
col = "skyblue", border = "darkblue")

# Boxplot per bulan

boxplot(Index ~ Bulan, data = df,
main = "Boxplot Indeks NO₂ per Bulan",
xlab = "Bulan", ylab = "Kadar NO₂",
col = "lightgreen", border = "darkgreen", las = 2)

par(mfrow = c(1, 1))

Identifikasi Time Series

index <- ts(df$Index, start = 1, frequency = 1)

plot(index, xlab = "Waktu", ylab = "Index NO₂",
main = "Indeks Standar Pencemaran Udara (NO₂)",
lwd = 2, col = "blue")

adf.test(index)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  index
## Dickey-Fuller = -3.6539, Lag order = 4, p-value = 0.03282
## alternative hypothesis: stationary

Differencing (Jika Data Tidak Stasioner)

diff_index <- diff(index, differences = 1)

plot(diff_index, xlab = "Waktu", ylab = "Index NO₂",
main = "Differencing (1) Indeks NO₂",
lwd = 2, col = "blue")

adf.test(diff_index)
## Warning in adf.test(diff_index): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_index
## Dickey-Fuller = -6.1974, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Permodelan Eksponensial Smoothing (DES)

# Model Holt (Double Exponential Smoothing)
fit_holt <- holt(index, h = 6)

# Ringkasan model
summary(fit_holt$model)
## Holt's method 
## 
## Call:
## holt(y = index, h = 6)
## 
##   Smoothing parameters:
##     alpha = 0.2542 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 18.2584 
##     b = 0.1036 
## 
##   sigma:  12.5376
## 
##      AIC     AICc      BIC 
## 887.2016 887.8993 899.8106 
## 
## Training set error measures:
##                        ME     RMSE      MAE      MPE     MAPE     MASE
## Training set -0.006299507 12.26201 6.213226 -16.4126 31.29868 0.871192
##                   ACF1
## Training set 0.1611286
# Plot hasil peramalan
plot(fit_holt,
     main = "Peramalan Double Exponential Smoothing (Holt) Indeks NO₂",
     xlab = "Waktu", ylab = "Index NO₂",
     col = "blue", fcol = "darkblue")

# Menambahkan data aktual
lines(index, col = "red", lwd = 2)
legend("topleft",
       legend = c("Data Aktual", "Peramalan Holt"),
       col = c("red", "blue"),
       lwd = 2, bty = "n")

Trend Analysis

time <- 1:length(df$Index)
model_regresi <- lm(Index ~ time, data = df)
summary(model_regresi)
## 
## Call:
## lm(formula = Index ~ time, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.712  -6.241  -1.141   3.808  72.695 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.83349    2.62999   3.739 0.000324 ***
## time         0.22752    0.04911   4.632 1.21e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.51 on 90 degrees of freedom
## Multiple R-squared:  0.1925, Adjusted R-squared:  0.1836 
## F-statistic: 21.46 on 1 and 90 DF,  p-value: 1.213e-05
plot(df$Waktu, df$Index, type = "l", col = "darkgreen",
main = "Analisis Tren Indeks NO₂",
xlab = "Waktu", ylab = "Index NO₂")
abline(model_regresi, col = "red", lwd = 2)

Permodelan ARIMA/SARIMA

Identifikasi Model

tsdisplay(diff_index)

eacf(diff_index, ar.max=15, ma.max=15)
## AR/MA
##    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 0  x o x x o o o o o o x  x  o  o  o  o 
## 1  x x o x o o o o o o o  o  o  o  o  o 
## 2  x x o x o o o o o o o  o  o  o  o  o 
## 3  x x x o o o o o o o o  o  o  o  o  o 
## 4  x o o o o o o o o o o  o  o  o  o  o 
## 5  x o o o x o o o o o o  o  o  o  o  o 
## 6  x o o o o o o o o o o  o  o  o  o  o 
## 7  o o o o o o o o o o o  o  o  o  o  o 
## 8  o x x x o o o x o o o  o  o  o  o  o 
## 9  x x x o o o o x o o o  o  o  o  o  o 
## 10 x o o o o o o x o o o  o  o  o  o  o 
## 11 x x o x x o x o o x o  o  o  o  o  o 
## 12 x o o o x o o o o o o  o  o  o  o  o 
## 13 o x x x x x x o o o o  o  o  o  o  o 
## 14 o x o o o o o o o o o  o  o  o  o  o 
## 15 o x o o o o o x o o o  o  o  o  o  o

Estimasi Model

model1 <- Arima(df$Index, order = c(0,1,1), include.constant = TRUE)
model2 <- Arima(df$Index, order = c(1,1,1), include.constant = TRUE)
model3 <- Arima(df$Index, order = c(2,1,2), include.constant = TRUE)

fit <- Arima(df$Index, order = c(1,1,1), include.constant = TRUE)
fit
## Series: df$Index 
## ARIMA(1,1,1) with drift 
## 
## Coefficients:
##          ar1      ma1   drift
##       0.3663  -1.0000  0.2196
## s.e.  0.0993   0.0513  0.0709
## 
## sigma^2 = 140.2:  log likelihood = -354.39
## AIC=716.78   AICc=717.25   BIC=726.83

Diagnostik Model

checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1) with drift
## Q* = 16.064, df = 8, p-value = 0.04147
## 
## Model df: 2.   Total lags used: 10
qqnorm(fit$residuals, pch = 16)
qqline(fit$residuals, col = "blue", lwd = 2)

shapiro.test(fit$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit$residuals
## W = 0.6421, p-value = 1.204e-13
ks.test(fit$residuals, "pnorm", mean(fit$residuals), sd(fit$residuals))
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  fit$residuals
## D = 0.24622, p-value = 2.131e-05
## alternative hypothesis: two-sided
ad.test(fit$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  fit$residuals
## A = 8.6615, p-value < 2.2e-16
jarque.bera.test(fit$residuals)
## 
##  Jarque Bera Test
## 
## data:  fit$residuals
## X-squared = 1946.8, df = 2, p-value < 2.2e-16

Overfitting

overfit1 <- Arima(df$Index, order = c(1,1,2), include.constant = TRUE)
overfit2 <- Arima(df$Index, order = c(2,1,1), include.constant = TRUE)

# Overfit MA

stat_uji_1 <- overfit1$coef[['ma2']] / 0.3256
derajat_bebas <- length(df$Index)-1
daerah_kritis <- qt(0.025, derajat_bebas)
stat_uji_1; daerah_kritis
## [1] -2.396436
## [1] -1.986377
2*(pt(stat_uji_1, derajat_bebas))
## [1] 0.01860132
# Overfit AR

stat_uji_2 <- overfit2$coef[['ar2']] / 0.1080
stat_uji_2; daerah_kritis
## [1] -4.727907
## [1] -1.986377
2*(pt(stat_uji_2, derajat_bebas))
## [1] 8.243791e-06

Peramalan

n <- length(index)
train_size <- round(0.8 * n)
train <- window(index, end = train_size)
test <- window(index, start = train_size + 1)

train_fit <- Arima(train, order = c(1,1,1), include.constant = TRUE)
forecast_train <- forecast(train_fit, h = length(test))

plot(forecast_train, fcol="blue", lwd = 2,
main = "Peramalan ARIMA(1,1,1) Data Training & Testing",
xlab="Waktu", ylab="Index NO₂")
lines(seq(train_size+1, n), test, col="red", lwd=2)
legend("topleft", col=c("blue", "red"),
legend=c("Peramalan", "Aktual"), lwd=2, bty="n")

forecast_final <- forecast(fit, h = 4)
forecast_final
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 93       26.05638 10.80064 41.31212 2.724741 49.38802
## 94       29.14655 12.84392 45.44917 4.213827 54.07926
## 95       30.41760 13.95941 46.87578 5.246975 55.58822
## 96       31.02231 14.53600 47.50863 5.808670 56.23595
plot(forecast_final, fcol="blue",
main = "Peramalan 4 Periode ke Depan (ARIMA 1,1,1)",
xlab="Waktu", ylab="Index NO₂", lwd = 2)