Library
library(readxl)
library(TSA)
library(forecast)
library(tseries)
library(nortest)
library(ggplot2)
Penentuan Data
df <- read_excel("C:/Users/Lenovo/Documents/ARW/Data ARW.xlsx")
names(df) <- c("Kategori", "Bulan", "Tanggal", "Index")
df$Index <- as.numeric(df$Index)
df$Waktu <- as.Date(paste("2024", df$Bulan, df$Tanggal, sep = "-"), format = "%Y-%m-%d")
head(df)
## # A tibble: 6 × 5
## Kategori Bulan Tanggal Index Waktu
## <chr> <chr> <chr> <dbl> <date>
## 1 TIDAK SEHAT 10 1 17 2024-10-01
## 2 SEDANG 10 2 16 2024-10-02
## 3 SEDANG 10 3 17 2024-10-03
## 4 SEDANG 10 4 16 2024-10-04
## 5 SEDANG 10 5 18 2024-10-05
## 6 SEDANG 10 6 15 2024-10-06
Penentuan Pola
Statistik Deskriptif
cat("=== Statistik Deskriptif Keseluruhan ===\n")
## === Statistik Deskriptif Keseluruhan ===
summary(df$Index)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 13.00 16.50 20.41 19.25 98.00
cat("\n=== Statistik Deskriptif per Bulan ===\n")
##
## === Statistik Deskriptif per Bulan ===
stat_perbulan <- aggregate(Index ~ Bulan, data = df,
FUN = function(x) c(
mean = mean(x, na.rm = TRUE),
median = median(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE),
min = min(x, na.rm = TRUE),
max = max(x, na.rm = TRUE)
))
stat_perbulan <- do.call(data.frame, stat_perbulan)
print(stat_perbulan)
## Bulan Index.mean Index.median Index.sd Index.min Index.max
## 1 10 14.93548 16 2.682480 10 20
## 2 11 14.10000 15 4.188243 5 19
## 3 12 32.00000 30 18.647609 10 98
Visualisasi Pola Data
par(mfrow = c(1, 2))
# Boxplot keseluruhan
boxplot(df$Index,
main = "Boxplot Keseluruhan Data NO₂",
ylab = "Kadar Nitrogen Dioksida",
col = "skyblue", border = "darkblue")
# Boxplot per bulan
boxplot(Index ~ Bulan, data = df,
main = "Boxplot Indeks NO₂ per Bulan",
xlab = "Bulan", ylab = "Kadar NO₂",
col = "lightgreen", border = "darkgreen", las = 2)

par(mfrow = c(1, 1))
Identifikasi Time Series
index <- ts(df$Index, start = 1, frequency = 1)
plot(index, xlab = "Waktu", ylab = "Index NO₂",
main = "Indeks Standar Pencemaran Udara (NO₂)",
lwd = 2, col = "blue")

adf.test(index)
##
## Augmented Dickey-Fuller Test
##
## data: index
## Dickey-Fuller = -3.6539, Lag order = 4, p-value = 0.03282
## alternative hypothesis: stationary
Differencing (Jika Data Tidak Stasioner)
diff_index <- diff(index, differences = 1)
plot(diff_index, xlab = "Waktu", ylab = "Index NO₂",
main = "Differencing (1) Indeks NO₂",
lwd = 2, col = "blue")

adf.test(diff_index)
## Warning in adf.test(diff_index): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_index
## Dickey-Fuller = -6.1974, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Permodelan Eksponensial Smoothing (DES)
# Model Holt (Double Exponential Smoothing)
fit_holt <- holt(index, h = 6)
# Ringkasan model
summary(fit_holt$model)
## Holt's method
##
## Call:
## holt(y = index, h = 6)
##
## Smoothing parameters:
## alpha = 0.2542
## beta = 1e-04
##
## Initial states:
## l = 18.2584
## b = 0.1036
##
## sigma: 12.5376
##
## AIC AICc BIC
## 887.2016 887.8993 899.8106
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006299507 12.26201 6.213226 -16.4126 31.29868 0.871192
## ACF1
## Training set 0.1611286
# Plot hasil peramalan
plot(fit_holt,
main = "Peramalan Double Exponential Smoothing (Holt) Indeks NO₂",
xlab = "Waktu", ylab = "Index NO₂",
col = "blue", fcol = "darkblue")
# Menambahkan data aktual
lines(index, col = "red", lwd = 2)
legend("topleft",
legend = c("Data Aktual", "Peramalan Holt"),
col = c("red", "blue"),
lwd = 2, bty = "n")

Trend Analysis
time <- 1:length(df$Index)
model_regresi <- lm(Index ~ time, data = df)
summary(model_regresi)
##
## Call:
## lm(formula = Index ~ time, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.712 -6.241 -1.141 3.808 72.695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.83349 2.62999 3.739 0.000324 ***
## time 0.22752 0.04911 4.632 1.21e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.51 on 90 degrees of freedom
## Multiple R-squared: 0.1925, Adjusted R-squared: 0.1836
## F-statistic: 21.46 on 1 and 90 DF, p-value: 1.213e-05
plot(df$Waktu, df$Index, type = "l", col = "darkgreen",
main = "Analisis Tren Indeks NO₂",
xlab = "Waktu", ylab = "Index NO₂")
abline(model_regresi, col = "red", lwd = 2)

Permodelan ARIMA/SARIMA
Identifikasi Model
tsdisplay(diff_index)

eacf(diff_index, ar.max=15, ma.max=15)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 0 x o x x o o o o o o x x o o o o
## 1 x x o x o o o o o o o o o o o o
## 2 x x o x o o o o o o o o o o o o
## 3 x x x o o o o o o o o o o o o o
## 4 x o o o o o o o o o o o o o o o
## 5 x o o o x o o o o o o o o o o o
## 6 x o o o o o o o o o o o o o o o
## 7 o o o o o o o o o o o o o o o o
## 8 o x x x o o o x o o o o o o o o
## 9 x x x o o o o x o o o o o o o o
## 10 x o o o o o o x o o o o o o o o
## 11 x x o x x o x o o x o o o o o o
## 12 x o o o x o o o o o o o o o o o
## 13 o x x x x x x o o o o o o o o o
## 14 o x o o o o o o o o o o o o o o
## 15 o x o o o o o x o o o o o o o o
Estimasi Model
model1 <- Arima(df$Index, order = c(0,1,1), include.constant = TRUE)
model2 <- Arima(df$Index, order = c(1,1,1), include.constant = TRUE)
model3 <- Arima(df$Index, order = c(2,1,2), include.constant = TRUE)
fit <- Arima(df$Index, order = c(1,1,1), include.constant = TRUE)
fit
## Series: df$Index
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## 0.3663 -1.0000 0.2196
## s.e. 0.0993 0.0513 0.0709
##
## sigma^2 = 140.2: log likelihood = -354.39
## AIC=716.78 AICc=717.25 BIC=726.83
Diagnostik Model
checkresiduals(fit)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1) with drift
## Q* = 16.064, df = 8, p-value = 0.04147
##
## Model df: 2. Total lags used: 10
qqnorm(fit$residuals, pch = 16)
qqline(fit$residuals, col = "blue", lwd = 2)

shapiro.test(fit$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit$residuals
## W = 0.6421, p-value = 1.204e-13
ks.test(fit$residuals, "pnorm", mean(fit$residuals), sd(fit$residuals))
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: fit$residuals
## D = 0.24622, p-value = 2.131e-05
## alternative hypothesis: two-sided
ad.test(fit$residuals)
##
## Anderson-Darling normality test
##
## data: fit$residuals
## A = 8.6615, p-value < 2.2e-16
jarque.bera.test(fit$residuals)
##
## Jarque Bera Test
##
## data: fit$residuals
## X-squared = 1946.8, df = 2, p-value < 2.2e-16
Overfitting
overfit1 <- Arima(df$Index, order = c(1,1,2), include.constant = TRUE)
overfit2 <- Arima(df$Index, order = c(2,1,1), include.constant = TRUE)
# Overfit MA
stat_uji_1 <- overfit1$coef[['ma2']] / 0.3256
derajat_bebas <- length(df$Index)-1
daerah_kritis <- qt(0.025, derajat_bebas)
stat_uji_1; daerah_kritis
## [1] -2.396436
## [1] -1.986377
2*(pt(stat_uji_1, derajat_bebas))
## [1] 0.01860132
# Overfit AR
stat_uji_2 <- overfit2$coef[['ar2']] / 0.1080
stat_uji_2; daerah_kritis
## [1] -4.727907
## [1] -1.986377
2*(pt(stat_uji_2, derajat_bebas))
## [1] 8.243791e-06
Peramalan
n <- length(index)
train_size <- round(0.8 * n)
train <- window(index, end = train_size)
test <- window(index, start = train_size + 1)
train_fit <- Arima(train, order = c(1,1,1), include.constant = TRUE)
forecast_train <- forecast(train_fit, h = length(test))
plot(forecast_train, fcol="blue", lwd = 2,
main = "Peramalan ARIMA(1,1,1) Data Training & Testing",
xlab="Waktu", ylab="Index NO₂")
lines(seq(train_size+1, n), test, col="red", lwd=2)
legend("topleft", col=c("blue", "red"),
legend=c("Peramalan", "Aktual"), lwd=2, bty="n")

forecast_final <- forecast(fit, h = 4)
forecast_final
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 93 26.05638 10.80064 41.31212 2.724741 49.38802
## 94 29.14655 12.84392 45.44917 4.213827 54.07926
## 95 30.41760 13.95941 46.87578 5.246975 55.58822
## 96 31.02231 14.53600 47.50863 5.808670 56.23595
plot(forecast_final, fcol="blue",
main = "Peramalan 4 Periode ke Depan (ARIMA 1,1,1)",
xlab="Waktu", ylab="Index NO₂", lwd = 2)
