#Load Library
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TTR)
## Warning: package 'TTR' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(scales)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.4.3
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
#input data
data <- read_excel("C:/Users/user/Downloads/Data PM2.5.xlsx")

#Lihat Struktur data
head(data)
## # A tibble: 6 × 2
##   Date                PM2.5
##   <dttm>              <dbl>
## 1 2025-01-01 00:00:00    32
## 2 2025-01-02 00:00:00    33
## 3 2025-01-03 00:00:00    31
## 4 2025-01-04 00:00:00    24
## 5 2025-01-05 00:00:00    47
## 6 2025-01-06 00:00:00    36
# PREPROCESSING DATA PM2.5
library(dplyr)
library(lubridate)
library(TTR)

# 1. Pastikan tipe data benar
datal <- as.Date(data$Date)

# 2. Hapus NA
data <- na.omit(data)

# 3. Sort data
data <- data[order(data$Date), ]

# 4. Deteksi & hapus outlier
Q1 <- quantile(data$PM2.5, 0.25)
Q3 <- quantile(data$PM2.5, 0.75)
IQR <- Q3 - Q1
lower <- Q1 - 1.5*IQR
upper <- Q3 + 1.5*IQR
data <- data %>% filter(PM2.5 >= lower, PM2.5 <= upper)

# 5. Tambah kolom waktu
data$Tahun <- year(data$Date)
data$Bulan <- month(data$Date, label = TRUE, abbr = TRUE)
# Pastikan kolom tanggal bernama benar, misal "Tanggal"
data$Tanggal <- as.Date(data$Date)

# Tambah kolom bulan (format teks: Jan, Feb, dst)
data$Bulan <- format(data$Date, "%b")

# Jika mau urutan bulan rapi (Jan–Dec)
data$Bulan <- factor(data$Bulan, levels = month.abb)

# Buat boxplot bulanan
ggplot(data, aes(x = Bulan, y = PM2.5)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot PM2.5 per Bulan",
       x = "Bulan",
       y = "Konsentrasi PM2.5") +
  theme_minimal()

#Plot Time Series
ggplot(data, aes(x = Tanggal, y = PM2.5)) +
  geom_line() +
  labs(title = "Time Series PM2.5",
       x = "Tanggal", y = "PM2.5") +
  theme_minimal()

#Ubah jadi objek time series (harian 1 tahun)
ts_pm25 <- ts(data$PM2.5, frequency = 365)
#Splitting Data
data <- data %>% arrange(Date)
n <- nrow(data)
n_train <- round(0.8 * n)
train <- data[1:n_train, ]
test  <- data[(n_train + 1):n, ]

ggplot() +
  geom_line(data = train, aes(x = Date, y = PM2.5), linewidth = 1) +
  geom_line(data = test, aes(x = Date, y = PM2.5), color = "red", linewidth = 1) +
  labs(title = "Training (hitam) vs Testing (merah)",
       x = "Tanggal",
       y = "PM2.5")

# Tampilkan jumlah baris masing-masing
cat("Jumlah data total   :", n, "\n")
## Jumlah data total   : 270
cat("Jumlah data training:", n_train, "\n")
## Jumlah data training: 216
cat("Jumlah data testing :", n - n_train, "\n")
## Jumlah data testing : 54
#Uji ADF
adf.test(train$PM2.5)
## Warning in adf.test(train$PM2.5): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train$PM2.5
## Dickey-Fuller = -5.5067, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#Plot ACF dan PACF
acf(train$`PM2.5`, main = "ACF Data Training")

pacf(train$`PM2.5`, main = "PACF Data Training")

#Model dengan Auto ARIMA
model_arima <- auto.arima(train$`PM2.5`)
summary(model_arima)
## Series: train$PM2.5 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##           ar1     ar2     ma1     mean
##       -0.4974  0.3633  0.8186  34.3032
## s.e.   0.1152  0.0655  0.1059   1.0838
## 
## sigma^2 = 101.1:  log likelihood = -803.15
## AIC=1616.3   AICc=1616.58   BIC=1633.17
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.01119367 9.962296 7.964374 -9.748181 26.71903 0.8324455
##                     ACF1
## Training set 0.002453189
#Cek Heteroskedastisitas
library(FinTS)
## Warning: package 'FinTS' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'FinTS'
## The following object is masked from 'package:forecast':
## 
##     Acf
# Uji ARCH pada residual model ARIMA
ArchTest(residuals(model_arima), lags = 12)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  residuals(model_arima)
## Chi-squared = 10.857, df = 12, p-value = 0.5412
#DiagnostiK
#Cek residual white noise
checkresiduals(model_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,1) with non-zero mean
## Q* = 9.771, df = 7, p-value = 0.2019
## 
## Model df: 3.   Total lags used: 10
#Cek Autokorelasi residual
acf(residuals(model_arima))

#Cek Normalitas
hist(residuals(model_arima))

qqnorm(residuals(model_arima)); qqline(residuals(model_arima))

#Forecast
forecast_result <- forecast(model_arima, h = length(test$PM2.5))
autoplot(forecast_result)

#Nilai Forecast
# Lihat nilai peramalan ARIMA
forecast_values <- data.frame(
  Tanggal = test$Date,
  Actual = test$PM2.5,
  Forecast = forecast_result$mean,
  Lower_80 = forecast_result$lower[,1],
  Upper_80 = forecast_result$upper[,1],
  Lower_95 = forecast_result$lower[,2],
  Upper_95 = forecast_result$upper[,2]
)
knitr::kable(forecast_values, caption = "Tabel Hasil Peramalan PM2.5")
Tabel Hasil Peramalan PM2.5
Tanggal Actual Forecast Lower_80 Upper_80 Lower_95 Upper_95
2025-08-08 57 39.78120 26.89412 52.66828 20.07212 59.49029
2025-08-09 41 39.82416 26.28866 53.35967 19.12340 60.52493
2025-08-10 41 33.54712 19.75981 47.33442 12.46126 54.63298
2025-08-11 49 36.68515 22.89641 50.47389 15.59709 57.77321
2025-08-12 21 32.84370 19.02854 46.65886 11.71524 53.97216
2025-08-13 24 35.89464 22.07499 49.71429 14.75930 57.02997
2025-08-14 26 32.98137 19.15320 46.80955 11.83300 54.12975
2025-08-15 25 35.53895 21.70584 49.37207 14.38303 56.69488
2025-08-16 29 33.20832 19.37051 47.04613 12.04522 54.37142
2025-08-17 42 35.29684 21.45549 49.13820 14.12831 56.46537
2025-08-18 44 33.41121 19.56688 47.25553 12.23813 54.58428
2025-08-19 33 35.10796 21.26125 48.95466 13.93125 56.28467
2025-08-20 24 33.57887 19.73023 47.42752 12.39920 54.75855
2025-08-21 29 34.95593 21.10572 48.80615 13.77386 56.13801
2025-08-22 44 33.71541 19.86392 47.56690 12.53138 54.89944
2025-08-23 55 34.83278 20.98026 48.68531 13.64718 56.01839
2025-08-24 54 33.82627 19.97291 47.67964 12.63938 55.01317
2025-08-25 57 34.73289 20.87885 48.58694 13.54496 55.92083
2025-08-26 25 33.91624 20.06164 47.77084 12.72746 55.10502
2025-08-27 25 34.65185 20.79681 48.50690 13.46239 55.84131
2025-08-28 22 33.98924 20.13383 47.84465 12.79922 55.17926
2025-08-29 23 34.58610 20.73039 48.44180 13.39563 55.77657
2025-08-30 35 34.04847 20.19253 47.90441 12.85763 55.23930
2025-08-31 28 34.53275 20.67661 48.38888 13.34161 55.72388
2025-09-01 28 34.09652 20.24023 47.95282 12.90515 55.28790
2025-09-02 36 34.48946 20.63304 48.34588 13.29789 55.68103
2025-09-03 28 34.13552 20.27899 47.99204 12.94379 55.32724
2025-09-04 30 34.45433 20.59772 48.31094 13.26248 55.64619
2025-09-05 27 34.16716 20.31048 48.02383 12.97520 55.35912
2025-09-06 35 34.42584 20.56910 48.28257 13.23379 55.61788
2025-09-07 38 34.19283 20.33605 48.04960 13.00071 55.38494
2025-09-08 22 34.40271 20.54590 48.25953 13.21054 55.59488
2025-09-09 30 34.21365 20.35681 48.07050 13.02144 55.40587
2025-09-10 41 34.38395 20.52708 48.24082 13.19170 55.57620
2025-09-11 31 34.23055 20.37367 48.08744 13.03827 55.42283
2025-09-12 16 34.36873 20.51183 48.22563 13.17642 55.56103
2025-09-13 24 34.24427 20.38735 48.10118 13.05194 55.43659
2025-09-14 26 34.35638 20.49945 48.21330 13.16404 55.54872
2025-09-15 31 34.25539 20.39846 48.11233 13.06304 55.44774
2025-09-16 42 34.34636 20.48941 48.20330 13.15399 55.53872
2025-09-17 30 34.26442 20.40747 48.12137 13.07204 55.45679
2025-09-18 33 34.33823 20.48127 48.19518 13.14585 55.53061
2025-09-19 40 34.27174 20.41479 48.12870 13.07936 55.46413
2025-09-20 64 34.33163 20.47467 48.18859 13.13924 55.52402
2025-09-21 46 34.27768 20.42072 48.13465 13.08529 55.47008
2025-09-22 31 34.32627 20.46931 48.18324 13.13388 55.51867
2025-09-23 37 34.28251 20.42554 48.13947 13.09011 55.47491
2025-09-24 39 34.32193 20.46497 48.17890 13.12953 55.51433
2025-09-25 28 34.28642 20.42945 48.14339 13.09402 55.47882
2025-09-26 48 34.31841 20.46144 48.17538 13.12600 55.51081
2025-09-27 31 34.28959 20.43262 48.14656 13.09719 55.48200
2025-09-28 63 34.31555 20.45858 48.17252 13.12314 55.50795
2025-09-29 56 34.29217 20.43520 48.14914 13.09976 55.48458
2025-09-30 54 34.31323 20.45626 48.17020 13.12082 55.50564
#Evaluasi
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.4.3
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
# Hitung nilai error
rmse_val <- rmse(forecast_values$Actual, forecast_values$Forecast)
mae_val  <- mae(forecast_values$Actual, forecast_values$Forecast)
mape_val <- mape(forecast_values$Actual, forecast_values$Forecast) * 100

# MASE = MAE(model) / MAE(naive)
naive_forecast <- forecast::naive(train$PM2.5, h = length(test$PM2.5))
mase_val <- mae(forecast_values$Actual, forecast_values$Forecast) / 
            mae(forecast_values$Actual, naive_forecast$mean)

# Cetak nilai evaluasi
cat("=== Evaluasi Model ARIMA ===\n")
## === Evaluasi Model ARIMA ===
cat("RMSE :", rmse_val, "\n")
## RMSE : 11.51708
cat("MAE  :", mae_val, "\n")
## MAE  : 9.295394
cat("MAPE :", mape_val, "%\n")
## MAPE : 26.80502 %
cat("MASE :", mase_val, "\n")
## MASE : 0.43049