Model SARIMA - Suhu Rata-rata

📚 Load Library

library(readxl)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(portes)

#membaca Data

data_suhu <- read_excel("~/Suhu Rata rata.xlsx")

colnames(data_suhu) <- c("Tanggal","Suhu")

head(data_suhu)
## # A tibble: 6 × 2
##   Tanggal     Suhu
##   <chr>      <dbl>
## 1 2013-01-01 10   
## 2 2013-01-02  7.4 
## 3 2013-01-03  7.17
## 4 2013-01-04  8.67
## 5 2013-01-05  6   
## 6 2013-01-06  7

#cleaning data

data_suhu$Suhu[data_suhu$Suhu %in% c(8888,9999,"")] <- NA
data_suhu$Suhu <- as.numeric(data_suhu$Suhu)

sum(is.na(data_suhu$Suhu))
## [1] 0

#interpolasi

data_suhu$Suhu <- na.approx(data_suhu$Suhu)

#Format tanggal

data_suhu$Tanggal <- as.Date(data_suhu$Tanggal)

#agregasi mingguan

data_suhu$Minggu <- floor_date(data_suhu$Tanggal, unit="week", week_start=1)

data_mingguan <- data_suhu %>%
  group_by(Minggu) %>%
  summarise(Suhu_Mingguan = mean(Suhu, na.rm=TRUE))

head(data_mingguan)
## # A tibble: 6 × 2
##   Minggu     Suhu_Mingguan
##   <date>             <dbl>
## 1 2012-12-31          7.71
## 2 2013-01-07         12.3 
## 3 2013-01-14         13.6 
## 4 2013-01-21         12.3 
## 5 2013-01-28         15.7 
## 6 2013-02-04         15.9

#time series

data_ts <- ts(data_mingguan$Suhu_Mingguan, frequency=52)

ts.plot(data_ts, col="blue", main="Time Series Suhu Mingguan")

#seasonal pot

seasonplot(data_ts, col=rainbow(10))

monthplot(data_ts)

#spilt data

train.ts <- data_ts[1:84]
test.ts <- data_ts[85:length(data_ts)]

#uji stasioner

adf.test(train.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train.ts
## Dickey-Fuller = -1.9596, Lag order = 4, p-value = 0.5925
## alternative hypothesis: stationary

#diferencing

diff1 <- diff(train.ts)
diff2 <- diff(diff1)

ts.plot(diff2, col="red")

adf.test(diff2)
## Warning in adf.test(diff2): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff2
## Dickey-Fuller = -7.1381, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

#acfpacf

acf(diff2, lag.max=104)

pacf(diff2, lag.max=104)

#model sarima

# Membentuk model SARIMA kandidat

model1 <- Arima(
  train.ts,
  order = c(0,1,1),
  seasonal = list(order = c(0,1,1), period = 52)
)

model2 <- Arima(
  train.ts,
  order = c(1,1,1),
  seasonal = list(order = c(0,1,1), period = 52)
)

model3 <- Arima(
  train.ts,
  order = c(2,1,1),
  seasonal = list(order = c(0,1,1), period = 52)
)

AIC <- c(
  AIC(model1),
  AIC(model2),
  AIC(model3)
)
# 1. Pastikan library TSA aktif
library(TSA)

# 2. Membersihkan data diff2 dari nilai kosong (NA) agar tidak eror
data_bersih_eacf <- na.omit(diff2)

# 3. Menampilkan matriks EACF dengan batas orde yang stabil (anti-eror)
print("--- MATRIKS EACF DARI DATA STASIONER (diff2) ---")
## [1] "--- MATRIKS EACF DARI DATA STASIONER (diff2) ---"
TSA::eacf(data_bersih_eacf, ar.max = 5, ma.max = 7)
## AR/MA
##   0 1 2 3 4 5 6 7
## 0 x o o o o o o o
## 1 x o o o o o o o
## 2 x o o o o o o o
## 3 x x o o o o o o
## 4 x x o o o o o o
## 5 x o o o o x o o

#pemilihan Model

Model <- c("SARIMA(0,1,1)(0,1,1)",
           "SARIMA(1,1,1)(0,1,1)",
           "SARIMA(2,1,1)(0,1,1)")

data.frame(Model, AIC)
##                  Model      AIC
## 1 SARIMA(0,1,1)(0,1,1) 147.7567
## 2 SARIMA(1,1,1)(0,1,1) 147.4724
## 3 SARIMA(2,1,1)(0,1,1) 149.4294

#diagnostik

model_terbaik <- model1

checkresiduals(model_terbaik)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(0,1,1)[52]
## Q* = 15.331, df = 8, p-value = 0.05302
## 
## Model df: 2.   Total lags used: 10
LjungBox(residuals(model_terbaik), lags=seq(5,30,5))
##  lags statistic df    p-value
##     5  12.72182  5 0.02612983
##    10  15.33121 10 0.12044501
##    15  17.28037 15 0.30238195
##    20  20.84378 20 0.40637828
##    25  29.33810 25 0.25003034
##    30  30.32548 30 0.44907794
jarque.bera.test(residuals(model_terbaik))
## 
##  Jarque Bera Test
## 
## data:  residuals(model_terbaik)
## X-squared = 19.481, df = 2, p-value = 5.885e-05

#forecast

ramalan <- forecast(model_terbaik, h=length(test.ts))

plot(ramalan)
lines(test.ts, col="red")

#evalusi

hasil <- accuracy(ramalan, test.ts)

tabel <- data.frame(
  Dataset=c("Train","Test"),
  RMSE=c(hasil[1,"RMSE"], hasil[2,"RMSE"]),
  MAPE=c(hasil[1,"MAPE"], hasil[2,"MAPE"])
)

tabel$RMSE <- round(tabel$RMSE,2)
tabel$MAPE <- round(tabel$MAPE,2)

tabel
##   Dataset RMSE MAPE
## 1   Train 1.44 3.22
## 2    Test 2.45 8.74