Model SARIMA - Suhu Rata-rata
📚 Load Library
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
#membaca Data
data_suhu <- read_excel("~/Suhu Rata rata.xlsx")
colnames(data_suhu) <- c("Tanggal","Suhu")
head(data_suhu)## # A tibble: 6 × 2
## Tanggal Suhu
## <chr> <dbl>
## 1 2013-01-01 10
## 2 2013-01-02 7.4
## 3 2013-01-03 7.17
## 4 2013-01-04 8.67
## 5 2013-01-05 6
## 6 2013-01-06 7
#cleaning data
data_suhu$Suhu[data_suhu$Suhu %in% c(8888,9999,"")] <- NA
data_suhu$Suhu <- as.numeric(data_suhu$Suhu)
sum(is.na(data_suhu$Suhu))## [1] 0
#interpolasi
#Format tanggal
#agregasi mingguan
data_suhu$Minggu <- floor_date(data_suhu$Tanggal, unit="week", week_start=1)
data_mingguan <- data_suhu %>%
group_by(Minggu) %>%
summarise(Suhu_Mingguan = mean(Suhu, na.rm=TRUE))
head(data_mingguan)## # A tibble: 6 × 2
## Minggu Suhu_Mingguan
## <date> <dbl>
## 1 2012-12-31 7.71
## 2 2013-01-07 12.3
## 3 2013-01-14 13.6
## 4 2013-01-21 12.3
## 5 2013-01-28 15.7
## 6 2013-02-04 15.9
#time series
data_ts <- ts(data_mingguan$Suhu_Mingguan, frequency=52)
ts.plot(data_ts, col="blue", main="Time Series Suhu Mingguan")
#seasonal pot
#spilt data
#uji stasioner
##
## Augmented Dickey-Fuller Test
##
## data: train.ts
## Dickey-Fuller = -1.9596, Lag order = 4, p-value = 0.5925
## alternative hypothesis: stationary
#diferencing
## Warning in adf.test(diff2): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff2
## Dickey-Fuller = -7.1381, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#acfpacf
#model sarima
# Membentuk model SARIMA kandidat
model1 <- Arima(
train.ts,
order = c(0,1,1),
seasonal = list(order = c(0,1,1), period = 52)
)
model2 <- Arima(
train.ts,
order = c(1,1,1),
seasonal = list(order = c(0,1,1), period = 52)
)
model3 <- Arima(
train.ts,
order = c(2,1,1),
seasonal = list(order = c(0,1,1), period = 52)
)
AIC <- c(
AIC(model1),
AIC(model2),
AIC(model3)
)
# 1. Pastikan library TSA aktif
library(TSA)
# 2. Membersihkan data diff2 dari nilai kosong (NA) agar tidak eror
data_bersih_eacf <- na.omit(diff2)
# 3. Menampilkan matriks EACF dengan batas orde yang stabil (anti-eror)
print("--- MATRIKS EACF DARI DATA STASIONER (diff2) ---")## [1] "--- MATRIKS EACF DARI DATA STASIONER (diff2) ---"
## AR/MA
## 0 1 2 3 4 5 6 7
## 0 x o o o o o o o
## 1 x o o o o o o o
## 2 x o o o o o o o
## 3 x x o o o o o o
## 4 x x o o o o o o
## 5 x o o o o x o o
#pemilihan Model
Model <- c("SARIMA(0,1,1)(0,1,1)",
"SARIMA(1,1,1)(0,1,1)",
"SARIMA(2,1,1)(0,1,1)")
data.frame(Model, AIC)## Model AIC
## 1 SARIMA(0,1,1)(0,1,1) 147.7567
## 2 SARIMA(1,1,1)(0,1,1) 147.4724
## 3 SARIMA(2,1,1)(0,1,1) 149.4294
#diagnostik
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,1)[52]
## Q* = 15.331, df = 8, p-value = 0.05302
##
## Model df: 2. Total lags used: 10
## lags statistic df p-value
## 5 12.72182 5 0.02612983
## 10 15.33121 10 0.12044501
## 15 17.28037 15 0.30238195
## 20 20.84378 20 0.40637828
## 25 29.33810 25 0.25003034
## 30 30.32548 30 0.44907794
##
## Jarque Bera Test
##
## data: residuals(model_terbaik)
## X-squared = 19.481, df = 2, p-value = 5.885e-05
#forecast
#evalusi
hasil <- accuracy(ramalan, test.ts)
tabel <- data.frame(
Dataset=c("Train","Test"),
RMSE=c(hasil[1,"RMSE"], hasil[2,"RMSE"]),
MAPE=c(hasil[1,"MAPE"], hasil[2,"MAPE"])
)
tabel$RMSE <- round(tabel$RMSE,2)
tabel$MAPE <- round(tabel$MAPE,2)
tabel## Dataset RMSE MAPE
## 1 Train 1.44 3.22
## 2 Test 2.45 8.74