Model ARIMA - Analisis Suhu
Load Library
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#membaca data
## [1] "Tanggal" "Suhu rata rata"
## # A tibble: 6 × 2
## Tanggal Suhu
## <chr> <dbl>
## 1 2013-01-01 10
## 2 2013-01-02 7.4
## 3 2013-01-03 7.17
## 4 2013-01-04 8.67
## 5 2013-01-05 6
## 6 2013-01-06 7
#cleaning data
# Ubah nilai tidak valid jadi NA
data_suhu$Suhu[data_suhu$Suhu %in% c(8888, 9999, "")] <- NA
# Pastikan numerik
data_suhu$Suhu <- as.numeric(data_suhu$Suhu)
# Cek NA
sum(is.na(data_suhu$Suhu))## [1] 0
#interpolasi misisng value
data_suhu$Suhu <- na.approx(data_suhu$Suhu)
data_suhu$Tanggal <- as.Date(data_suhu$Tanggal)
data_suhu$Minggu <- floor_date(data_suhu$Tanggal, unit = "week", week_start = 1)
data_mingguan <- data_suhu %>%
group_by(Minggu) %>%
summarise(
Suhu_Mingguan = round(mean(Suhu, na.rm = TRUE), 2)
)
head(data_mingguan)## # A tibble: 6 × 2
## Minggu Suhu_Mingguan
## <date> <dbl>
## 1 2012-12-31 7.71
## 2 2013-01-07 12.3
## 3 2013-01-14 13.6
## 4 2013-01-21 12.3
## 5 2013-01-28 15.7
## 6 2013-02-04 15.9
#time series dan split data
ts_suhu <- ts(data_mingguan$Suhu_Mingguan, frequency = 52)
n <- length(ts_suhu)
indeks_split <- floor(0.8 * n)
train.Suhu.ts <- ts_suhu[1:indeks_split]
test.Suhu.ts <- ts_suhu[(indeks_split+1):n]
ts_suhu <- ts(data_mingguan$Suhu_Mingguan, frequency = 52)
plot(ts_suhu, main="Time Series Suhu Mingguan", col="blue")n <- length(ts_suhu)
indeks_split <- floor(0.8 * n)
train.Suhu.ts <- ts_suhu[1:indeks_split]
test.Suhu.ts <- ts_suhu[(indeks_split+1):n]#Uji Stasioneritas
##
## Augmented Dickey-Fuller Test
##
## data: train.Suhu.ts
## Dickey-Fuller = -3.6114, Lag order = 5, p-value = 0.03431
## alternative hypothesis: stationary
#Differencing
##
## Augmented Dickey-Fuller Test
##
## data: ts_diff
## Dickey-Fuller = -2.5831, Lag order = 5, p-value = 0.3329
## alternative hypothesis: stationary
#ACF & PACF
# Load package
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o x o o o o o o o o o
## 1 x o o o x o o o o o o o o o
## 2 x o o o x o o o o o o o o o
## 3 x o o o x o o o o o o o o o
## 4 x o o x x o o o o o o o o o
## 5 x x o o o o o o o o o o o o
## 6 o x o o o o o o o o o o o o
## 7 o o o o o o o o o o o o o o
## [1] 4.63 1.30 -1.31 3.41 0.12 0.15
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o x o o o o o o o o o
## 1 x o o o x o o o o o o o o o
## 2 x o o o x o o o o o o o o o
## 3 x o o o x o o o o o o o o o
## 4 x o o x x o o o o o o o o o
## 5 x x o o o o o o o o o o o o
## 6 o x o o o o o o o o o o o o
## 7 o o o o o o o o o o o o o o
#model Arima
data_ts <- ts_diff
model1 <- Arima(data_ts, order = c(5, 0, 2))
model2 <- Arima(data_ts, order = c(0, 0, 5))
model3 <- Arima(data_ts, order = c(5, 0, 0))
model4 <- Arima(data_ts, order = c(5, 0, 1))
# Membuat tabel penampung secara langsung tanpa membuat fungsi terpisah
tabel_kandidat <- data.frame(
Model = c("ARIMA(5,0,2)", "ARIMA(0,0,5)", "ARIMA(5,0,0)", "ARIMA(5,0,1)"),
AIC = round(c(model1$aic, model2$aic, model3$aic, model4$aic), 2),
AICc = round(c(model1$aicc, model2$aicc, model3$aicc, model4$aicc), 2),
BIC = round(c(model1$bic, model2$bic, model3$bic, model4$bic), 2)
)
# Memaksa R untuk langsung memunculkan tabel ke layar
tabel_kandidat## Model AIC AICc BIC
## 1 ARIMA(5,0,2) 719.49 720.65 747.50
## 2 ARIMA(0,0,5) 719.81 720.52 741.59
## 3 ARIMA(5,0,0) 718.96 719.67 740.74
## 4 ARIMA(5,0,1) 718.79 719.71 743.69
# 1. LOAD LIBRARY YANG DIBUTUHKAN
# Jika belum menginstal lmtest, jalankan: install.packages("lmtest")
library(forecast)
library(lmtest)
# ESTIMASI PARAMETER MODEL ARIMA(5,0,1)
# Menggunakan objek data_ts (data stasioner non-musiman Anda)
# Orde yang dimasukkan adalah c(p=5, d=0, q=1)
model_arima_terbaik <- Arima(data_ts,
order = c(5,0,1))
summary(model_arima_terbaik)## Series: data_ts
## ARIMA(5,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 mean
## 0.1705 0.0896 0.0796 0.1230 0.2446 -0.3161 0.2252
## s.e. 0.1840 0.0778 0.0782 0.0807 0.0860 0.1812 0.3552
##
## sigma^2 = 4.2: log likelihood = -351.4
## AIC=718.79 AICc=719.71 BIC=743.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03423561 2.005629 1.630906 48.13774 160.9516 0.6716908
## ACF1
## Training set -0.006330614
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.170527 0.183980 0.9269 0.353992
## ar2 0.089557 0.077762 1.1517 0.249452
## ar3 0.079564 0.078241 1.0169 0.309198
## ar4 0.122980 0.080675 1.5244 0.127413
## ar5 0.244616 0.086006 2.8442 0.004453 **
## ma1 -0.316059 0.181191 -1.7443 0.081100 .
## intercept 0.225210 0.355154 0.6341 0.526002
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 3. MENAMPILKAN TABEL HASIL ESTIMASI DAN UJI SIGNIFIKANSI (P-VALUE)
print(" HASIL ESTIMASI PARAMETER MODEL ARIMA(5,0,1) ")## [1] " HASIL ESTIMASI PARAMETER MODEL ARIMA(5,0,1) "
# Fungsi ini memunculkan nilai p-value secara langsung untuk uji signifikansi
coeftest(model_arima_terbaik)##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.170527 0.183980 0.9269 0.353992
## ar2 0.089557 0.077762 1.1517 0.249452
## ar3 0.079564 0.078241 1.0169 0.309198
## ar4 0.122980 0.080675 1.5244 0.127413
## ar5 0.244616 0.086006 2.8442 0.004453 **
## ma1 -0.316059 0.181191 -1.7443 0.081100 .
## intercept 0.225210 0.355154 0.6341 0.526002
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#diasnostic checking
## [1] "data_mingguan" "data_suhu" "data_ts"
## [4] "indeks_split" "model_arima_terbaik" "model1"
## [7] "model2" "model3" "model4"
## [10] "n" "tabel_kandidat" "test.Suhu.ts"
## [13] "train.Suhu.ts" "ts_diff" "ts_suhu"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,1) with non-zero mean
## Q* = 1.4028, df = 4, p-value = 0.8437
##
## Model df: 6. Total lags used: 10
sisaan <- residuals(model_arima_terbaik)
ks.test(sisaan, "pnorm", mean = mean(sisaan), sd = sd(sisaan))##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: sisaan
## D = 0.045334, p-value = 0.8846
## alternative hypothesis: two-sided
##
## One Sample t-test
##
## data: sisaan
## t = -0.2193, df = 165, p-value = 0.8267
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.3424766 0.2740054
## sample estimates:
## mean of x
## -0.03423561
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 16.747, df = 23, p-value = 0.8215
#forecasting
h_step <- length(test.Suhu.ts)
ramalan_suhu <- forecast(model_arima_terbaik, h = h_step)
plot(ramalan_suhu, main = "Peramalan Suhu Mingguan")
lines(test.Suhu.ts, col = "red", type = "b", pch = 20)
legend("topleft",
legend=c("Ramalan", "Data Aktual"),
col=c("blue", "red"),
lty=1)
#evaluasi akurasi
matriks_evaluasi <- accuracy(ramalan_suhu, test.Suhu.ts)
tabel_akurasi <- data.frame(
Dataset = c("Training", "Testing"),
RMSE = c(matriks_evaluasi[1, "RMSE"], matriks_evaluasi[2, "RMSE"]),
MAPE = c(matriks_evaluasi[1, "MAPE"], matriks_evaluasi[2, "MAPE"])
)
tabel_akurasi$RMSE <- round(tabel_akurasi$RMSE, 2)
tabel_akurasi$MAPE <- round(tabel_akurasi$MAPE, 2)
print(tabel_akurasi)## Dataset RMSE MAPE
## 1 Training 2.01 160.95
## 2 Testing 29.22 98.34