library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.5.3
# Baru jalankan fungsi baca file
data <- read.csv("C:/Users/HP/Downloads/Walmart.csv",sep = ";", quote = "")
head(data)
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price
## 1 1 05/02/2010 1643691 0 42.31 2.572
## 2 1 12/02/2010 1641957 1 38.51 2.548
## 3 1 19/02/2010 1611968 0 39.93 2.514
## 4 1 26/02/2010 1409728 0 46.63 2.561
## 5 1 05/03/2010 1554807 0 46.50 2.625
## 6 1 12/03/2010 1439542 0 57.79 2.667
## CPI Unemployment
## 1 2.110.963.582 8.106
## 2 2.112.421.698 8.106
## 3 2.112.891.429 8.106
## 4 2.113.196.429 8.106
## 5 2.113.501.429 8.106
## 6 2.113.806.429 8.106
data.ts <- ts(data.frame(Yt = data$Weekly_Sales), frequency = 12)
ts.plot(data.ts[, "Yt"], type = "l", ylab = "Yt", col = "pink")
title(main = "Time Series Plot of Yt", cex.sub = 0.8)
points(data.ts[, "Yt"], pch = 20, col = "pink")
seasonplot(data.ts, main="Seasonplot of Weekly Sales", ylab="Weekly Sales",
year.labels = TRUE, col=rainbow(18))
# Identifikasi Model
Uji asumsi formal terhadap kehomogenan ragam yang digunakan yaitu Fligner-Killen test, dimana:
H0: Ragam homogen
H1 : Ragam tidak homogen
grup_tahun <- floor(time(data.ts))
fligner.test(as.numeric(data.ts) ~ grup_tahun)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: as.numeric(data.ts) by grup_tahun
## Fligner-Killeen:med chi-squared = 2436.3, df = 536, p-value < 2.2e-16
logdat3 <- log10(data.ts)
# 2. Visualisasi
ts.plot(logdat3,
type="l",
ylab="Log Weekly Sales",
main="Time Series Plot of Log-Transformed Weekly Sales",
xlab="Time (Minggu)",
col="pink")
# Filter HANYA Toko 1
data_store1 <- subset(data, Store == 1)
n_total <- nrow(data_store1)
n_train <- round(0.8 * n_total)
train.Yt.ts <- ts(data_store1$Weekly_Sales[1:n_train], frequency = 12)
test.Yt.ts <- ts(data_store1$Weekly_Sales[(n_train + 1):n_total], frequency = 12)
ts.plot(train.Yt.ts, col="pink", ylab = "Weekly Sales", xlab = "Time")
title(main = "Train Time Series Plot", cex.sub = 0.8)
points(train.Yt.ts, pch = 20, col = "pink")
Mengidentifikasi kestasioneran data dilakukan dengan uji ADF. Uji Augmented Dickey-Fuller (ADF) adalah salah satu alat identifikasi untuk menguji kestasioneran data dan memiliki hipotesis sebagai berikut:
\(𝐻_0:\)Data tidak stasioner (unit roots mendekati 1)
\(𝐻_1:\)Data stasioner (unit roots tidak mendekati 1)
adf.test(train.Yt.ts, alternative = "stationary",
k=trunc((length(train.Yt.ts)-1)^(1/3)))
## Warning in adf.test(train.Yt.ts, alternative = "stationary", k =
## trunc((length(train.Yt.ts) - : p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train.Yt.ts
## Dickey-Fuller = -5.3549, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Berdasarkan hasil uji ADF dengan taraf signifikansi \(\alpha = 5\%\) di atas dapat diketahui bersama bahwa \(p-value = 0.01 < \alpha = 0.05\) sehingga keputusan yang diambil adalah Tolak \(H_0\) artinya data yang digunakan merupakan data yang stasioner.
Identifikasi kandidat model diperoleh berdasarkan nilai P dan Q dimana nilai D=0
acf(train.Yt.ts, lag.max = 20, col = "pink")
pacf(train.Yt.ts, lag.max = 20, col = "pink")
par(mfrow=c(2,1))
acf(as.numeric(train.Yt.ts), lag.max = 48, main = "Plot ACF (Mencari Nilai Q Musiman)", col = "pink")
pacf(as.numeric(train.Yt.ts), lag.max = 48, main = "Plot PACF (Mencari Nilai P Musiman)", col = "pink")
par(mfrow=c(1,1))
par(mfrow=c(1,1))
# Mengatur layout agar plot ACF dan PACF berdampingan atas-bawah
par(mfrow=c(2,1))
# Mengembalikan layout grafik ke standar
par(mfrow=c(1,1))
Berdasarkan plot di atas, terlihat adanya lonjakan yang signifikan pada lag musiman (lag 12) di plot PACF, sementara plot ACF diasumsikan terpotong (cut-off). Dari plot di atas diperoleh nilai \(P=1\) dan \(Q=0\), sehingga diperoleh model musiman \(ARIMA(P,D,Q)_s = ARIMA(1,0,0)_{12}\).
eacf(train.Yt.ts)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o x o o o o o o o o o o
## 1 o o o o x o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 o o o o o o o o o o o o o o
## 4 x x x o x o o o o o o o o o
## 5 o x o o o o o o o o o o o o
## 6 x x o x x x x o o o o o o o
## 7 x x o x x o x o o o o o o o
Karena kedua komponen telah stasioner, identifikasi komponen non-seasonal adalah \(ARIMA(0,0,4)\), \(ARIMA(5,0,0)\), \(ARIMA(0,0,3)\)
Identifikasi komponen seasonal adalah \(ARIMA(1,0,0)_{12}\), sehingga model tentatif yang diperoleh adalah: \(ARIMA(0,0,4) \times (1,0,0)_{12}\) \(ARIMA(5,0,0) \times (1,0,0)_{12}\) \(ARIMA(0,0,3) \times (1,0,0)_{12}\)
# ARIMA(0,0,4)
tmodel1 <- Arima(train.Yt.ts, order=c(0,0,4), seasonal=c(1,0,0))
summary(tmodel1)
## Series: train.Yt.ts
## ARIMA(0,0,4)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ma1 ma2 ma3 ma4 sar1 mean
## 0.4712 0.2898 0.0156 0.4485 0.0325 1556164.7
## s.e. 0.0866 0.1138 0.1264 0.1061 0.0989 30884.2
##
## sigma^2 = 2.226e+10: log likelihood = -1517.6
## AIC=3049.21 AICc=3050.26 BIC=3068.36
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -518.7885 145208.5 104862.3 -0.7494952 6.564416 0.6754357
## ACF1
## Training set -0.03810017
# ARIMA(5,0,0)
tmodel2 <- Arima(train.Yt.ts, order=c(5,0,0), seasonal=c(1,0,0), method="CSS")
summary(tmodel2)
## Series: train.Yt.ts
## ARIMA(5,0,0)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 sar1 mean
## 0.3516 0.0285 -0.0161 0.2758 -0.3416 0.0402 1561169.79
## s.e. 0.0899 0.0919 0.0920 0.0924 0.0895 0.0996 21955.63
##
## sigma^2 = 2.222e+10: log likelihood = -1525.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.224085 144420.3 96072.52 -0.7253345 5.970553 0.6188195
## ACF1
## Training set 0.01615472
# ARIMA(0,0,3)
tmodel3 <- Arima(train.Yt.ts, order=c(0,0,3), seasonal=c(1,0,0))
summary(tmodel3)
## Series: train.Yt.ts
## ARIMA(0,0,3)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ma1 ma2 ma3 sar1 mean
## 0.5383 0.1021 -0.3298 0.0721 1551725.24
## s.e. 0.1021 0.0814 0.1096 0.0954 20597.78
##
## sigma^2 = 2.564e+10: log likelihood = -1525.79
## AIC=3063.58 AICc=3064.36 BIC=3079.99
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 449.2427 156563.1 111918.2 -0.7552181 6.977522 0.7208845
## ACF1
## Training set -0.09984114
KandidatModelARIMA <- c("ARIMA(0,0,4)(1,0,0)12",
"ARIMA(5,0,0)(1,0,0)12",
"ARIMA(0,0,3)(1,0,0)12")
AICKandidatModel <- c(tmodel1$aic, NA, tmodel3$aic)
AICcKandidatModel <- c(tmodel1$aicc, NA, tmodel3$aicc)
BICKandidatModel <- c(tmodel1$bic, NA, tmodel3$bic)
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA
## Kandidat Model Nilai AIC Nilai AICc Nilai BIC
## 1 ARIMA(0,0,4)(1,0,0)12 3049.20627596277 3050.26287973636 3068.35966510154
## 2 ARIMA(5,0,0)(1,0,0)12 <NA> <NA> <NA>
## 3 ARIMA(0,0,3)(1,0,0)12 3063.57618175261 3064.36122848158 3079.99337244298
Model terbaik diperoleh berdasarkan nilai AIC, AICc dan BIC terkecil dari kandidat model yang valid. Oleh karena itu, model terbaik yang terpilih yaitu \(ARIMA(0,0,4) \times (1,0,0)_{12}\).
auto.arima(logdat3[,"Yt"], stepwise = FALSE)
## Series: logdat3[, "Yt"]
## ARIMA(0,1,5)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5
## -0.3026 -0.0323 -0.0879 0.1940 -0.2504
## s.e. 0.0121 0.0127 0.0130 0.0118 0.0123
##
## sigma^2 = 0.003048: log likelihood = 9509.9
## AIC=-19007.8 AICc=-19007.79 BIC=-18967.18
Jika berdasarkan fungsi auto.arima model terbaik yang diperoleh yaitu \(ARIMA(0,1,5)\), namun setelah dilakukan perbandingan melalui pendekatan prosedur Box-Jenkins, terpilih model terbaik berdasarkan nilai AIC terkecil yaitu model \(ARIMA(0,0,4) \times (1,0,0)_{12}\) yang akan dilanjutkan analisisnya. # Pengujian parameter model
printstatarima <- function(x, digits = 4, se = TRUE) {
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && nrow(x$var.coef)) {
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
# Menghitung t-statistic
statt <- coef / ses
# Menghitung p-value (menggunakan distribusi normal untuk sampel besar)
pval <- 2 * pnorm(abs(statt), lower.tail = FALSE)
# Menyusun matriks output
coef_mat <- cbind(Estimate = coef,
"S.E." = ses,
"t-stat" = round(statt, digits = digits),
"p-value" = round(pval, digits = digits))
print(coef_mat, print.gap = 2)
}
}
}
# Cara menggunakannya untuk model Anda:
printstatarima(tmodel1)
##
## Coefficients:
## Estimate S.E. t-stat p-value
## ma1 0.4712 0.0866 5.4411 0.0000
## ma2 0.2898 0.1138 2.5466 0.0109
## ma3 0.0156 0.1264 0.1234 0.9018
## ma4 0.4485 0.1061 4.2271 0.0000
## sar1 0.0325 0.0989 0.3286 0.7424
## intercept 1556164.6570 30884.1952 50.3871 0.0000
Model terbaik adalah \(ARIMA(0,0,4) \times (1,0,0)_{12}\), dimana dugaan parameter model yaitu \(\theta_3\) (\(ma3\)) dan \(\Phi_1\) (\(sar1\)) tidak berpengaruh nyata karena memiliki \(p\text{-value} > 0,05\).Untuk model \(ARIMA(0,0,4) \times (1,0,0)_{12}\), maka \(Z_t\) diperoleh dari penjabaran operator backshift sehingga:
\(p=0, d=0, q=4\)\(P=1, D=0, Q=0, s=12\)
\(\phi_p(B)\Phi_P(B^s)(1-B)^d(1-B^s)^DY_t = \theta_q(B)\Theta_Q(B^s)e_t\)
\(\phi_0(B)\Phi_1(B^{12})(1-B)^0(1-B^{12})^0Y_t = \theta_4(B)\Theta_0(B^{12})e_t\)
\((1-\Phi_1 B^{12})Y_t = (1+\theta_1 B + \theta_2 B^2 + \theta_3 B^3 + \theta_4 B^4)e_t\)
\(Y_t - \Phi_1 Y_{t-12} = e_t + \theta_1 e_{t-1} + \theta_2 e_{t-2} + \theta_3 e_{t-3} + \theta_4 e_{t-4}\)
\(Y_t = \Phi_1 Y_{t-12} + e_t + \theta_1 e_{t-1} + \theta_2 e_{t-2} + \theta_3 e_{t-3} + \theta_4 e_{t-4}\)
Penduga parameter yang diperoleh:
\(\hat{\theta}_1 = 0,4712\)
\(\hat{\theta}_2 = 0,2898\)
\(\hat{\theta}_3 = 0,0156\)
\(\hat{\theta}_4 = 0,4485\)
\(\hat{\Phi}_1 = 0,0325\)
dengan \(\hat{\sigma}^2 = 2,226 \times 10^{10}\) adalah nilai dugaan bagi \(\sigma^2_e\).Model terbaik yang diperoleh yaitu model \(ARIMA(0,0,4) \times (1,0,0)_{12}\): \(Y_t = 0,0325 Y_{t-12} + e_t + 0,4712 e_{t-1} + 0,2898 e_{t-2} + 0,0156 e_{t-3} + 0,4485 e_{t-4}\)
tsdisplay(residuals(tmodel1),
col = "pink",
lag.max = 45,
main = 'ARIMA(0,0,4)x(1,0,0)12 Model Residuals')
Berdasarkan plot di atas terlihat bahwa plot ACF dan PACF terlihat bahwa tidak ada lag yang signifikan. Hal tersebut menunjukkan bahwa tidak ada gejala autokorelasi pada sisaan. Selanjutnya, untuk memastikan kembali akan dilakukan uji asumsi secara formal:
checkresiduals(tmodel1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,4)(1,0,0)[12] with non-zero mean
## Q* = 10.579, df = 18, p-value = 0.9114
##
## Model df: 5. Total lags used: 23
Berdasarkan hasil pengujian diagnostik sisaan menggunakan uji Ljung-Box, diperoleh nilai \(Q^* = 10,579\) dengan \(p\text{-value} = 0,9114\). Karena nilai \(p\text{-value} > 0,05\), maka dapat disimpulkan bahwa tidak terdapat autokorelasi pada sisaan model. Hal ini mengindikasikan bahwa sisaan telah bersifat white noise dan model \(ARIMA(0,0,4) \times (1,0,0)_{12}\) telah memenuhi asumsi diagnostik
library(tseries)
jarque.bera.test(residuals(tmodel1))
##
## Jarque Bera Test
##
## data: residuals(tmodel1)
## X-squared = 39.469, df = 2, p-value = 2.687e-09
Berdasarkan hasil uji kenormalan dengan uji Jarque-Bera, sisaan model tidak menyebar normal karena nilai \(p\text{-value} = 2,687 \times 10^{-9} < \alpha = 5\% = 0,05\).
ramalan_arima1 = forecast(tmodel1, 6)
ramalan_arima1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 10 1715307 1524115 1906498 1422904 2007709
## Aug 10 1647434 1436080 1858787 1324197 1970671
## Sep 10 1588301 1369806 1806797 1254142 1922461
## Oct 10 1703119 1484603 1921635 1368928 2037310
## Nov 10 1564735 1330000 1799471 1205738 1923732
## Dec 10 1555619 1320884 1790355 1196622 1914616
accuracy(ramalan_arima1)
## ME RMSE MAE MPE MAPE MASE
## Training set -518.7885 145208.5 104862.3 -0.7494952 6.564416 0.6754357
## ACF1
## Training set -0.03810017
plot(ramalan_arima1, col="pink")
forecast_arima1 <- cbind(ramalan_arima1$mean,ramalan_arima1$lower,ramalan_arima1$upper)
forecast_arima1
## ramalan_arima1$mean ramalan_arima1$lower.80% ramalan_arima1$lower.95%
## Jul 10 1715307 1524115 1422904
## Aug 10 1647434 1436080 1324197
## Sep 10 1588301 1369806 1254142
## Oct 10 1703119 1484603 1368928
## Nov 10 1564735 1330000 1205738
## Dec 10 1555619 1320884 1196622
## ramalan_arima1$upper.80% ramalan_arima1$upper.95%
## Jul 10 1906498 2007709
## Aug 10 1858787 1970671
## Sep 10 1806797 1922461
## Oct 10 1921635 2037310
## Nov 10 1799471 1923732
## Dec 10 1790355 1914616