Import Library
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(ggplot2)
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
Membaca Data
data <- read_csv("~/Belajar kuliah lukas/Basis Data/Data Matkul Basis Data/Walmart.csv")
## Rows: 6435 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (7): Store, Weekly_Sales, Holiday_Flag, Temperature, Fuel_Price, CPI, Un...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 8
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 05-02-2010 1643691. 0 42.3 2.57 211.
## 2 1 12-02-2010 1641957. 1 38.5 2.55 211.
## 3 1 19-02-2010 1611968. 0 39.9 2.51 211.
## 4 1 26-02-2010 1409728. 0 46.6 2.56 211.
## 5 1 05-03-2010 1554807. 0 46.5 2.62 211.
## 6 1 12-03-2010 1439542. 0 57.8 2.67 211.
## # ℹ 1 more variable: Unemployment <dbl>
Forming Data Time Series
data.ts <- ts(data, start=c(2010,1), frequency = 12)
Plot
ts.plot(data.ts[,"Weekly_Sales"], type="l", ylab="Weekly Sales", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(data.ts, pch = 20, col = "blue")
monthplot(data.ts[,"Weekly_Sales"], ylab="Weekly_Sales", col="blue")
ggplot(data , aes(x = Date, y = Weekly_Sales)) +
geom_line(color = "blue") +
labs(title = "Plot Data Time Series Weekly Sales",
x = "Date",
y = "Weekly Sales")
seasonplot(data.ts,5,main="Seasonal Plot of Weekly Sales", ylab="times",year.labels = TRUE,
col=rainbow(20))
Splitting Data
fligner.test(Weekly_Sales ~ Date, data=data)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: Weekly_Sales by Date
## Fligner-Killeen:med chi-squared = 266.01, df = 142, p-value = 1.4e-09
train.ts <- data.ts[1:5149, "Weekly_Sales"]
test.ts <- data.ts[5150:6435, "Weekly_Sales"]
Train Plot
ts.plot(train.ts, ylab="Weekly Sales", xlab="Date", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")
Test Plot
ts.plot(test.ts, ylab="Weekly Sales", xlab="Date", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(test.ts, pch = 20, col = "blue")
Differencing Data
D1 <- diff(train.ts, 12)
ts.plot(D1, type="l", ylab="D1 Weekly_Sales", col="blue")
acf <- acf(D1, lag.max=48, xaxt="n", main="ACF D1", col="blue")
acf$lag <- acf$lag * 12
acf.1 <- as.data.frame(cbind(acf$acf,acf$lag))
acf.2 <- acf.1[which(acf.1$V2%%12==0),]
barplot(height = acf.2$V1, names.arg=acf.2$V2, ylab="ACF", xlab="Lag")
Differencing
d1D1 <- diff(D1)
ts.plot(d1D1, type="l", ylab="d1 D1 Xt", col="blue")
Identifikasi Model
acf3 <- acf(d1D1,lag.max=48,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:48/12, labels=0:48)
pacf3 <- pacf(d1D1,lag.max=48,xaxt="n", main="PACF d1D1", col="blue")
axis(1, at=0:48/12, labels=0:48)
par(mfrow=c(1,1))
eacf(as.matrix(d1D1))
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x x x o x x x x
## 1 x x x x x o x o x o x x x x
## 2 x x x o x o x o o o o x x x
## 3 x x x x x x x o o o o x x x
## 4 x x x x x o x o x o o x x o
## 5 x x x x x o x x x x x x x x
## 6 x x x x x x x x x x x x x x
## 7 x x x x x x x x x o x x x x
Kandidat Model
m1 <- Arima(train.ts,
order = c(0,1,1),
seasonal = list(order= c(0,1,1),
period=12))
summary(m1)
## Series: train.ts
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.4631 -0.9989
## s.e. 0.0140 0.0065
##
## sigma^2 = 3.335e+10: log likelihood = -69543.6
## AIC=139093.2 AICc=139093.2 BIC=139112.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 250.5564 182341.7 95257.64 -1.455348 8.713497 0.9958173 0.03229225
m2 <- Arima(train.ts,
order = c(1,1,0),
seasonal = list(order=c(0,1,1),
period=12))
summary(m2)
## Series: train.ts
## ARIMA(1,1,0)(0,1,1)[12]
##
## Coefficients:
## ar1 sma1
## -0.3475 -0.9970
## s.e. 0.0131 0.0034
##
## sigma^2 = 3.477e+10: log likelihood = -69647.76
## AIC=139301.5 AICc=139301.5 BIC=139321.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 225.3947 186207 95126.57 -1.22977 8.625421 0.9944472 -0.04663137
m3 <- Arima(train.ts,
order = c(1,1,1),
seasonal = list(order=c(0,1,1),
period=12))
summary(m3)
## Series: train.ts
## ARIMA(1,1,1)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 sma1
## 0.2453 -0.6730 -0.9998
## s.e. 0.0354 0.0291 0.0081
##
## sigma^2 = 3.305e+10: log likelihood = -69522.45
## AIC=139052.9 AICc=139052.9 BIC=139079.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 226.4751 181517.8 95519.39 -1.615278 8.809152 0.9985536
## ACF1
## Training set -0.01265612
m4 <- Arima(train.ts,
order = c(2,1,1),
seasonal = list(order=c(0,1,1),
period=12))
summary(m4)
## Series: train.ts
## ARIMA(2,1,1)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sma1
## 0.3832 0.1306 -0.8253 -0.9986
## s.e. 0.0252 0.0188 0.0201 0.0059
##
## sigma^2 = 3.283e+10: log likelihood = -69501.93
## AIC=139013.9 AICc=139013.9 BIC=139046.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 186.4537 180883.9 94948.7 -1.749158 8.813655 0.9925877 0.001188474
m5 <- Arima(train.ts,
order = c(1,1,1),
seasonal = list(order=c(1,1,0),
period=12))
summary(m5)
## Series: train.ts
## ARIMA(1,1,1)(1,1,0)[12]
##
## Coefficients:
## ar1 ma1 sar1
## 0.2158 -0.6549 -0.5192
## s.e. 0.0378 0.0318 0.0119
##
## sigma^2 = 4.966e+10: log likelihood = -70533.98
## AIC=141076 AICc=141076 BIC=141102.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 139.4526 222493.2 135714.5 -0.9880094 12.72744 1.418751
## ACF1
## Training set -0.01248345
m6 <- Arima(train.ts,
order = c(0,1,1),
seasonal = list(order=c(1,1,1),
period=12))
summary(m6)
## Series: train.ts
## ARIMA(0,1,1)(1,1,1)[12]
##
## Coefficients:
## ma1 sar1 sma1
## -0.4634 -0.0245 -0.9979
## s.e. 0.0140 0.0140 0.0045
##
## sigma^2 = 3.335e+10: log likelihood = -69542.06
## AIC=139092.1 AICc=139092.1 BIC=139118.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 245.3621 182342.6 95518.05 -1.468695 8.747028 0.9985397 0.03277074
Model Terbaik
AICKandidatModel <- c(m1$aic, m2$aic, m3$aic, m4$aic, m5$aic, m6$aic)
AICcKandidatModel <- c(m1$aicc, m2$aicc, m3$aicc, m4$aicc, m5$aicc, m6$aicc)
BICKandidatModel <- c(m1$bic, m2$bic, m3$bic, m4$bic, m5$bic, m6$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)(0,1,1)12", "ARIMA(1,1,0)(0,1,1)12", "ARIMA(1,1,1)(0,1,1)12","ARIMA(2,1,1)(0,1,1)12", "ARIMA(1,1,1)(1,1,0)12", "ARIMA(0,1,1)(1,1,1)12")
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA
## Kandidat Model Nilai AIC Nilai AICc Nilai BIC
## 1 ARIMA(0,1,1)(0,1,1)12 139093.204280759 139093.208957298 139112.836370295
## 2 ARIMA(1,1,0)(0,1,1)12 139301.516058481 139301.52073502 139321.148148017
## 3 ARIMA(1,1,1)(0,1,1)12 139052.896390963 139052.904186715 139079.072510345
## 4 ARIMA(2,1,1)(0,1,1)12 139013.854175436 139013.865871343 139046.574324663
## 5 ARIMA(1,1,1)(1,1,0)12 141075.958349642 141075.966145394 141102.134469024
## 6 ARIMA(0,1,1)(1,1,1)12 139092.12829942 139092.136095172 139118.304418802
Model terbaik diperoleh dari nilai aic dan aicc terkecil, sehingga
model terbaik adalah model 4 \[ARIMA(2,1,1)(0,1,1)_5\] dimana,
persamaannya adalah \[
ϕ_2(B)Φ_0(B12)(1−B)(1−B12)Xt​=θ_1(B)Θ_1(B12)εt​
\\(1−ϕ_1​B−ϕ_2​B2)(1−B)(1−B12)Xt​=(1+θ_1​B)(1+Θ_1​B12)εt
\\(1−ϕ_1​B−ϕ_2​B2)(Xt​−Xt−1​−Xt−12​+Xt−13​)=εt​+θ1​εt−1​+Θ1​εt−12​+θ1​Θ1​εt−13
\]
Sehingga persamaan akhir diperoleh \[
\\Xt​=(1+ϕ1​)Xt−1​+(ϕ2​−ϕ1​)Xt−2​−ϕ2​Xt−3
\\+Xt−12​−(1+ϕ1​)Xt−13​+(ϕ1​−ϕ2​)Xt−14​+ϕ2​Xt−15
\\+εt​+θ1​εt−1​+Θ1​εt−12​+θ1​Θ1​εt−13​
\]
Menggunakan Function auto.arima
auto.arima(train.ts)
## Series: train.ts
## ARIMA(4,1,5)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## -0.1576 -0.1936 -0.0658 0.1211 -0.2282 0.0681 -0.1565 0.0731
## s.e. 0.0554 0.0497 0.0428 0.0443 0.0545 0.0409 0.0398 0.0443
## ma5
## -0.2729
## s.e. 0.0267
##
## sigma^2 = 2.92e+10: log likelihood = -69327.78
## AIC=138675.6 AICc=138675.6 BIC=138741
Ternyata jika menggunakan function auto.arima, data tidak perlu diolah menggunakan ARIMA Seasonal
Pengujian Parameter Model
printstatarima <- function (x, digits = 4,se=TRUE){
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && nrow(x$var.coef)) {
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
coef <- rbind(coef, s.e. = ses)
statt <- coef[1,]/ses
pval <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
coef <- t(coef)
}
print.default(coef, print.gap = 2)
}
}
printstatarima(m4)
##
## Coefficients:
## s.e. t sign.
## ar1 0.3832 0.0252 15.2063 0
## ar2 0.1306 0.0188 6.9468 0
## ma1 -0.8253 0.0201 -41.0597 0
## sma1 -0.9986 0.0059 -169.2542 0
Diagnostic Model
tsdisplay(residuals(m4), lag.max=45, main='ARIMA(2,1,1)(0,1,1)12 Model Residuals', col="blue")
Box.test(residuals(m4),
lag = 5,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(m4)
## X-squared = 485.38, df = 5, p-value < 2.2e-16
Box.test(residuals(m4),
lag = 10,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(m4)
## X-squared = 534.47, df = 10, p-value < 2.2e-16
Box.test(residuals(m4),
lag = 15,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(m4)
## X-squared = 562.74, df = 15, p-value < 2.2e-16
jarque.bera.test(residuals(m4))
##
## Jarque Bera Test
##
## data: residuals(m4)
## X-squared = 81052, df = 2, p-value < 2.2e-16
Forecasting
ramalan_arima2=forecast(m4, 6)
ramalan_arima2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5150 418436.4 186103.00 650769.7 63113.143 773759.6
## 5151 399780.5 133744.32 665816.6 -7086.701 806647.6
## 5152 400550.6 108462.73 692638.4 -46159.228 847260.3
## 5153 381485.5 71532.63 691438.4 -92546.530 855517.6
## 5154 374775.2 50276.52 699273.9 -121502.691 871053.1
## 5155 366639.9 29641.13 703638.7 -148755.264 882035.1
prediksi <- forecast(m4, h = length(test.ts))
accuracy(prediksi, test.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 186.4537 180883.9 94948.7 -1.749158 8.813655 0.9925877
## Test set 546723.4052 670606.2 546865.2 64.385125 64.429175 5.7168937
## ACF1
## Training set 0.001188474
## Test set NA
plot(ramalan_arima2, col="blue")
forecast_arima2 <- cbind(ramalan_arima2$mean,ramalan_arima2$lower,ramalan_arima2$upper)
forecast_arima2
## Time Series:
## Start = 5150
## End = 5155
## Frequency = 1
## ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## 5150 418436.4 186103.00 63113.143
## 5151 399780.5 133744.32 -7086.701
## 5152 400550.6 108462.73 -46159.228
## 5153 381485.5 71532.63 -92546.530
## 5154 374775.2 50276.52 -121502.691
## 5155 366639.9 29641.13 -148755.264
## ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## 5150 650769.7 773759.6
## 5151 665816.6 806647.6
## 5152 692638.4 847260.3
## 5153 691438.4 855517.6
## 5154 699273.9 871053.1
## 5155 703638.7 882035.1