ARIMA Musiman atau Seasonal Autoregressive Integrated Moving Average (SARIMA) merupakan pengembangan dari model Autoregressive Integrated Moving Average (ARIMA) pada data deret waktu yang memiliki pola musiman. Metode ini dipopulerkan oleh George Box dan Gwilym Jenskins sekitar tahun 1970-an.
Model ARIMA musiman menggabungkan faktor-faktor non-musiman (regular) dan musiman dalam model multiplikasi, dengan notasi sebagai berikut :
ARIMA(p,d,q)×(P,D,Q)s
dengan :
p = non-seasonal AR order,
d = non-seasonal differencing,
q = non-seasonal MA order,
P = seasonal AR order,
D = seasonal differencing,
Q = seasonal MA order, dan
s = time span of repeating seasonal pattern.
Contoh :
ARIMA(0,0,1)×(0,0,1)12 Model ini terdiri dari non-musiman MA(1), musiman MA(1), tanpa pembedaan d=0,D=0 dan s=12
Tahapan Identifikasi Model ARIMA Musiman sama halnya seperti yang dilakukan pada model ARIMA regular atau model ARIMA tanpa musiman, yaitu :
Plot time series
Identifikasi model
Pendugaan parameter model
Seleksi Model
Melakukan peramalan menggunakan model terbaik
Jangan lupa, untuk memasukan library-library yang dibutuhkan.
library("tseries")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("forecast")
library("TTR")
library("TSA")
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library("graphics")
library("astsa")
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library("car")
## Loading required package: carData
library("portes")
library(readxl)
USpass <-read_excel("datalatihan.xlsx")
class(USpass)
## [1] "tbl_df" "tbl" "data.frame"
head(USpass)
## # A tibble: 6 × 1
## `Sales (in thousands)`
## <dbl>
## 1 10618.
## 2 10538.
## 3 10209.
## 4 10553
## 5 9935.
## 6 10534.
Di sini dilakukan perubahan data USpass dari data.frame menjadi data ts atau Time Series
USpass.ts <- ts(USpass, frequency = 1, start = c(1960,1), end = c(2079,1))
class(USpass.ts)
## [1] "ts"
#1. Plot Time Series
# Plot untuk data penjualan tunggal
plot.ts(USpass.ts, main = "Monthly Sales Data", xlab = "Year", ylab = "Sales (in thousand)")
Plot data asal memperlihatkan pola musiman dengan s = 12, serta terdapat perilaku nonstasioner baik dalam rataan maupun ragam.
seasonplot(USpass.ts, 12, main = "Monthly Sales Data", ylab = "Sales (in thousand)",
year.labels = TRUE, col = rainbow(length(unique(floor(time(USpass.ts)))))
)
monthplot(USpass.ts, ylab = "Sales (in thousand)", main = "Average Monthly Sales")
# Boxplot bulanan untuk data penjualan
boxplot(USpass.ts ~ cycle(USpass.ts), xlab = "Month", ylab = "Sales (in thousand)",
main = "Monthly Sales Data - Boxplot")
Boxplot menunjukkan ukuran keragaman (panjang box) dan pemusatan
(median) yang berbeda pada masing-masing bulan # Identifikasi Model ##
Transformasi Logaritma
lnAPM=log(USpass.ts)
monthplot(lnAPM,ylab="Air Passenger (miles)")
boxplot(lnAPM ~ cycle(lnAPM), xlab = "Month", ylab = "ln(APM)", main = "Monthly U.S. Air Passenger Data - Boxplot")
Boxplot menunjukkan ukuran keragaman (panjang box) hampir sama akan
tetapi ukuran pemusatan (median) yang berbeda pada masing-masing
bulan
USpass
## # A tibble: 120 × 1
## `Sales (in thousands)`
## <dbl>
## 1 10618.
## 2 10538.
## 3 10209.
## 4 10553
## 5 9935.
## 6 10534.
## 7 10196.
## 8 10512.
## 9 10090.
## 10 10371.
## # ℹ 110 more rows
USpass.ts
## Time Series:
## Start = 1960
## End = 2079
## Frequency = 1
## Sales (in thousands)
## [1,] 10618.1
## [2,] 10537.9
## [3,] 10209.3
## [4,] 10553.0
## [5,] 9934.9
## [6,] 10534.5
## [7,] 10196.5
## [8,] 10511.8
## [9,] 10089.6
## [10,] 10371.2
## [11,] 10239.4
## [12,] 10472.4
## [13,] 10827.2
## [14,] 10640.8
## [15,] 10517.8
## [16,] 10154.2
## [17,] 9969.2
## [18,] 10260.4
## [19,] 10737.0
## [20,] 10430.0
## [21,] 10689.0
## [22,] 10430.4
## [23,] 10002.4
## [24,] 10135.7
## [25,] 10096.2
## [26,] 10288.7
## [27,] 10289.1
## [28,] 10589.9
## [29,] 10551.9
## [30,] 10208.3
## [31,] 10334.5
## [32,] 10480.1
## [33,] 10387.6
## [34,] 10202.6
## [35,] 10219.3
## [36,] 10382.7
## [37,] 10820.5
## [38,] 10358.7
## [39,] 10494.6
## [40,] 10497.6
## [41,] 10431.5
## [42,] 10447.8
## [43,] 10684.4
## [44,] 10176.5
## [45,] 10616.0
## [46,] 10627.7
## [47,] 10684.0
## [48,] 10246.7
## [49,] 10265.0
## [50,] 10090.4
## [51,] 9881.1
## [52,] 10449.7
## [53,] 10276.3
## [54,] 10175.2
## [55,] 10212.5
## [56,] 10395.5
## [57,] 10545.9
## [58,] 10635.7
## [59,] 10265.2
## [60,] 10551.6
## [61,] 10538.2
## [62,] 10286.2
## [63,] 10171.3
## [64,] 10393.1
## [65,] 10162.3
## [66,] 10164.5
## [67,] 10327.0
## [68,] 10365.1
## [69,] 10755.9
## [70,] 10463.6
## [71,] 10080.5
## [72,] 10479.6
## [73,] 9980.9
## [74,] 10039.2
## [75,] 10246.1
## [76,] 10368.0
## [77,] 10446.3
## [78,] 10535.3
## [79,] 10786.9
## [80,] 9975.8
## [81,] 10160.9
## [82,] 10422.1
## [83,] 10757.2
## [84,] 10463.8
## [85,] 10307.0
## [86,] 10134.7
## [87,] 10207.7
## [88,] 10488.0
## [89,] 10262.3
## [90,] 10785.9
## [91,] 10375.4
## [92,] 10123.4
## [93,] 10462.7
## [94,] 10205.5
## [95,] 10522.7
## [96,] 10253.2
## [97,] 10428.7
## [98,] 10615.8
## [99,] 10417.3
## [100,] 10445.4
## [101,] 10690.6
## [102,] 10271.8
## [103,] 10524.8
## [104,] 9815.0
## [105,] 10398.5
## [106,] 10553.1
## [107,] 10655.8
## [108,] 10199.1
## [109,] 10416.6
## [110,] 10391.3
## [111,] 10210.1
## [112,] 10352.5
## [113,] 10423.8
## [114,] 10519.3
## [115,] 10596.7
## [116,] 10650.0
## [117,] 10741.6
## [118,] 10246.0
## [119,] 10354.4
## [120,] 10155.4
str(USpass)
## tibble [120 × 1] (S3: tbl_df/tbl/data.frame)
## $ Sales (in thousands): num [1:120] 10618 10538 10209 10553 9935 ...
str(USpass.ts)
## Time-Series [1:120, 1] from 1960 to 2079: 10618 10538 10209 10553 9935 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "Sales (in thousands)"
dim(USpass.ts)
## [1] 120 1
fligner.test(USpass.ts[,1] ~ `Sales (in thousands)`, data = USpass)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: USpass.ts[, 1] by Sales (in thousands)
## Fligner-Killeen:med chi-squared = NaN, df = 119, p-value = NA
Nilai-p kurang dari 5% sehinga tolak H0 dengan kata lain ragam tidak homogen
fligner.test(log(USpass.ts[,1])~ `Sales (in thousands)`, data=USpass)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: log(USpass.ts[, 1]) by Sales (in thousands)
## Fligner-Killeen:med chi-squared = NaN, df = 119, p-value = NA
Nilai-p lebih dari 5% sehingga tidak tolak H0 dengan kata lain ragam data setelah di ln kan homogen ## Pembagian data lnAPM menjadi training-testing Data setahun terakhir digunakan untuk data testing sementara data sebelumnya untuk membangun model
lpass <- subset(lnAPM,start=1,end=204)
test <- subset(lnAPM,start=205,end=216)
class(lpass)
## [1] "ts"
any(is.na(lpass)) # mengecek nilai NA
## [1] TRUE
any(is.nan(lpass)) # mengecek nilai NAN
## [1] FALSE
any(is.infinite(lpass)) # mengecek nilai Inf
## [1] FALSE
lpass
## Time Series:
## Start = 1960
## End = 2163
## Frequency = 1
## [1] 9.270315 9.262734 9.231054 9.264165 9.203809 9.262411 9.229800 9.260254
## [9] 9.219260 9.246788 9.233998 9.256499 9.289817 9.272451 9.260824 9.225643
## [17] 9.207256 9.236047 9.281451 9.252442 9.276970 9.252480 9.210580 9.223819
## [25] 9.219914 9.238801 9.238840 9.267656 9.264061 9.230956 9.243243 9.257233
## [33] 9.248368 9.230398 9.232033 9.247896 9.289198 9.245582 9.258616 9.258902
## [41] 9.252585 9.254147 9.276540 9.227836 9.270118 9.271219 9.276503 9.234711
## [49] 9.236495 9.219340 9.198379 9.254329 9.237596 9.227709 9.231368 9.249128
## [57] 9.263492 9.271972 9.236515 9.264033 9.262762 9.238558 9.227325 9.248897
## [65] 9.226440 9.226657 9.242517 9.246200 9.283210 9.255658 9.218358 9.257186
## [73] 9.208429 9.214253 9.234652 9.246479 9.254003 9.262487 9.286088 9.207917
## [81] 9.226302 9.251684 9.283331 9.255677 9.240579 9.223720 9.230898 9.257987
## [89] 9.236232 9.285995 9.247193 9.222605 9.255572 9.230682 9.261290 9.235345
## [97] 9.252317 9.270099 9.251223 9.253917 9.277120 9.237158 9.261490 9.191667
## [105] 9.249417 9.264175 9.273860 9.230055 9.251156 9.248724 9.231133 9.244983
## [113] 9.251847 9.260967 9.268298 9.273315 9.281879 9.234643 9.245167 9.225761
## [121] NA NA NA NA NA NA NA NA
## [129] NA NA NA NA NA NA NA NA
## [137] NA NA NA NA NA NA NA NA
## [145] NA NA NA NA NA NA NA NA
## [153] NA NA NA NA NA NA NA NA
## [161] NA NA NA NA NA NA NA NA
## [169] NA NA NA NA NA NA NA NA
## [177] NA NA NA NA NA NA NA NA
## [185] NA NA NA NA NA NA NA NA
## [193] NA NA NA NA NA NA NA NA
## [201] NA NA NA NA
lpass <- na.omit(lpass)
acf0<-acf(lpass,main="ACF",lag.max=48,xaxt="n")
axis(1, at=0:48/12, labels=0:48)
acf0$lag=acf0$lag*12
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%12==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")
Plot ACF menunjukkan data ln US Pass tidak stasioner baik di series non musiman maupun musiman
diff1.lpass=diff(lpass)
plot(diff1.lpass, main = "Time series plot of lpas d=1")
Differencing non musiman d = 1 berhasil mengatasi ketidakstasioneran
dalam rataan untuk komponen non musimannya.
acf1 <- acf(diff1.lpass,lag.max=48,xaxt="n", main="ACF d1")
axis(1, at=0:48/12, labels=0:48)
acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
barplot(height=acf1.2$V1,names.arg=acf1.2$V2,ylab="ACF",xlab="lag")
Plot ACF data nonseasonal differencing d = 1, mengkonfirmasi
kestasioneran komponen non musiman (namun perhatikan lag 12,24, dst),
pada series musiman belum stasioner
diff12.lpass <- diff(lpass,12)
plot(diff12.lpass, main = "Time series plot of ln(APM) D=12")
axis(1, at=0:48/12, labels=0:48)
acf2<- acf(diff12.lpass,lag.max=48,xaxt="n", main="ACF d1D1")
acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
barplot(height=acf2.2$V1,names.arg=acf2.2$V2,ylab="ACF", xlab="Lag")
Nonseasonal differencing D = 12 berhasil mengatasi ketidakstasioneran
dalam rataan untuk komponen seasonalnya (namun tidak untuk komponen non
musimannya).
diff12.1lpass <- diff(diff12.lpass,1)
plot(diff12.1lpass, main = "Time series plot of ln(APM) d=1, D=12")
## Identifikasi Model
acf2(diff12.1lpass,48)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.41 0.07 -0.18 -0.04 0.0 -0.04 0.03 0.11 0.00 0.00 0.22 -0.52
## PACF -0.41 -0.11 -0.23 -0.25 -0.2 -0.26 -0.26 -0.11 -0.08 -0.08 0.38 -0.27
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.19 -0.05 0.11 0.06 -0.12 0.14 -0.05 -0.08 0.00 0.02 -0.01 0.16
## PACF -0.17 0.00 -0.11 -0.04 -0.17 -0.09 -0.07 -0.10 -0.14 -0.09 0.14 0.01
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF -0.16 0.08 -0.04 -0.04 0.05 -0.08 0.04 0.10 0.0 -0.06 -0.03 -0.12
## PACF -0.13 -0.03 0.04 0.02 -0.14 -0.08 -0.08 0.04 0.1 -0.09 0.09 0.00
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF 0.16 -0.04 -0.03 0.07 0.02 -0.04 0.07 -0.19 0.07 0.00 0.04 0.03
## PACF -0.06 0.02 -0.10 0.00 0.08 -0.14 0.13 0.05 0.04 0.04 0.07 -0.10
Kedua komponen telah stasioner. Identifikasi komponen non musiman adalah ARIMA(0,1,2) . Identifikasi komponen musiman adalah ARIMA(0,1,1)12 , sehingga model tentatif adalah ARIMA(0,1,2)×(0,1,1)12.
eacf(diff12.1lpass)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o x x o o
## 1 x o o o o o o o o o o x o o
## 2 x o o o o o o o o o o x o o
## 3 x x x o o o o o o o o x x o
## 4 x x o o o o o o o o o x o o
## 5 x x o x o o o o o o o x o o
## 6 x o x x x o o o o o o x x o
## 7 x o o o o o o o o o o x o o
auto.arima(lpass)
## Series: lpass
## ARIMA(1,0,3) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3 mean
## 0.6376 -0.5807 -0.0335 -0.2286 9.2471
## s.e. 0.2718 0.2687 0.1082 0.0949 0.0009
##
## sigma^2 = 0.0004139: log likelihood = 299.46
## AIC=-586.93 AICc=-586.18 BIC=-570.2
ARIMA(0,1,2) × (0,1,1)12
model1 <- Arima(lpass,order=c(0,1,2),seasonal=c(0,1,1))
summary(model1)
## Series: lpass
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -0.8887 -0.1113
## s.e. 0.0879 0.0847
##
## sigma^2 = 0.0004431: log likelihood = 289.41
## AIC=-572.81 AICc=-572.6 BIC=-564.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0003036455 0.02078416 0.01694869 -0.003783184 0.1833173
## MASE ACF1
## Training set 0.7440105 0.005837166
ARIMA(2,1,1) × (1,1,1)12
model2<-Arima(lpass,order=c(2,1,1),seasonal=c(1,1,1))
summary(model2)
## Series: lpass
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## 0.1220 0.0102 -1.0000
## s.e. 0.0925 0.0923 0.0232
##
## sigma^2 = 0.0004466: log likelihood = 289.48
## AIC=-570.96 AICc=-570.61 BIC=-559.84
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0003367187 0.02077734 0.01693904 -0.004139669 0.183214
## MASE ACF1
## Training set 0.7435869 0.0004204594
ARIMA(0,1,2) × (0,1,1)12
model3<-Arima(lpass,order=c(1,1,2),seasonal=c(1,1,1))
summary(model3)
## Series: lpass
## ARIMA(1,1,2)
##
## Coefficients:
## ar1 ma1 ma2
## 0.8566 -1.9326 0.9327
## s.e. 0.0988 0.0943 0.0932
##
## sigma^2 = 0.0004408: log likelihood = 289.4
## AIC=-570.8 AICc=-570.45 BIC=-559.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0001840284 0.02064214 0.0167977 -0.002487716 0.1816748
## MASE ACF1
## Training set 0.7373826 0.16235
Di sini digunakan User Defined Function “printstatarima”
printstatarima <- function (x, digits = 4,se=TRUE){
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && nrow(x$var.coef)) {
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
coef <- rbind(coef, s.e. = ses)
statt <- coef[1,]/ses
pval <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
coef <- t(coef)
}
print.default(coef, print.gap = 2)
}
}
printstatarima(model1)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.8887 0.0879 -10.1104 0.0000
## ma2 -0.1113 0.0847 -1.3140 0.1914
printstatarima(model2)
##
## Coefficients:
## s.e. t sign.
## ar1 0.1220 0.0925 1.3189 0.1897
## ar2 0.0102 0.0923 0.1105 0.9122
## ma1 -1.0000 0.0232 -43.1034 0.0000
printstatarima(model3)
##
## Coefficients:
## s.e. t sign.
## ar1 0.8566 0.0988 8.6700 0
## ma1 -1.9326 0.0943 -20.4942 0
## ma2 0.9327 0.0932 10.0075 0
Model terbaik adalah model 1 karena semua dugaan parameternya berpengaruh nyata. Jika ada beberapa model yang semua dugaan parameternya nyata maka yang dipilih adalah yang nilai keakuratannya paling tinggi
#checkresiduals(model1,lag=c(10,30))
Residual telah menyebar normal dan tidak terdapat autokorelasi
tsdisplay(residuals(model1), lag.max=45, main='ARIMA(0,1,2)(0,1,1)12 Model Residuals')
library(portes)
ljbtest <- LjungBox(residuals(model1), lags = seq(5, 30, 5))
print(ljbtest)
## lags statistic df p-value
## 5 9.419668 5 0.09345111
## 10 11.178171 10 0.34380646
## 15 13.208586 15 0.58619056
## 20 15.341774 20 0.75652865
## 25 18.586141 25 0.81652056
## 30 20.719504 30 0.89645343
Tidak terdapat autokorelasi pada sisaan
library(tseries)
jarque.bera.test(residuals(model1))
##
## Jarque Bera Test
##
## data: residuals(model1)
## X-squared = 1.3309, df = 2, p-value = 0.514
Sisaan tidak menyebar normal
##Validasi Model
ramalan_arima = forecast(model1, 12)
accuracy(ramalan_arima,h = length(test))
## ME RMSE MAE MPE MAPE
## Training set -0.0003036455 0.02078416 0.01694869 -0.003783184 0.1833173
## MASE ACF1
## Training set 0.7440105 0.005837166
plot(ramalan_arima)
forecast_arima <- cbind(ramalan_arima$mean,ramalan_arima$lower,ramalan_arima$upper)
nrow(forecast_arima)
## [1] 12
# Mengambil Baris Yang Benar
forecast_arima <- cbind(USpass
[109:120, 1], exp(forecast_arima))
# Pastikan Jumlah Baris Sama
if (nrow(forecast_arima) == nrow(exp(forecast_arima))) {
forecast_arima <- as.data.frame(forecast_arima)
} else {
stop("Jumlah baris tidak se
suai antara USpass dan forecast
_arima.")
}
# Melihat ringkasan hasil
summary(forecast_arima)
## Sales (in thousands) ramalan_arima$mean ramalan_arima$lower.80%
## Min. :10155 Min. :10352 Min. :10075
## 1st Qu.:10326 1st Qu.:10377 1st Qu.:10098
## Median :10404 Median :10377 Median :10098
## Mean :10421 Mean :10375 Mean :10096
## 3rd Qu.:10539 3rd Qu.:10377 3rd Qu.:10098
## Max. :10742 Max. :10377 Max. :10098
## ramalan_arima$lower.95% ramalan_arima$upper.80% ramalan_arima$upper.95%
## Min. :9932 Min. :10636 Min. :10790
## 1st Qu.:9953 1st Qu.:10664 1st Qu.:10819
## Median :9953 Median :10664 Median :10819
## Mean :9951 Mean :10662 Mean :10817
## 3rd Qu.:9953 3rd Qu.:10664 3rd Qu.:10819
## Max. :9953 Max. :10664 Max. :10819
# Plot penjualan aktual
plot(forecast_arima[, 2], forecast_arima[, 1],
xlab = 'Month', ylab = 'Air Passenger (miles)',
type = 'l', col = 'black', lwd = 1, ylim = c(10000, 11000))
# Menambahkan garis ramalan
lines(forecast_arima[, 2], forecast_arima[, 3], col = 'red', lwd = 1) # lower.80%
lines(forecast_arima[, 2], forecast_arima[, 4], col = 'blue', lwd = 1) # lower.95%
lines(forecast_arima[, 2], forecast_arima[, 5], col = 'green', lwd = 1) # upper.80%
lines(forecast_arima[, 2], forecast_arima[, 6], col = 'purple', lwd = 1) # upper.95%
# Menambahkan legend
legend("topleft", c("Actual", "Lower 80%", "Lower 95%", "Upper 80%", "Upper 95%"),
col = c("black", "red", "blue", "green", "purple"), lty = 1, lwd = 1,
cex = 0.6, inset = 0.02, box.lty = 0)