Pendahuluan

Persiapan

Jangan lupa, untuk memasukan library-library yang dibutuhkan.

library("tseries")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library("forecast")
library("TTR")
library("TSA")
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library("graphics")
library("astsa")
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
library("car")
## Loading required package: carData
library("portes")
library(readxl)

Input Data

USpass<-read_excel("D:/TUGAS/R/datalatihan.xlsx")
     class(USpass)
## [1] "tbl_df"     "tbl"        "data.frame"
head(USpass)
## # A tibble: 6 × 1
##   `Sales (in thousands)`
##                    <dbl>
## 1                 10618.
## 2                 10538.
## 3                 10209.
## 4                 10553 
## 5                  9935.
## 6                 10534.

Merubah Class

Di sini dilakukan perubahan data USpass dari data.frame menjadi data ts atau Time Series

USpass.ts <- ts(USpass, frequency = 12, start = c(1960,1), end = c(1977,12))
     class(USpass.ts)
## [1] "ts"

#1. Plot Time Series

Membuat Plot Time Series

# Plot untuk data penjualan tunggal
plot.ts(USpass.ts, main = "Monthly Sales Data", xlab = "Year", ylab = "Sales (in thousand)")

Plot data asal memperlihatkan pola musiman dengan s = 12, serta terdapat perilaku nonstasioner baik dalam rataan maupun ragam.

Plot per Musim

seasonplot(USpass.ts, 12, main = "Monthly Sales Data", ylab = "Sales (in thousand)", 
           year.labels = TRUE, col = rainbow(length(unique(floor(time(USpass.ts)))))
)

Deskriptif USpass

monthplot(USpass.ts, ylab = "Sales (in thousand)", main = "Average Monthly Sales")

# Boxplot bulanan untuk data penjualan
boxplot(USpass.ts ~ cycle(USpass.ts), xlab = "Month", ylab = "Sales (in thousand)", 
        main = "Monthly Sales Data - Boxplot")

Boxplot menunjukkan ukuran keragaman (panjang box) dan pemusatan (median) yang berbeda pada masing-masing bulan # Identifikasi Model ## Transformasi Logaritma

lnAPM=log(USpass.ts)

Plot Hasil Transformasi Logaritma

monthplot(lnAPM,ylab="Air Passenger (miles)")

boxplot(lnAPM ~ cycle(lnAPM), xlab = "Month", ylab = "ln(APM)", main = "Monthly U.S. Air Passenger Data - Boxplot")

Boxplot menunjukkan ukuran keragaman (panjang box) hampir sama akan tetapi ukuran pemusatan (median) yang berbeda pada masing-masing bulan

Tes Kehomogenan Raga

fligner.test(USpass.ts~cycle(USpass.ts))
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  USpass.ts by cycle(USpass.ts)
## Fligner-Killeen:med chi-squared = 21.074, df = 11, p-value = 0.03261

Nilai-p kurang dari 5% sehinga tolak H0 dengan kata lain ragam tidak homogen

fligner.test(USpass.ts~cycle(USpass.ts))
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  USpass.ts by cycle(USpass.ts)
## Fligner-Killeen:med chi-squared = 21.074, df = 11, p-value = 0.03261

Nilai-p lebih dari 5% sehingga tidak tolak H0 dengan kata lain ragam data setelah di ln kan homogen ## Pembagian data lnAPM menjadi training-testing Data setahun terakhir digunakan untuk data testing sementara data sebelumnya untuk membangun model

lpass<- subset(lnAPM,start=1,end=204)
     test <- subset(lnAPM,start=205,end=216)

Identifikasi kestasioneran data ln US Pass

acf0<-acf(lpass,main="ACF",lag.max=48,xaxt="n")
     axis(1, at=0:48/12, labels=0:48)

acf0$lag=acf0$lag*12
     acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
     acf0.2 <- acf0.1[which(acf0.1$V2%%12==0),]
     barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")

Plot ACF menunjukkan data ln US Pass tidak stasioner baik di series non musiman maupun musiman

Melakukan proses pembedaan pertama pada series non musiman data lnAPM

diff1.lpass=diff(lpass)
     plot(diff1.lpass, main = "Time series plot of lpas d=1")

Differencing non musiman d = 1 berhasil mengatasi ketidakstasioneran dalam rataan untuk komponen non musimannya.

Identifikasi kestasioneran data ln US Pass

acf1 <- acf(diff1.lpass,lag.max=48,xaxt="n", main="ACF d1")
     axis(1, at=0:48/12, labels=0:48)

acf1$lag <- acf1$lag * 12
     acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
     acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
     barplot(height=acf1.2$V1,names.arg=acf1.2$V2,ylab="ACF",xlab="lag")

Plot ACF data nonseasonal differencing d = 1, mengkonfirmasi kestasioneran komponen non musiman (namun perhatikan lag 12,24, dst), pada series musiman belum stasioner

Melakukan proses pembedaan pada series musiman

diff12.lpass <- diff(lpass,12)
     plot(diff12.lpass, main = "Time series plot of ln(APM) D=12")
     axis(1, at=0:48/12, labels=0:48)

acf2<- acf(diff12.lpass,lag.max=48,xaxt="n", main="ACF d1D1")

acf2$lag <- acf2$lag * 12
     acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
     acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
     barplot(height=acf2.2$V1,names.arg=acf2.2$V2,ylab="ACF", xlab="Lag")

Nonseasonal differencing D = 12 berhasil mengatasi ketidakstasioneran dalam rataan untuk komponen seasonalnya (namun tidak untuk komponen non musimannya).

Melakukan pembedaan pertama pada data yang telah dilakukan pembedaan pada series musimannya

diff12.1lpass <- diff(diff12.lpass,1)
     plot(diff12.1lpass, main = "Time series plot of ln(APM) d=1, D=12")

## Identifikasi Model

acf2(diff12.1lpass,48)

##       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  -0.41  0.09 -0.23  0.02 -0.04 -0.04  0.08  0.06  0.05 -0.05  0.23 -0.48
## PACF -0.41 -0.09 -0.27 -0.23 -0.19 -0.29 -0.21 -0.12 -0.06 -0.06  0.37 -0.22
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF   0.13 -0.04  0.10  0.05 -0.07  0.15 -0.07 -0.08  0.00 -0.02  0.02  0.06
## PACF -0.20 -0.01 -0.19 -0.12 -0.19 -0.12 -0.06 -0.13 -0.05 -0.11  0.15 -0.06
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.00  0.03  0.02 -0.10  0.05 -0.14  0.04  0.10  0.05 -0.06 -0.01 -0.04
## PACF -0.13  0.00  0.02 -0.03 -0.06 -0.10 -0.13 -0.02  0.14 -0.13  0.12  0.04
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF   0.04 -0.04 -0.07  0.09  0.06 -0.03  0.10 -0.20  0.02  0.04  0.00  0.00
## PACF -0.07  0.09 -0.11 -0.11  0.12 -0.15  0.07 -0.03 -0.03  0.07  0.05 -0.11

Kedua komponen telah stasioner. Identifikasi komponen non musiman adalah ARIMA(0,1,2) . Identifikasi komponen musiman adalah ARIMA(0,1,1)12 , sehingga model tentatif adalah ARIMA(0,1,2)×(0,1,1)12 .

eacf(diff12.1lpass)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o x  x  o  o 
## 1 x x x x o o o o o o o  x  o  o 
## 2 x x x o o o o o o o o  x  o  o 
## 3 x o x o o o o o o o o  x  x  x 
## 4 x o x o o o o o o o o  x  o  o 
## 5 x x x x o o o o o o o  x  o  x 
## 6 x x o o x o o o o o o  x  x  o 
## 7 x o o o x o o o o o o  x  o  o
auto.arima(lpass)
## Series: lpass 
## ARIMA(2,0,2)(1,0,0)[12] with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     sar1    mean
##       1.2168  -0.7331  -1.0939  0.5399  -0.0194  9.2470
## s.e.  0.1446   0.1528   0.1762  0.1861   0.0792  0.0012
## 
## sigma^2 = 0.0004248:  log likelihood = 505.37
## AIC=-996.75   AICc=-996.18   BIC=-973.52

Pendugaan Parameter Model

ARIMA(0,1,2) × (0,1,1)12

model1 <- Arima(lpass,order=c(0,1,2),seasonal=c(0,1,1))
     summary(model1)
## Series: lpass 
## ARIMA(0,1,2)(0,1,1)[12] 
## 
## Coefficients:
##           ma1      ma2     sma1
##       -0.8389  -0.1611  -1.0000
## s.e.   0.0667   0.0627   0.0616
## 
## sigma^2 = 0.0004305:  log likelihood = 450.88
## AIC=-893.76   AICc=-893.54   BIC=-880.75
## 
## Training set error measures:
##                         ME       RMSE        MAE          MPE     MAPE
## Training set -0.0003240709 0.01991827 0.01580201 -0.003916568 0.170899
##                   MASE       ACF1
## Training set 0.6346118 0.01782754

ARIMA(2,1,1) × (1,1,1)12

model2<-Arima(lpass,order=c(2,1,1),seasonal=c(1,1,1))
     summary(model2)
## Series: lpass 
## ARIMA(2,1,1)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     sar1     sma1
##       0.1881  -0.0220  -1.0000  -0.1349  -1.0000
## s.e.  0.0725   0.0736   0.0236   0.0784   0.0669
## 
## sigma^2 = 0.0004201:  log likelihood = 452.65
## AIC=-893.3   AICc=-892.84   BIC=-873.79
## 
## Training set error measures:
##                         ME       RMSE        MAE          MPE      MAPE
## Training set -0.0001950609 0.01957233 0.01559679 -0.002509951 0.1686852
##                   MASE         ACF1
## Training set 0.6263701 -0.006421489

ARIMA(0,1,2) × (0,1,1)12

model3<-Arima(lpass,order=c(1,1,2),seasonal=c(1,1,1))
     summary(model3)
## Series: lpass 
## ARIMA(1,1,2)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1      ma2     sar1     sma1
##       0.1588  -0.9732  -0.0268  -0.1329  -1.0000
## s.e.  0.1970   0.1896   0.1882   0.0780   0.0668
## 
## sigma^2 = 0.0004205:  log likelihood = 452.62
## AIC=-893.23   AICc=-892.77   BIC=-873.72
## 
## Training set error measures:
##                         ME       RMSE        MAE          MPE      MAPE
## Training set -0.0001789155 0.01957988 0.01562929 -0.002335584 0.1690369
##                   MASE        ACF1
## Training set 0.6276754 0.001679716

4. Seleksi Model

Pengujian Parameter Model

Di sini digunakan User Defined Function “printstatarima”

printstatarima <- function (x, digits = 4,se=TRUE){
       if (length(x$coef) > 0) {
         cat("\nCoefficients:\n")
         coef <- round(x$coef, digits = digits)
         if (se && nrow(x$var.coef)) {
           ses <- rep(0, length(coef))
           ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
           coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
           coef <- rbind(coef, s.e. = ses)
           statt <- coef[1,]/ses
           pval  <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
           coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
           coef <- t(coef)
         }
         print.default(coef, print.gap = 2)
       }
}
printstatarima(model1)
## 
## Coefficients:
##                  s.e.         t   sign.
## ma1   -0.8389  0.0667  -12.5772  0.0000
## ma2   -0.1611  0.0627   -2.5694  0.0109
## sma1  -1.0000  0.0616  -16.2338  0.0000
printstatarima(model2)
## 
## Coefficients:
##                  s.e.         t   sign.
## ar1    0.1881  0.0725    2.5945  0.0102
## ar2   -0.0220  0.0736   -0.2989  0.7653
## ma1   -1.0000  0.0236  -42.3729  0.0000
## sar1  -0.1349  0.0784   -1.7207  0.0868
## sma1  -1.0000  0.0669  -14.9477  0.0000
printstatarima(model3)
## 
## Coefficients:
##                  s.e.         t   sign.
## ar1    0.1588  0.1970    0.8061  0.4211
## ma1   -0.9732  0.1896   -5.1329  0.0000
## ma2   -0.0268  0.1882   -0.1424  0.8869
## sar1  -0.1329  0.0780   -1.7038  0.0899
## sma1  -1.0000  0.0668  -14.9701  0.0000

Model terbaik adalah model 1 karena semua dugaan parameternya berpengaruh nyata. Jika ada beberapa model yang semua dugaan parameternya nyata maka yang dipilih adalah yang nilai keakuratannya paling tinggi

Diagnostik Model

#checkresiduals(model1,lag=c(10,30))

Residual telah menyebar normal dan tidak terdapat autokorelasi

Alternatif Tampilan

tsdisplay(residuals(model1), lag.max=45, main='ARIMA(0,1,2)(0,1,1)12 Model Residuals')

library(portes)
     ljbtest <- LjungBox(residuals(model1),lags=seq(5,30,5))
     ljbtest
##  lags statistic df     p-value
##     5  20.48645  5 0.001012458
##    10  24.55089 10 0.006265017
##    15  27.77288 15 0.023034753
##    20  33.17072 20 0.032313293
##    25  37.49339 25 0.051797969
##    30  44.67411 30 0.041381099

Tidak terdapat autokorelasi pada sisaan

library(tseries)
     jarque.bera.test(residuals(model1))
## 
##  Jarque Bera Test
## 
## data:  residuals(model1)
## X-squared = 1.2282, df = 2, p-value = 0.5411

Sisaan tidak menyebar normal

5. Melakukan Peramalan menggunakan Model Terbaik

##Validasi Model

ramalan_arima = forecast(model1, 12)
     accuracy(ramalan_arima,test)
##                         ME       RMSE        MAE          MPE      MAPE
## Training set -0.0003240709 0.01991827 0.01580201 -0.003916568 0.1708990
## Test set     -0.0026494849 0.01682258 0.01321211 -0.028979995 0.1428423
##                   MASE        ACF1 Theil's U
## Training set 0.6346118  0.01782754        NA
## Test set     0.5306009 -0.16044177 0.5859166
plot(ramalan_arima)

forecast_arima <- cbind(ramalan_arima$mean,ramalan_arima$lower,ramalan_arima$upper)

Transformasi balik

forecast_arima <- cbind(USpass[205:216, 1], exp(forecast_arima))
forecast_arima <- as.data.frame(forecast_arima)
str(forecast_arima)
## 'data.frame':    12 obs. of  6 variables:
##  $ Sales (in thousands)   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ ramalan_arima$mean     : num  10437 10324 10255 10423 10282 ...
##  $ ramalan_arima$lower.80%: num  10153 10039 9972 10136 9998 ...
##  $ ramalan_arima$lower.95%: num  10006 9891 9826 9987 9851 ...
##  $ ramalan_arima$upper.80%: num  10729 10617 10546 10719 10574 ...
##  $ ramalan_arima$upper.95%: num  10886 10776 10704 10879 10732 ...
ncol(forecast_arima)
## [1] 6

Plot ramalan perbulan

# Tentukan range untuk x dan y
x_range <- 1:nrow(forecast_arima)
y_min <- min(forecast_arima[,4], na.rm=TRUE)  # kolom lower.95%
y_max <- max(forecast_arima[,2], na.rm=TRUE)  # kolom mean

# Buat plot
plot(x_range,
     forecast_arima[,2],  # kolom mean
     type = "l",
     col = "black",
     xlim = c(min(x_range), max(x_range)),
     ylim = c(y_min, y_max),
     xlab = "Period",
     ylab = "Sales (in thousands)",
     main = "Forecast Plot with Confidence Intervals")

# Tambahkan garis confidence intervals
lines(x_range, forecast_arima[,3], col='blue', lwd=1)  # kolom lower.80%
lines(x_range, forecast_arima[,4], col='red', lwd=1)   # kolom lower.95%

# Tambahkan legend
legend("topleft",
       legend=c("Mean Forecast", "Lower 80%", "Lower 95%"),
       col=c("black", "blue", "red"),
       lty=1,
       lwd=1,
       cex=0.8,
       box.lty=0)

head(forecast_arima)  
##   Sales (in thousands) ramalan_arima$mean ramalan_arima$lower.80%
## 1                   NA           10436.83               10152.945
## 2                   NA           10323.94               10038.926
## 3                   NA           10255.34                9972.216
## 4                   NA           10423.43               10135.673
## 5                   NA           10281.95                9998.096
## 6                   NA           10359.81               10073.806
##   ramalan_arima$lower.95% ramalan_arima$upper.80% ramalan_arima$upper.95%
## 1               10005.803                10728.66                10886.43
## 2                9891.248                10617.04                10775.56
## 3                9825.519                10546.49                10703.95
## 4                9986.572                10719.36                10879.41
## 5                9851.019                10573.86                10731.73
## 6                9925.615                10653.93                10813.00