Tugas Responsi 7 ADW - Pemodelan ARIMA

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tseries)
library(TTR)
library(graphics)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar

Import data

Dataset ini berisi data bulanan okupansi kereta api selama periode tahunan. rentang data selama tahun 1999-2011. Kolom okupansi terdiri dari persentase okupansi kereta api pada bulan tertentu dengan total 150 Observasi atau 150 Bulan.

okupansi<-read.csv("C:/Users/DELL/Documents/Dataset R/Occupancy_train.csv", header= TRUE)
print(head(okupansi))
##   X    Time Total.Occupancy.rate..percent.
## 1 0 1999M01                           43.7
## 2 1 1999M02                           41.0
## 3 2 1999M03                           36.5
## 4 3 1999M04                           32.8
## 5 4 1999M05                           24.5
## 6 5 1999M06                           24.1
print(tail(okupansi,10))
##       X    Time Total.Occupancy.rate..percent.
## 141 140 2010M09                           31.6
## 142 141 2010M10                           33.4
## 143 142 2010M11                           38.9
## 144 143 2010M12                           39.0
## 145 144 2011M01                           48.9
## 146 145 2011M02                           47.8
## 147 146 2011M03                           42.6
## 148 147 2011M04                           37.7
## 149 148 2011M05                           29.6
## 150 149 2011M06                           27.6

Menampilkan Data Univariat yaitu Kolom Ketiga

occupancy <- okupansi [,3]
occupancy
##   [1] 43.7 41.0 36.5 32.8 24.5 24.1 26.0 26.2 30.7 29.9 35.2 33.8 44.1 41.8 36.9
##  [16] 34.3 25.1 23.5 26.9 26.9 28.3 31.5 36.5 37.5 47.7 44.8 40.8 34.6 27.1 25.3
##  [31] 28.9 29.2 30.5 32.8 36.2 37.8 48.1 46.6 45.1 36.5 28.7 26.7 30.6 29.8 31.7
##  [46] 35.0 39.9 40.3 48.8 48.8 44.0 39.6 29.4 26.6 31.3 29.9 32.8 35.2 40.0 42.1
##  [61] 50.4 48.9 45.3 39.5 30.1 29.0 32.5 30.8 32.9 34.2 41.0 40.8 50.9 49.7 46.4
##  [76] 38.1 29.0 29.6 31.6 29.7 32.1 33.3 39.1 38.8 48.8 48.7 44.1 37.7 29.2 27.6
##  [91] 30.7 30.7 32.9 34.8 41.0 40.4 51.1 51.6 46.9 38.5 30.8 28.8 31.7 31.4 33.4
## [106] 33.7 40.5 40.1 50.8 50.8 47.6 38.3 30.8 27.5 31.3 30.5 32.1 34.4 38.2 38.4
## [121] 48.3 48.2 43.6 37.8 30.0 25.4 30.4 29.1 31.6 33.4 37.8 39.6 49.5 48.5 43.7
## [136] 37.3 28.1 26.9 30.5 29.1 31.6 33.4 38.9 39.0 48.9 47.8 42.6 37.7 29.6 27.6

Membentuk Data time-series

#occupancy.ts<-ts(occupancy, start= c(1999,1), end=c(2011,6), frequency= 12) 
#occupancy.ts

occupancy.ts<-ts(occupancy, start= 1, end=150) 
occupancy.ts
## Time Series:
## Start = 1 
## End = 150 
## Frequency = 1 
##   [1] 43.7 41.0 36.5 32.8 24.5 24.1 26.0 26.2 30.7 29.9 35.2 33.8 44.1 41.8 36.9
##  [16] 34.3 25.1 23.5 26.9 26.9 28.3 31.5 36.5 37.5 47.7 44.8 40.8 34.6 27.1 25.3
##  [31] 28.9 29.2 30.5 32.8 36.2 37.8 48.1 46.6 45.1 36.5 28.7 26.7 30.6 29.8 31.7
##  [46] 35.0 39.9 40.3 48.8 48.8 44.0 39.6 29.4 26.6 31.3 29.9 32.8 35.2 40.0 42.1
##  [61] 50.4 48.9 45.3 39.5 30.1 29.0 32.5 30.8 32.9 34.2 41.0 40.8 50.9 49.7 46.4
##  [76] 38.1 29.0 29.6 31.6 29.7 32.1 33.3 39.1 38.8 48.8 48.7 44.1 37.7 29.2 27.6
##  [91] 30.7 30.7 32.9 34.8 41.0 40.4 51.1 51.6 46.9 38.5 30.8 28.8 31.7 31.4 33.4
## [106] 33.7 40.5 40.1 50.8 50.8 47.6 38.3 30.8 27.5 31.3 30.5 32.1 34.4 38.2 38.4
## [121] 48.3 48.2 43.6 37.8 30.0 25.4 30.4 29.1 31.6 33.4 37.8 39.6 49.5 48.5 43.7
## [136] 37.3 28.1 26.9 30.5 29.1 31.6 33.4 38.9 39.0 48.9 47.8 42.6 37.7 29.6 27.6

Create training and validation Data

okup.test <- tail(occupancy.ts, 10)
okup.train<- head(occupancy.ts, length(occupancy.ts) - length(okup.test))
n <- length(okup.train)

Plot Time Series

par(col="coral")
ts.plot(okup.train, xlab="Period", ylab="Occupancy Rate", lty=1)
title("Occupancy Of Train")
points(okup.train)

Cek Kestasioneran

#Cek Kestasioneran
acf(okup.train, lag.max=20)

adf.test(okup.train,alternative=c("stationary"),
         k=trunc((length(okup.train)-1)^(1/3))) #Augmented Dickey-Fuller
## Warning in adf.test(okup.train, alternative = c("stationary"), k =
## trunc((length(okup.train) - : p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  okup.train
## Dickey-Fuller = -7.772, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

Terlihat bahwa nilai p value < dari 0.05 maka H0 ditolak dan H1 diterima. Hal ini berarti bahwa plot data sudah stasioner. Dan juga jika dilihat dari plot yang dihasilkan, nampak jelas bahwa data merupakan data seasonal yang stasioner dalam nilai tengah dan ragam. Sehingga tidak perlu dilakukan differencing dan akan dilakukan pengidentifikasian Model Terbaik.

Pengidentifikasian Model

#Pengidentifikasian Model
acf(okup.train, lag.max=20)

pacf(okup.train, lag.max=20)

eacf(okup.train)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x o x x x x x o x x  x  x  x 
## 1 x x o x x x x x o x x  x  x  x 
## 2 x x o o o x o o o o x  x  x  x 
## 3 x o o o o x o o o o o  x  x  x 
## 4 x o o o o x o o o o o  x  x  o 
## 5 x o o x o x o o o o o  x  x  o 
## 6 x o o x o x o o o o o  x  x  o 
## 7 x x x x x o x o o x o  x  o  x

Kandidat Model Hasil Identifikasi

#Pendugaan Parameter dan Penentuan Model Terbaik
#Berdasarkan Kandidat model Hasil Identifikasi

arima(okup.train, order=c(0,0,2), method="ML") #ARIMA (0,0,2) #Berdasarkan ACF
## 
## Call:
## arima(x = okup.train, order = c(0, 0, 2), method = "ML")
## 
## Coefficients:
##          ma1     ma2  intercept
##       0.9725  0.5317    36.2004
## s.e.  0.0853  0.0675     0.9379
## 
## sigma^2 estimated as 19.87:  log likelihood = -408.47,  aic = 822.95
arima(okup.train, order=c(3,0,0), method="ML") #ARIMA (3,0,0) #Berdasarkan PACF
## 
## Call:
## arima(x = okup.train, order = c(3, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.9476  0.0227  -0.4331    36.2785
## s.e.  0.0758  0.1109   0.0762     0.6730
## 
## sigma^2 estimated as 13.45:  log likelihood = -381.62,  aic = 771.24
arima(okup.train, order=c(2,0,2), method="ML") #ARIMA (2,0,2) #Berdasarkan EACF
## 
## Call:
## arima(x = okup.train, order = c(2, 0, 2), method = "ML")
## 
## Coefficients:
##          ar1      ar2      ma1     ma2  intercept
##       1.1943  -0.6302  -0.2328  0.5614    36.2197
## s.e.  0.0873   0.0836   0.0929  0.1218     0.9544
## 
## sigma^2 estimated as 13.83:  log likelihood = -383.66,  aic = 777.31
arima(okup.train, order=c(3,0,1), method="ML") #ARIMA (3,0,1) #Berdasarkan EACF
## 
## Call:
## arima(x = okup.train, order = c(3, 0, 1), method = "ML")
## 
## Coefficients:
##          ar1      ar2      ar3      ma1  intercept
##       1.3603  -0.4634  -0.2241  -0.5217    36.3133
## s.e.  0.1296   0.1884   0.1060   0.1131     0.4492
## 
## sigma^2 estimated as 12.86:  log likelihood = -378.57,  aic = 767.15
#Kompare AIC

model.arima1 <- c("ARIMA(0,0,2)","ARIMA(3,0,0)","ARIMA(2,0,2)","ARIMA(3,0,1)")
AIC.kandididat1 = c (822.95,  771.24, 777.31, 767.15)
comp.arima1 <- cbind(model.arima1, AIC.kandididat1)
colnames(comp.arima1) <- c("Kandidat Model", "Nilai AIC")
comp.aic1 <- as.data.frame(comp.arima1)
comp.aic1
##   Kandidat Model Nilai AIC
## 1   ARIMA(0,0,2)    822.95
## 2   ARIMA(3,0,0)    771.24
## 3   ARIMA(2,0,2)    777.31
## 4   ARIMA(3,0,1)    767.15

Untuk komparasi model di atas, diperoleh model terbaik untuk ARIMA(3,0,1) karena menghasilkan nilai ‘AIC’ terendah yaitu sebesar 767.15.

#Plot Data Aktual dan Nilai Dugaan Berdasarkan Model Terbaik

model <- arima(okup.train, order=c(3,0,1), method="ML") #ARIMA (3,0,1)
dugaan <- fitted(model)
cbind(okup.train, dugaan)
## Time Series:
## Start = 1 
## End = 140 
## Frequency = 1 
##     okup.train   dugaan
##   1       43.7 40.12078
##   2       41.0 41.83935
##   3       36.5 37.99794
##   4       32.8 33.41193
##   5       24.5 30.66004
##   6       24.1 25.01702
##   7       26.0 26.43772
##   8       26.2 30.81773
##   9       30.7 32.48028
##  10       29.9 36.60326
##  11       35.2 35.95357
##  12       33.8 39.42155
##  13       44.1 37.78026
##  14       41.8 45.02253
##  15       36.9 42.41324
##  16       34.3 35.70076
##  17       25.1 32.80417
##  18       23.5 25.88064
##  19       26.9 25.77227
##  20       26.9 31.36969
##  21       28.3 33.07310
##  22       31.5 34.37406
##  23       36.5 37.08755
##  24       37.5 40.89969
##  25       47.7 40.69333
##  26       44.8 47.55550
##  27       40.8 43.75334
##  28       34.6 37.47365
##  29       27.1 31.50136
##  30       25.3 25.86521
##  31       28.9 26.27969
##  32       29.2 32.02937
##  33       30.5 34.01595
##  34       32.8 35.19696
##  35       36.2 37.07227
##  36       37.8 39.54481
##  37       48.1 40.08574
##  38       46.6 47.50212
##  39       45.1 44.98237
##  40       36.5 40.79708
##  41       28.7 32.43279
##  42       26.7 25.84902
##  43       30.6 26.27812
##  44       29.8 32.44691
##  45       31.7 33.63551
##  46       35.0 35.34583
##  47       39.9 38.30432
##  48       40.3 42.00206
##  49       48.8 41.25679
##  50       48.8 46.71264
##  51       44.0 45.53082
##  52       39.6 38.98450
##  53       29.4 34.10347
##  54       26.6 26.11767
##  55       31.3 25.31547
##  56       29.9 32.42116
##  57       32.8 33.40397
##  58       35.2 35.94428
##  59       40.0 38.25215
##  60       42.1 41.71956
##  61       50.4 42.52769
##  62       48.9 47.86096
##  63       45.3 45.06908
##  64       39.5 39.42891
##  65       30.1 33.62668
##  66       29.0 26.21097
##  67       32.5 27.07479
##  68       30.8 33.07640
##  69       32.9 33.40676
##  70       34.2 35.34368
##  71       41.0 36.85221
##  72       40.8 42.26870
##  73       50.9 41.48473
##  74       49.7 48.11443
##  75       46.4 45.93178
##  76       38.1 40.31872
##  77       29.0 32.22795
##  78       29.6 24.96102
##  79       31.6 27.74919
##  80       29.7 32.64198
##  81       32.1 32.54027
##  82       33.3 34.93210
##  83       39.1 36.49992
##  84       38.8 41.08789
##  85       48.8 40.27357
##  86       48.7 47.07397
##  87       44.1 45.97157
##  88       37.7 39.34466
##  89       29.2 32.67415
##  90       27.6 26.06224
##  91       30.7 26.64352
##  92       30.7 32.19234
##  93       32.9 34.00942
##  94       34.8 36.10776
##  95       41.0 37.77642
##  96       40.4 42.47288
##  97       51.1 41.12139
##  98       51.6 48.27792
##  99       46.9 47.60733
## 100       38.5 40.68698
## 101       30.8 32.09811
## 102       28.8 26.10535
## 103       31.7 26.75165
## 104       31.4 32.17278
## 105       33.4 33.85396
## 106       33.7 35.89751
## 107       40.5 36.35575
## 108       40.1 41.71005
## 109       50.8 40.94997
## 110       50.8 48.18796
## 111       47.6 47.09576
## 112       38.3 41.44501
## 113       30.8 32.18077
## 114       27.5 26.08431
## 115       31.3 25.69533
## 116       30.5 31.88861
## 117       32.1 33.42761
## 118       34.4 35.09158
## 119       38.2 37.32632
## 120       38.4 40.25461
## 121       48.3 39.67395
## 122       48.2 46.72884
## 123       43.6 45.69353
## 124       37.8 39.12402
## 125       30.0 32.98661
## 126       25.4 26.96182
## 127       30.4 24.87489
## 128       29.1 31.85818
## 129       31.6 33.12529
## 130       33.4 35.36495
## 131       37.8 37.17574
## 132       39.6 40.41603
## 133       49.5 41.17389
## 134       48.5 48.05129
## 135       43.7 45.81015
## 136       37.3 38.86084
## 137       28.1 32.31645
## 138       26.9 25.22815
## 139       30.5 26.22068
## 140       29.1 32.37484
plot.ts(okup.train, xlab="Waktu", ylab="Data Okupansi")

points(okup.train)
par(col="red")
lines(dugaan)

par(col="black")

Fungsi auto.arima

auto.arima(okup.train, trace=TRUE)
## 
##  ARIMA(2,0,2) with non-zero mean : 779.9442
##  ARIMA(0,0,0) with non-zero mean : 965.2238
##  ARIMA(1,0,0) with non-zero mean : 835.3565
##  ARIMA(0,0,1) with non-zero mean : 873.7494
##  ARIMA(0,0,0) with zero mean     : 1410.202
##  ARIMA(1,0,2) with non-zero mean : 800.0209
##  ARIMA(2,0,1) with non-zero mean : 771.9043
##  ARIMA(1,0,1) with non-zero mean : 820.1745
##  ARIMA(2,0,0) with non-zero mean : 800.3439
##  ARIMA(3,0,1) with non-zero mean : 769.7803
##  ARIMA(3,0,0) with non-zero mean : 773.6833
##  ARIMA(4,0,1) with non-zero mean : 771.7272
##  ARIMA(3,0,2) with non-zero mean : Inf
##  ARIMA(4,0,0) with non-zero mean : 773.1771
##  ARIMA(4,0,2) with non-zero mean : Inf
##  ARIMA(3,0,1) with zero mean     : Inf
## 
##  Best model: ARIMA(3,0,1) with non-zero mean
## Series: okup.train 
## ARIMA(3,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ar3      ma1     mean
##       1.3603  -0.4634  -0.2241  -0.5217  36.3132
## s.e.  0.1296   0.1884   0.1060   0.1131   0.4492
## 
## sigma^2 = 13.33:  log likelihood = -378.57
## AIC=769.15   AICc=769.78   BIC=786.8

Model terbaik berdasarkan auto.arima ialah ARIMA (3,0,1) dengan AIC terkecil yaitu 769.78. Sedangkan model terbaik berdasarkan ARIMA step-by-step,ARIMA(3,0,1) AIC = 767.15. Oleh karna itu, Model terbaik diperoleh menggunakan ARIMA step by step. Selanjutnya akan dilakukan analisis diagnostik untuk menentukan kelayakan model.

Analisis Diagnostik

# Model ARIMA (3,0,1) menggunakan Data Awal Y
model.diag301 = stats::arima(okup.train, order = c(3,0,1), method="ML")
model.diag301
## 
## Call:
## stats::arima(x = okup.train, order = c(3, 0, 1), method = "ML")
## 
## Coefficients:
##          ar1      ar2      ar3      ma1  intercept
##       1.3603  -0.4634  -0.2241  -0.5217    36.3133
## s.e.  0.1296   0.1884   0.1060   0.1131     0.4492
## 
## sigma^2 estimated as 12.86:  log likelihood = -378.57,  aic = 769.15
sisaan = checkresiduals(model.diag301)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,1) with non-zero mean
## Q* = 43.866, df = 5, p-value = 2.466e-08
## 
## Model df: 5.   Total lags used: 10
sisaan
## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,1) with non-zero mean
## Q* = 43.866, df = 5, p-value = 2.466e-08
sisaan2 <- model.diag301$residuals
# Eksplorasi
par(mfrow=c(2,2))
qqnorm(sisaan2)
qqline(sisaan2, col = "blue", lwd = 2)
plot(c(1:length(sisaan2)),sisaan2)
acf(sisaan2)
pacf(sisaan2)

Terlihat dari plot di atas bahwa sisaan tidak mengikuti sebaran normal. Selanjutnya ditinjau dari plot ACF dan PACFterlihat ada lag-lag yang signifikan. Hal ini berarti bahwa ada gejala autokorelasi pada sisaan.

Overfitting Model

Pada langkah ini akan dilakukan overfitting dimana kita mencoba untuk menggunakan model ARIMA dengan paramater yang lebih. Saya akan mencoba dua model yaitu ARIMA(3,0,2) dan ARIMA(4,0,1)

# Overfitting 
(model.diag<- arima(ts(occupancy), order = c(3,0,2), method = "ML")) #Overfitting MA
## 
## Call:
## arima(x = ts(occupancy), order = c(3, 0, 2), method = "ML")
## 
## Coefficients:
##          ar1     ar2      ar3     ma1      ma2  intercept
##       0.6058  0.7448  -0.8607  0.3774  -0.6152    36.3165
## s.e.  0.0468  0.0244   0.0418  0.0655   0.0651     0.4193
## 
## sigma^2 estimated as 11.48:  log likelihood = -398.37,  aic = 808.73
(model.diag<- arima(ts(occupancy), order = c(4,0,1), method = "ML")) #Overfitting AR
## 
## Call:
## arima(x = ts(occupancy), order = c(4, 0, 1), method = "ML")
## 
## Coefficients:
##          ar1      ar2      ar3     ar4      ma1  intercept
##       1.4094  -0.4792  -0.2857  0.0573  -0.5731    36.3088
## s.e.  0.1457   0.1759   0.1405  0.1058   0.1193     0.4213
## 
## sigma^2 estimated as 12.64:  log likelihood = -404.29,  aic = 820.58

Hasil pengujian:

Normalitas Data

H0: sisaan menyebar normal

H1: sisaan tidak mengikuti sebaran normal

Hasil : p−value=0.000000002466<α=0.05 yang berarti TOLAK H0.Sisaan tidak menyebar normal

Autokorelasi

Plot ACF dan PACFterlihat ada lag-lag yang signifikan. Hal ini berarti bahwa ada gejala autokorelasi pada sisaan.

Kesimpulan : Asumsi tidak terpenuhi

Peramalan 10 waktu ke depan

ramalan<- forecast::forecast(ts(okup.train),model=model.diag301, h=10)
ramalan
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 141       33.01243 28.41697 37.60789 25.98428 40.04058
## 142       36.46809 30.47067 42.46551 27.29583 45.64035
## 143       39.66965 32.91255 46.42675 29.33556 50.00375
## 144       41.54690 34.64241 48.45139 30.98739 52.10641
## 145       41.84274 34.92804 48.75744 31.26762 52.41786
## 146       40.65795 33.49593 47.81997 29.70458 51.61131
## 147       38.48854 30.84077 46.13630 26.79229 50.18478
## 148       36.02016 27.90985 44.13047 23.61651 48.42381
## 149       33.93309 25.57702 42.28917 21.15358 46.71261
## 150       32.72389 24.32112 41.12665 19.87297 45.57481
plot(model.diag301, n.ahead=5, type='b', xlab="Waktu", ylab="Data Y")

par(col="red")
lines(dugaan)

par(col="black")

Model terbaik yang dihasilkan dalam asalisis data time series ini yaitu ARIMA(3,0,1). Adapun untuk hasil diagnostik tidak memenuhi asumsi akan dipelajari lebih lanjut.

Terimakasih.