lapply(c("dplyr","readxl","forecast","TTR","graphics","smooth","Mcomp","knitr","tseries",
         "readr","TSA","ggplot2","MLmetrics","cowplot","gridExtra","gtable","grid","MASS",
         "lmtest","GGally", "lubridate", "caret", "tseries", "tidyverse"), library, character.only = T)[[1]]
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: greybox
## Package "greybox", v1.0.6 loaded.
## This is package "smooth", v3.1.6
## 
## Attaching package: 'smooth'
## The following object is masked from 'package:TTR':
## 
##     lags
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
## 
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:greybox':
## 
##     MAE, MAPE, MSE
## The following object is masked from 'package:base':
## 
##     Recall
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Warning: package 'lubridate' was built under R version 4.2.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
## 
##     stamp
## The following object is masked from 'package:greybox':
## 
##     hm
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: lattice
## Registered S3 method overwritten by 'lava':
##   method     from   
##   print.pcor greybox
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:MLmetrics':
## 
##     MAE, RMSE
## The following object is masked from 'package:greybox':
## 
##     MAE
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ stringr 1.5.0
## ✔ tidyr   1.2.1     ✔ forcats 0.5.2
## ✔ purrr   0.3.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ gridExtra::combine()     masks dplyr::combine()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ lubridate::hm()          masks greybox::hm()
## ✖ lubridate::intersect()   masks base::intersect()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ purrr::lift()            masks caret::lift()
## ✖ MASS::select()           masks dplyr::select()
## ✖ lubridate::setdiff()     masks base::setdiff()
## ✖ TSA::spec()              masks readr::spec()
## ✖ tidyr::spread()          masks greybox::spread()
## ✖ lubridate::stamp()       masks cowplot::stamp()
## ✖ lubridate::union()       masks base::union()
## [1] "dplyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"
library(forecast)
df = read_xlsx('C:/Users/User/Downloads/transjakarta.xlsx')

data2 = log(df$jumlah_penumpang)  # log-transform the monthly passenger counts

data2
##  [1] 15.98315 15.84764 16.05450 15.99394 16.01666 15.97798 15.84576 15.99056
##  [9] 15.95034 15.98233 15.95758 15.95750 15.95687 15.91303 16.01367 16.02729
## [17] 16.12029 16.13874 16.03085 16.26482 16.26063 16.32560 16.30448 16.30435
## [25] 16.28332 16.18019 16.33996 16.25726 16.30975 16.11033 16.29947 16.35696
## [33] 16.31470 16.41047 16.39616 16.37732 16.42954 16.32791 16.48530 16.50934
## [41] 16.49848 16.29646 16.59809 16.69724 16.70136 16.75402 16.71063 16.72194
## [49] 16.73473 16.66313 16.83519 16.80522 16.81111 16.72296 16.96402 16.97368
## [57] 17.01629 17.09415 17.06536 17.09271 17.07603 17.01717 16.62855 14.73414
## [65] 14.66134 15.27831 15.93798 15.93746 15.93772 15.99542 16.02307 16.07617
## [73] 16.09762 16.05425 16.26609 16.26864 16.19722 16.21764 15.70002 15.68704
## [81] 15.95928 16.18260 16.33469 16.24407
data.ts<-ts(data2, frequency = 12, start = c(2015,1), end = c(2021,12))

# Box-Cox
#lambda <- BoxCox.lambda(data.ts)
# data.ts <- BoxCox(data.ts, lambda)

# Plot the full series
plot(data.ts, xlab = "Bulan", ylab = "Jumlah Penumpang", col = "black", main = "Plot Deret Waktu Data")
points(data.ts)

data.train <- ts(data.ts[1:63])
data.test <- ts(data.ts[64:84])

#Time Series Data
training.ts<-ts(data.train,frequency=12, start = c(2015,1), end = c(2020,3))

testing.ts<-ts(data.test,frequency=12, start = c(2020,4), end = c(2021,12))
# Plot the training and testing data
plot(training.ts, xlab = "Periode", ylab = "Jumlah Penumpang", col = "red", main = "Plot Data Training")
points(training.ts)

plot(testing.ts, xlab = "Periode", ylab = "Jumlah Penumpang", col = "red", main = "Plot Data Testing")
points(testing.ts)

## Plot of the training and testing data together
ts.plot(data.ts, xlab = "Periode", ylab = "Jumlah Penumpang",
        main = "Plot Deret Waktu Data Jumlah Penumpang Transjakarta")
lines(training.ts, col = "blue")
lines(testing.ts, col = "red")
legend("bottomleft", c("Data Training", "Data Testing"),
       lty = 1, col = c("blue", "red"), cex = 0.8)
abline(v = 2020.25, col = "black", lty = 1, lwd = 1)  # mark the train/test boundary (Apr 2020)

acf(data.train, lag.max = 24, main = "Plot ACF")

pacf(data.train, lag.max = 24, main = "Plot PACF")

adf.test(data.train)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.train
## Dickey-Fuller = -2.9329, Lag order = 3, p-value = 0.1971
## alternative hypothesis: stationary
# Differencing (the ADF test above fails to reject the unit root, p = 0.1971)
data.dif <- diff(data.train, differences = 1) 
plot.ts(ts(data.dif, frequency = 12, start = c(2015, 2)), lty = 1, xlab = "Periode",
        ylab = "Jumlah Penumpang (Pembedaan 1)", main = "Plot Differencing Data Jumlah Penumpang")

adf.test(data.dif)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.dif
## Dickey-Fuller = -3.491, Lag order = 3, p-value = 0.04983
## alternative hypothesis: stationary
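As a cross-check on the differencing order, a brief sketch using forecast::ndiffs(), which applies a unit-root test and returns the suggested number of regular differences:

ndiffs(data.train)                # KPSS-based suggestion (default test)
ndiffs(data.train, test = "adf")  # ADF-based suggestion, matching adf.test() used above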
acf(data.dif, lag.max = 36, main = "Plot ACF Setelah Differencing satu kali")

pacf(data.dif, lag.max = 48, main = "Plot PACF Setelah Differencing satu kali")

eacf(data.dif)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o  x  o  o 
## 1 o o o o o o o o o o o  x  o  o 
## 2 x o o o o o o o o o o  x  o  o 
## 3 x o o o o o o o o o o  x  o  o 
## 4 o o o o o o o o o o o  x  o  o 
## 5 x o o o o o o o o o o  x  o  o 
## 6 o x o o o o o o o o o  o  o  o 
## 7 o x o o o o o o o o o  o  o  o

Based on the ACF, PACF, and EACF of the differenced series, the candidate models are ARIMA(0,1,1), ARIMA(0,1,2), ARIMA(0,1,3), ARIMA(1,1,0), ARIMA(1,1,1), ARIMA(1,1,2), ARIMA(1,1,3), ARIMA(2,1,1), ARIMA(2,1,2), ARIMA(2,1,3), ARIMA(3,1,1), ARIMA(3,1,2), and ARIMA(3,1,3); each is fitted below as ARIMA(p,0,q) on the once-differenced series data.dif.
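As a more compact alternative to fitting model, model1, ..., model12 one by one below, a sketch (assuming data.dif and the forecast package as above) that fits every candidate in a loop and tabulates the information criteria:

# Candidate orders on the differenced series (d = 0 here corresponds to d = 1 on the original series)
orders <- list(c(0,0,1), c(0,0,2), c(0,0,3), c(1,0,0), c(1,0,1), c(1,0,2), c(1,0,3),
               c(2,0,1), c(2,0,2), c(2,0,3), c(3,0,1), c(3,0,2), c(3,0,3))
fits <- lapply(orders, function(ord) Arima(data.dif, order = ord, method = "ML"))
ic_table <- data.frame(
  Model = sapply(orders, function(ord) sprintf("ARIMA(%d,1,%d)", ord[1], ord[3])),
  AIC   = sapply(fits, function(f) f$aic),
  AICc  = sapply(fits, function(f) f$aicc),
  BIC   = sapply(fits, function(f) f$bic)
)
ic_table[order(ic_table$AIC), ]  # smallest AIC first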

auto.arima(data.dif)
## Series: data.dif 
## ARIMA(0,0,1) with non-zero mean 
## 
## Coefficients:
##           ma1    mean
##       -0.4811  0.0145
## s.e.   0.1530  0.0071
## 
## sigma^2 = 0.01115:  log likelihood = 52.29
## AIC=-98.57   AICc=-98.16   BIC=-92.19
model <- Arima(data.dif, order=c(0,0,1), method="ML") 
model1 <- Arima(data.dif, order=c(0,0,2), method="ML")   
model2 <- Arima(data.dif, order=c(0,0,3), method="ML")  
model3 <- Arima(data.dif, order=c(1,0,0), method="ML")
model4 <- Arima(data.dif, order=c(1,0,1), method="ML") 
model5 <- Arima(data.dif, order=c(1,0,2), method="ML")   
model6 <- Arima(data.dif, order=c(1,0,3), method="ML")  
model7 <- Arima(data.dif, order=c(2,0,1), method="ML")
model8 <- Arima(data.dif, order=c(2,0,2), method="ML")
model9 <- Arima(data.dif, order=c(2,0,3), method="ML")
model10 <- Arima(data.dif, order=c(3,0,1), method="ML")
model11 <- Arima(data.dif, order=c(3,0,2), method="ML")
model12 <- Arima(data.dif, order=c(3,0,3), method="ML")
summary(model)
## Series: data.dif 
## ARIMA(0,0,1) with non-zero mean 
## 
## Coefficients:
##           ma1    mean
##       -0.4811  0.0145
## s.e.   0.1530  0.0071
## 
## sigma^2 = 0.01115:  log likelihood = 52.29
## AIC=-98.57   AICc=-98.16   BIC=-92.19
## 
## Training set error measures:
##                         ME      RMSE        MAE      MPE     MAPE     MASE
## Training set -0.0006470904 0.1038929 0.07376048 978.0486 1038.697 0.538449
##                      ACF1
## Training set -0.001874654
coeftest(model)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)   
## ma1       -0.4811094  0.1529869 -3.1448 0.001662 **
## intercept  0.0144688  0.0070803  2.0435 0.041000 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1)
## Series: data.dif 
## ARIMA(0,0,2) with non-zero mean 
## 
## Coefficients:
##           ma1    ma2    mean
##       -0.4813  0.004  0.0144
## s.e.   0.1535  0.172  0.0073
## 
## sigma^2 = 0.01134:  log likelihood = 52.29
## AIC=-96.57   AICc=-95.87   BIC=-88.07
## 
## Training set error measures:
##                         ME      RMSE       MAE      MPE     MAPE      MASE
## Training set -0.0006476582 0.1038943 0.0737577 980.2098 1041.306 0.5384286
##                      ACF1
## Training set -0.001116443
coeftest(model1)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)   
## ma1       -0.4813236  0.1535053 -3.1356 0.001715 **
## ma2        0.0039747  0.1720249  0.0231 0.981566   
## intercept  0.0144373  0.0072661  1.9869 0.046930 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
## Series: data.dif 
## ARIMA(0,0,3) with non-zero mean 
## 
## Coefficients:
##           ma1      ma2      ma3    mean
##       -0.5800  -0.0769  -0.3431  0.0184
## s.e.   0.1635   0.1658   0.1283  0.0012
## 
## sigma^2 = 0.01065:  log likelihood = 53.2
## AIC=-96.4   AICc=-95.33   BIC=-85.76
## 
## Training set error measures:
##                       ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.01174072 0.09983093 0.07413183 1446.955 1909.065 0.5411598
##                    ACF1
## Training set 0.03368266
coeftest(model2)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value  Pr(>|z|)    
## ma1       -0.5800339  0.1635496 -3.5465 0.0003903 ***
## ma2       -0.0768714  0.1658073 -0.4636 0.6429208    
## ma3       -0.3430888  0.1283104 -2.6739 0.0074975 ** 
## intercept  0.0184438  0.0012004 15.3649 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3)
## Series: data.dif 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##           ar1    mean
##       -0.4058  0.0130
## s.e.   0.1336  0.0095
## 
## sigma^2 = 0.01132:  log likelihood = 51.87
## AIC=-97.74   AICc=-97.32   BIC=-91.35
## 
## Training set error measures:
##                         ME      RMSE        MAE      MPE     MAPE     MASE
## Training set -0.0007694027 0.1046679 0.07439282 993.7082 1077.506 0.543065
##                     ACF1
## Training set -0.02347896
coeftest(model3)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)   
## ar1       -0.4057506  0.1335927 -3.0372 0.002388 **
## intercept  0.0129731  0.0095205  1.3627 0.172990   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model4)
## Series: data.dif 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##           ar1      ma1    mean
##       -0.0166  -0.4651  0.0144
## s.e.   0.5208   0.5281  0.0074
## 
## sigma^2 = 0.01134:  log likelihood = 52.29
## AIC=-96.57   AICc=-95.87   BIC=-88.07
## 
## Training set error measures:
##                       ME      RMSE        MAE      MPE     MAPE      MASE
## Training set -0.00063722 0.1038953 0.07375324 982.1883 1043.767 0.5383961
##                       ACF1
## Training set -0.0002651442
coeftest(model4)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)  
## ar1       -0.0165633  0.5207727 -0.0318  0.97463  
## ma1       -0.4651211  0.5280534 -0.8808  0.37841  
## intercept  0.0144019  0.0074363  1.9367  0.05278 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5)
## Series: data.dif 
## ARIMA(1,0,2) with non-zero mean 
## 
## Coefficients:
##           ar1     ma1      ma2    mean
##       -0.5276  0.0391  -0.2022  0.0142
## s.e.   0.7482  0.7564   0.4002  0.0075
## 
## sigma^2 = 0.01152:  log likelihood = 52.34
## AIC=-94.68   AICc=-93.6   BIC=-84.04
## 
## Training set error measures:
##                         ME      RMSE     MAE      MPE     MAPE      MASE
## Training set -0.0006672126 0.1038164 0.07374 1041.672 1112.115 0.5382995
##                    ACF1
## Training set 0.01002608
coeftest(model5)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)  
## ar1       -0.5276473  0.7482448 -0.7052  0.48070  
## ma1        0.0390563  0.7564460  0.0516  0.95882  
## ma2       -0.2022184  0.4001846 -0.5053  0.61334  
## intercept  0.0142398  0.0074653  1.9075  0.05646 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model6)
## Series: data.dif 
## ARIMA(1,0,3) with non-zero mean 
## 
## Coefficients:
##         ar1      ma1     ma2      ma3    mean
##       0.691  -1.2696  0.3894  -0.1198  0.0178
## s.e.  0.230   0.2707  0.2615   0.1576  0.0019
## 
## sigma^2 = 0.01059:  log likelihood = 54.29
## AIC=-96.57   AICc=-95.04   BIC=-83.81
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.007186347 0.09868318 0.07103453 1483.329 1547.874 0.5185496
##                     ACF1
## Training set 0.004768468
coeftest(model6)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value  Pr(>|z|)    
## ar1        0.6909591  0.2300028  3.0041  0.002663 ** 
## ma1       -1.2696124  0.2706797 -4.6905 2.726e-06 ***
## ma2        0.3894380  0.2614915  1.4893  0.136410    
## ma3       -0.1197902  0.1576058 -0.7601  0.447217    
## intercept  0.0177728  0.0018617  9.5466 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model7)
## Series: data.dif 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2     ma1    mean
##       0.4146  0.2381  -1.000  0.0177
## s.e.  0.1464  0.1453   0.048  0.0019
## 
## sigma^2 = 0.01041:  log likelihood = 54.31
## AIC=-98.61   AICc=-97.54   BIC=-87.98
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.006941607 0.09870595 0.07114576 1479.191 1546.267 0.5193616
##                    ACF1
## Training set 0.01176516
coeftest(model7)
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1        0.4145611  0.1464177   2.8314  0.004635 ** 
## ar2        0.2380787  0.1453428   1.6381  0.101411    
## ma1       -1.0000000  0.0480123 -20.8280 < 2.2e-16 ***
## intercept  0.0177064  0.0018985   9.3264 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model8)
## Series: data.dif 
## ARIMA(2,0,2) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2    mean
##       0.2584  0.3202  -0.8359  -0.1639  0.0178
## s.e.  0.5653  0.3094   0.5879   0.5857  0.0018
## 
## sigma^2 = 0.01057:  log likelihood = 54.34
## AIC=-96.69   AICc=-95.16   BIC=-83.92
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.007196214 0.09859786 0.07142285 1502.572 1575.874 0.5213843
##                     ACF1
## Training set 0.006837753
coeftest(model8)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## ar1        0.2583955  0.5652661  0.4571   0.6476    
## ar2        0.3202129  0.3094055  1.0349   0.3007    
## ma1       -0.8359356  0.5879301 -1.4218   0.1551    
## ma2       -0.1638972  0.5857315 -0.2798   0.7796    
## intercept  0.0177774  0.0018454  9.6333   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model9)
## Series: data.dif 
## ARIMA(2,0,3) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2      ma3    mean
##       0.2618  0.3148  -0.8383  -0.1561  -0.0056  0.0178
## s.e.  0.6452  0.4753   0.6581   0.8114   0.2772  0.0019
## 
## sigma^2 = 0.01076:  log likelihood = 54.34
## AIC=-94.69   AICc=-92.61   BIC=-79.8
## 
## Training set error measures:
##                        ME       RMSE        MAE     MPE     MAPE      MASE
## Training set -0.007207981 0.09858895 0.07141248 1499.05 1572.595 0.5213086
##                     ACF1
## Training set 0.006057755
coeftest(model9)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## ar1        0.2618493  0.6452077  0.4058   0.6849    
## ar2        0.3147763  0.4752605  0.6623   0.5078    
## ma1       -0.8382652  0.6580790 -1.2738   0.2027    
## ma2       -0.1560986  0.8113665 -0.1924   0.8474    
## ma3       -0.0055903  0.2771593 -0.0202   0.9839    
## intercept  0.0177810  0.0018597  9.5614   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model10)
## Series: data.dif 
## ARIMA(3,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3      ma1    mean
##       0.4233  0.2535  -0.0415  -1.0000  0.0178
## s.e.  0.1497  0.1563   0.1562   0.0489  0.0019
## 
## sigma^2 = 0.01057:  log likelihood = 54.34
## AIC=-96.68   AICc=-95.16   BIC=-83.92
## 
## Training set error measures:
##                       ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.00723574 0.09858138 0.07134105 1491.254 1564.862 0.5207872
##                     ACF1
## Training set 0.006289166
coeftest(model10)
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1        0.4233245  0.1497170   2.8275  0.004691 ** 
## ar2        0.2534887  0.1562520   1.6223  0.104738    
## ar3       -0.0415071  0.1562355  -0.2657  0.790493    
## ma1       -0.9999986  0.0488781 -20.4590 < 2.2e-16 ***
## intercept  0.0177894  0.0018517   9.6073 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model11)
## Series: data.dif 
## ARIMA(3,0,2) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3      ma1      ma2    mean
##       0.2946  0.3063  -0.0103  -0.8712  -0.1288  0.0178
## s.e.  2.1692  0.9042   0.5497   2.1623   2.1616  0.0019
## 
## sigma^2 = 0.01076:  log likelihood = 54.34
## AIC=-94.69   AICc=-92.61   BIC=-79.8
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.007191589 0.09858895 0.07141256 1498.963 1572.465 0.5213092
##                     ACF1
## Training set 0.006204726
coeftest(model11)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## ar1        0.2945631  2.1692247  0.1358   0.8920    
## ar2        0.3063095  0.9042278  0.3388   0.7348    
## ar3       -0.0102628  0.5497105 -0.0187   0.9851    
## ma1       -0.8711709  2.1623050 -0.4029   0.6870    
## ma2       -0.1287918  2.1616164 -0.0596   0.9525    
## intercept  0.0177784  0.0018599  9.5587   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model12)
## Series: data.dif 
## ARIMA(3,0,3) with non-zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1      ma2      ma3    mean
##       -1.0538  0.1836  0.6339  0.6839  -0.6839  -1.0000  0.0177
## s.e.   0.1272  0.2111  0.1297  0.0917   0.0932   0.0904  0.0018
## 
## sigma^2 = 0.008958:  log likelihood = 58.44
## AIC=-100.88   AICc=-98.17   BIC=-83.87
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.006359759 0.08914422 0.06605995 1568.965 1720.922 0.4822353
##                    ACF1
## Training set 0.03893473
coeftest(model12)
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -1.0538331  0.1272500  -8.2816 < 2.2e-16 ***
## ar2        0.1836456  0.2110524   0.8701    0.3842    
## ar3        0.6338810  0.1297301   4.8862 1.028e-06 ***
## ma1        0.6838572  0.0917202   7.4559 8.925e-14 ***
## ma2       -0.6838524  0.0931722  -7.3397 2.141e-13 ***
## ma3       -0.9999937  0.0903916 -11.0629 < 2.2e-16 ***
## intercept  0.0177425  0.0018007   9.8532 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Among the candidate models above, ARIMA(3,0,3) on the differenced series (model12, equivalent to ARIMA(3,1,3) on the original series) gives the lowest AIC (-100.88), so it is examined further through overfitting.

# Overfitting check: fit the neighbouring orders ARIMA(4,0,3) and ARIMA(3,0,4)

model12b <- Arima(data.dif, order=c(4,0,3), method="ML")
model12c <- Arima(data.dif, order=c(3,0,4), method = "ML")
summary(model12b)
## Series: data.dif 
## ARIMA(4,0,3) with non-zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3      ar4     ma1      ma2      ma3    mean
##       -1.0170  0.1957  0.5684  -0.0628  0.6860  -0.6860  -1.0000  0.0178
## s.e.   0.1576  0.2122  0.2115   0.1611  0.0933   0.0949   0.0931  0.0017
## 
## sigma^2 = 0.009046:  log likelihood = 58.52
## AIC=-99.04   AICc=-95.57   BIC=-79.89
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.006810885 0.08876458 0.06581016 1534.649 1686.456 0.4804119
##                    ACF1
## Training set 0.02214229
coeftest(model12b)
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -1.0169676  0.1575784  -6.4537 1.091e-10 ***
## ar2        0.1956820  0.2121533   0.9224  0.356340    
## ar3        0.5683667  0.2114786   2.6876  0.007197 ** 
## ar4       -0.0628191  0.1610932  -0.3900  0.696570    
## ma1        0.6860261  0.0932989   7.3530 1.938e-13 ***
## ma2       -0.6860227  0.0948837  -7.2301 4.825e-13 ***
## ma3       -0.9999889  0.0931261 -10.7380 < 2.2e-16 ***
## intercept  0.0178463  0.0017233  10.3556 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model12c)
## Series: data.dif 
## ARIMA(3,0,4) with non-zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1      ma2      ma3     ma4    mean
##       -0.6747  0.2762  0.5270  0.2544  -0.6269  -0.8754  0.2479  0.0178
## s.e.   0.3057  0.2797  0.1566  0.3406   0.1579   0.1728  0.3144  0.0019
## 
## sigma^2 = 0.009422:  log likelihood = 57.57
## AIC=-97.15   AICc=-93.69   BIC=-78
## 
## Training set error measures:
##                        ME       RMSE        MAE      MPE     MAPE      MASE
## Training set -0.006464599 0.09058627 0.06930584 2063.158 2171.524 0.5059302
##                    ACF1
## Training set 0.01357103
coeftest(model12c)
## 
## z test of coefficients:
## 
##            Estimate Std. Error z value  Pr(>|z|)    
## ar1       -0.674698   0.305692 -2.2071 0.0273059 *  
## ar2        0.276248   0.279725  0.9876 0.3233634    
## ar3        0.527050   0.156642  3.3647 0.0007663 ***
## ma1        0.254411   0.340615  0.7469 0.4551138    
## ma2       -0.626866   0.157910 -3.9698 7.194e-05 ***
## ma3       -0.875449   0.172783 -5.0667 4.047e-07 ***
## ma4        0.247917   0.314448  0.7884 0.4304506    
## intercept  0.017757   0.001869  9.5009 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Both overfitted models increase the AIC (-99.04 and -97.15 versus -100.88) and introduce insignificant parameters, so the ARIMA(3,0,3) model on the differenced series, i.e. ARIMA(3,1,3) on the original series, is retained.
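As a compact check, a short sketch (assuming model12, model12b, and model12c are still in the workspace) that lines up the information criteria of the selected model and its overfitted neighbours:

data.frame(
  Model = c("ARIMA(3,0,3)", "ARIMA(4,0,3)", "ARIMA(3,0,4)"),
  AIC   = c(model12$aic,  model12b$aic,  model12c$aic),
  AICc  = c(model12$aicc, model12b$aicc, model12c$aicc),
  BIC   = c(model12$bic,  model12b$bic,  model12c$bic)
)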

# Residual diagnostics of the selected model
sisaan <- model12$residuals

# Normality of the residuals (Shapiro-Wilk)
shapiro.test(sisaan)
## 
##  Shapiro-Wilk normality test
## 
## data:  sisaan
## W = 0.9439, p-value = 0.006859
# Independence of the residuals (Ljung-Box)
Box.test(sisaan, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  sisaan
## X-squared = 0.098609, df = 1, p-value = 0.7535

The residuals are not normally distributed (Shapiro-Wilk p = 0.0069), which is tolerated here, but they show no significant autocorrelation (Ljung-Box p = 0.7535).
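For a combined view of the same diagnostics, a minimal sketch using forecast::checkresiduals() on the fitted model12 from above:

# Plots the residuals, their ACF and histogram, and runs a Ljung-Box test whose degrees of
# freedom account for the number of estimated ARMA coefficients
checkresiduals(model12)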

# Forecasting: refit ARIMA(3,1,3) with drift on the full (log-transformed) series
ramalan <- forecast(Arima(data.ts, order=c(3,1,3), method="ML", include.drift = TRUE), h = length(data.test))
data.ramalan <- ramalan$mean
data.ramalan.ts <- ts(data.ramalan, start = c(2022, 1), frequency = 12)  # forecasts begin January 2022
plot(ramalan,col="black",col.sub ="black",col.axis="black",
     col.lab="black",col.main="black",lwd=2)
box(col="black",lwd=2)

hasilforecast<-matrix(data=c(data.ramalan[1:12]), nrow = 12, ncol = 1)
colnames(hasilforecast)<-c("Hasil Forecast")
head(hasilforecast)
##      Hasil Forecast
## [1,]       16.24902
## [2,]       16.28519
## [3,]       16.31182
## [4,]       16.31766
## [5,]       16.32478
## [6,]       16.33799
hasilforecast <- matrix(data=exp(data.ramalan[1:12]), nrow=12, ncol=1)
colnames(hasilforecast) <- c("Hasil Forecast")

# Display the forecasts back-transformed (exp) to the original passenger-count scale
hasilforecast
##       Hasil Forecast
##  [1,]       11398849
##  [2,]       11818694
##  [3,]       12137584
##  [4,]       12208745
##  [5,]       12295926
##  [6,]       12459493
##  [7,]       12573797
##  [8,]       12626475
##  [9,]       12686542
## [10,]       12759114
## [11,]       12813773
## [12,]       12853363
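The forecast object also carries prediction intervals; a short sketch (using the same objects as above) that back-transforms those as well, since only the point forecasts are exponentiated above:

# Back-transform the 80% and 95% prediction intervals stored in the forecast object
interval_df <- data.frame(
  Point = exp(as.numeric(ramalan$mean)),
  Lo80  = exp(as.numeric(ramalan$lower[, 1])),
  Hi80  = exp(as.numeric(ramalan$upper[, 1])),
  Lo95  = exp(as.numeric(ramalan$lower[, 2])),
  Hi95  = exp(as.numeric(ramalan$upper[, 2]))
)
head(interval_df)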
# Forecast start date in "dd/mm/yyyy" format
start_date <- as.Date("01/01/2022", format="%d/%m/%Y")

# Build the forecast date sequence; the horizon is h = length(data.test) = 21 months,
# starting the month after the last observation (January 2022)
forecast_dates <- seq(from=start_date, length.out=21, by="month")

# Attach the date vector to the back-transformed forecasts
hasilforecast <- matrix(data=exp(data.ramalan[1:21]), nrow=21, ncol=1)
colnames(hasilforecast) <- c("Hasil Forecast")
hasilforecast <- cbind(ForecastDate = forecast_dates, hasilforecast)

# Convert the matrix to a data frame
hasilforecast_df <- as.data.frame(hasilforecast)

# Convert the numeric dates back to Date class
hasilforecast_df$ForecastDate <- as.Date(hasilforecast_df$ForecastDate, origin = "1970-01-01")

# Display the forecasts with their dates
print(hasilforecast_df)
##    ForecastDate Hasil Forecast
## 1    2022-01-01       11398849
## 2    2022-02-01       11818694
## 3    2022-03-01       12137584
## 4    2022-04-01       12208745
## 5    2022-05-01       12295926
## 6    2022-06-01       12459493
## 7    2022-07-01       12573797
## 8    2022-08-01       12626475
## 9    2022-09-01       12686542
## 10   2022-10-01       12759114
## 11   2022-11-01       12813773
## 12   2022-12-01       12853363
## 13   2023-01-01       12894816
## 14   2023-02-01       12937200
## 15   2023-03-01       12973744
## 16   2023-04-01       13006533
## 17   2023-05-01       13039554
## 18   2023-06-01       13072143
## 19   2023-07-01       13102933
## 20   2023-08-01       13132755
## 21   2023-09-01       13162525
# Model validation

## Compute the errors (test data minus forecasts)
error <- data.frame(data.test) - data.frame(data.ramalan[1:21])


## SSE (Sum of Squared Errors)
SSE <- sum(error^2, na.rm = T)

## MSE (Mean Squared Error)
MSE<- sapply(error^2, mean, na.rm = T)


## RMSE (Root Mean Square Error)
RMSE1 <- sqrt(MSE)
# Mean of the actual (test) values
mean_actual <- mean(data.test, na.rm = TRUE)

# RMSE expressed as a percentage of the mean actual value
RMSE <- (RMSE1 / mean_actual) * 100

## MAD (Mean Absolute Deviation)
MAD <- sapply(abs(error), mean, na.rm = T)

## MAPE (Mean Absolute Percentage Error)
r.error <- (error/data.frame(data.test))*100 # Relative Error
MAPE <- sapply(abs(r.error), mean, na.rm = T)
akurasifarima <- data.frame(
  "Ukuran Keakuratan" = c("SSE", "MSE", "MAPE", "RMSE", "MAD"), 
  "Forecasting" = c(SSE, MSE, MAPE, RMSE, MAD))
akurasifarima
##   Ukuran.Keakuratan Forecasting
## 1               SSE   8.2085322
## 2               MSE   0.3908825
## 3              MAPE   2.9695864
## 4              RMSE   3.9333999
## 5               MAD   0.4588478

With a MAPE of about 2.97% and an RMSE of about 3.93% of the mean actual value, the model performs well.
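As a cross-check, a small sketch that recomputes the same error measures with MLmetrics (attached above; caret masks MAE and RMSE, so the MLmetrics versions are called explicitly):

pred   <- as.numeric(data.ramalan[1:21])
actual <- as.numeric(data.test)
c(MAPE = MLmetrics::MAPE(pred, actual) * 100,  # in percent, comparable to MAPE above
  MAD  = MLmetrics::MAE(pred, actual),         # mean absolute error, equals MAD above
  RMSE = MLmetrics::RMSE(pred, actual))        # unscaled root mean squared error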