# Load the required packages (the duplicate "tseries" entry is dropped and the return value suppressed)
invisible(lapply(c("dplyr", "readxl", "forecast", "TTR", "graphics", "smooth", "Mcomp", "knitr",
                   "tseries", "readr", "TSA", "ggplot2", "MLmetrics", "cowplot", "gridExtra",
                   "gtable", "grid", "MASS", "lmtest", "GGally", "lubridate", "caret",
                   "tidyverse"),
                 library, character.only = TRUE))
#BELAWAN
# Read the Belawan passenger data from the local Excel file
belawan <- read_xlsx("C:/Users/User/Documents/Portofolio/Data Jumlah Penumpang Pelabuhan 2006-2023Belawan_2006_2023.xlsx")
belawan$`Total Keberangkatan` <- as.numeric(belawan$`Total Keberangkatan`)
belawan$`Total Kedatangan` <- as.numeric(belawan$`Total Kedatangan`)
str(belawan)
## tibble [213 × 5] (S3: tbl_df/tbl/data.frame)
## $ Tahun : num [1:213] 2006 2006 2006 2006 2006 ...
## $ Bulan : chr [1:213] "Januari" "Februari" "Maret" "April" ...
## $ Pelabuhan Utama : chr [1:213] "Belawan" "Belawan" "Belawan" "Belawan" ...
## $ Total Keberangkatan: num [1:213] 11610 5044 4483 4075 4404 ...
## $ Total Kedatangan : num [1:213] 7968 6707 4775 3469 3955 ...
# Check for NA, NaN, or Inf values
sum(is.na(belawan)) # number of NAs
## [1] 0
boxplot(belawan$`Total Keberangkatan` ~ belawan$Tahun)
# Log(x + 1) transform of monthly departures to stabilise the variance
data_belawan <- log(belawan$`Total Keberangkatan` + 1)
data.ts<-ts(data_belawan, frequency = 12, start = c(2006,1), end = c(2023,9))
# Box-Cox
#lambda <- BoxCox.lambda(data.ts)
# data.ts <- BoxCox(data.ts, lambda)
# Plot the full series
plot(data.ts,xlab ="Bulan", ylab = "Jumlah Penumpang", col="black", main = "Plot Deret Waktu Data")
points(data.ts)
seasonplot(data.ts, 12, main = "Jumlah Penumpang Pelabuhan Belawan", ylab = "Jumlah Penumpang", year.labels = TRUE, col = rainbow(18))
fligner.test(data.ts ~ Tahun, data=belawan)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: data.ts by Tahun
## Fligner-Killeen:med chi-squared = 26.324, df = 17, p-value = 0.06876
data.train <- ts(data.ts[1:170])
data.test <- ts(data.ts[171:213])
#Time Series Data
training.ts<-ts(data.train,frequency=12, start = c(2006,1), end = c(2020,2))
testing.ts<-ts(data.test,frequency=12, start = c(2020,3), end = c(2023,9))
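An equivalent way to obtain the same 170/43 split is stats::window(), which keeps the monthly time index attached; an optional sketch that reproduces the series defined above:
# Optional alternative (sketch): the same split via window(), yielding the same series as above
training.ts <- window(data.ts, end = c(2020, 2))    # Jan 2006 - Feb 2020 (170 obs)
testing.ts  <- window(data.ts, start = c(2020, 3))  # Mar 2020 - Sep 2023 (43 obs)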
# Plot the training and testing data
plot(training.ts, xlab = "Periode", ylab = "Jumlah Keberangkatan", col = "red", main = "Plot Data Training")
points(training.ts)
plot(testing.ts, xlab = "Periode", ylab = "Jumlah Keberangkatan", col = "red", main = "Plot Data Testing")
points(testing.ts)
# Combined plot of the training and testing data
ts.plot(data.ts, xlab = "Periode", ylab = "Jumlah Keberangkatan",
        main = "Plot Deret Waktu Data Jumlah Keberangkatan Pelabuhan Belawan")
lines(training.ts, col = "blue")
lines(testing.ts, col="Red")
legend("bottomleft",c("Data Training","Data Testing"),
lty=1, col=c("blue","red"), cex=0.8)
abline(v=2020.20, col=c("black"), lty=1, lwd=1)
acf(data.train, lag.max = 24, main = "Plot ACF")
pacf(data.train, lag.max = 24, main = "Plot PACF")
adf.test(data.train)
##
## Augmented Dickey-Fuller Test
##
## data: data.train
## Dickey-Fuller = -3.43, Lag order = 5, p-value = 0.05179
## alternative hypothesis: stationary
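The ADF p-value (0.052) sits just above 0.05, so the training series is treated as non-stationary in level and differenced once. As a quick cross-check (an optional sketch, not part of the original script), forecast::ndiffs() estimates the number of regular differences required:
# Optional cross-check (sketch): estimated number of regular differences needed
ndiffs(data.train)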
# DIFFERENCING
data.dif<-diff(data.train,differences = 1)
plot.ts(ts(data.dif, frequency = 12, start = c(2006, 2)), lty = 1, xlab = "Periode", ylab = "Data Keberangkatan Pembedaan 1", main = "Plot Data Keberangkatan Setelah Differencing 1 Kali")
adf.test(data.dif)
## Warning in adf.test(data.dif): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data.dif
## Dickey-Fuller = -10.141, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
acf(data.dif, lag.max = 36, main = "Plot ACF Setelah Differencing 1 kali")
pacf(data.dif, lag.max = 48, main = "Plot PACF Setelah Differencing 1 kali")
eacf(data.dif)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x o o o x o o o x o x o x
## 1 x x o o o x o o o x o x x x
## 2 x o o o o x o o o o o x x x
## 3 x o o o o x o o o o o x o x
## 4 x o o x x o o x o o o x x o
## 5 o x x x x o o x o x o x o x
## 6 x o x o x o x o o o o x o x
## 7 x x o o x x x x o o x o o o
Based on the EACF and the ACF/PACF of the differenced series, the candidate models (in terms of the original series) are ARIMA(0,1,2), ARIMA(0,1,3), ARIMA(1,1,2), ARIMA(1,1,3), ARIMA(2,1,1), ARIMA(2,1,2), ARIMA(2,1,3), ARIMA(3,1,1), ARIMA(3,1,2), ARIMA(3,1,3), and ARIMA(5,1,0); they are fitted below as ARIMA(p,0,q) models on the differenced series.
auto.arima(data.dif)
## Series: data.dif
## ARIMA(5,0,0) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## -0.6364 -0.7774 -0.6964 -0.5464 -0.4591
## s.e. 0.0684 0.0729 0.0778 0.0733 0.0684
##
## sigma^2 = 0.3776: log likelihood = -156.07
## AIC=324.14 AICc=324.66 BIC=342.92
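The auto.arima() result on the differenced series points to an AR(5) structure, consistent with the candidate list above. As an optional cross-check (not part of the original output), the same automatic search can be run on the undifferenced training series with the order of differencing fixed at one:
# Optional cross-check (assumed step): automatic search on the level series with d = 1
auto.arima(data.train, d = 1)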
model <- Arima(data.dif, order=c(0,0,2), method="ML")
model1 <- Arima(data.dif, order=c(0,0,3), method="ML")
model2 <- Arima(data.dif, order=c(1,0,2), method="ML")
model3 <- Arima(data.dif, order=c(1,0,3), method="ML")
model4 <- Arima(data.dif, order=c(2,0,1), method="ML")
model5 <- Arima(data.dif, order=c(2,0,2), method="ML")
model6 <- Arima(data.dif, order=c(2,0,3), method="ML")
model7 <- Arima(data.dif, order=c(3,0,1), method="ML")
model8 <- Arima(data.dif, order=c(3,0,2), method="ML")
model9 <- Arima(data.dif, order=c(3,0,3), method="ML")
model10 <- Arima(data.dif, order=c(5,0,0), method="ML")
summary(model)
## Series: data.dif
## ARIMA(0,0,2) with non-zero mean
##
## Coefficients:
## ma1 ma2 mean
## -0.6485 -0.3515 0.0023
## s.e. 0.0756 0.0731 0.0014
##
## sigma^2 = 0.4202: log likelihood = -167.35
## AIC=342.7 AICc=342.95 BIC=355.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.03184583 0.6424263 0.505148 Inf Inf 0.5316993 -0.02592382
coeftest(model)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.6485040 0.0756117 -8.5768 < 2.2e-16 ***
## ma2 -0.3514956 0.0730648 -4.8107 1.504e-06 ***
## intercept 0.0022704 0.0013539 1.6769 0.09356 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1)
## Series: data.dif
## ARIMA(0,0,3) with non-zero mean
##
## Coefficients:
## ma1 ma2 ma3 mean
## -0.7169 -0.3799 0.0968 0.0023
## s.e. 0.0931 0.0732 0.0855 0.0012
##
## sigma^2 = 0.419: log likelihood = -166.73
## AIC=343.46 AICc=343.82 BIC=359.11
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.03396382 0.6395893 0.5025076 Inf Inf 0.5289201 0.02035327
coeftest(model1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.7169437 0.0931369 -7.6977 1.385e-14 ***
## ma2 -0.3798852 0.0732192 -5.1883 2.122e-07 ***
## ma3 0.0968291 0.0855484 1.1319 0.25769
## intercept 0.0022860 0.0011878 1.9246 0.05428 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
## Series: data.dif
## ARIMA(1,0,2) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 mean
## -0.1112 -0.5626 -0.4374 0.0023
## s.e. 0.1575 0.1338 0.1324 0.0013
##
## sigma^2 = 0.4213: log likelihood = -167.1
## AIC=344.2 AICc=344.57 BIC=359.85
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.03255776 0.6413296 0.5042171 Inf Inf 0.5307195 -0.009005403
coeftest(model2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.1112390 0.1574910 -0.7063 0.4799892
## ma1 -0.5625917 0.1338363 -4.2036 2.627e-05 ***
## ma2 -0.4373798 0.1323811 -3.3039 0.0009534 ***
## intercept 0.0022777 0.0012951 1.7587 0.0786328 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3)
## Series: data.dif
## ARIMA(1,0,3) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3 mean
## 0.4612 -1.1859 -0.0678 0.3108 0.0047
## s.e. 0.1728 0.1586 0.1517 0.0663 0.0054
##
## sigma^2 = 0.4187: log likelihood = -164.71
## AIC=341.42 AICc=341.94 BIC=360.2
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.0120605 0.6374609 0.4870183 Inf Inf 0.5126167 0.03271562
coeftest(model3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4612348 0.1727609 2.6698 0.00759 **
## ma1 -1.1858570 0.1586157 -7.4763 7.645e-14 ***
## ma2 -0.0678148 0.1517438 -0.4469 0.65494
## ma3 0.3108217 0.0663499 4.6846 2.805e-06 ***
## intercept 0.0047366 0.0054354 0.8714 0.38351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model4)
## Series: data.dif
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 0.2701 -0.3041 -0.9307 0.0037
## s.e. 0.0796 0.0776 0.0442 0.0037
##
## sigma^2 = 0.4108: log likelihood = -163.74
## AIC=337.49 AICc=337.86 BIC=353.14
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01566578 0.6333281 0.48923 Inf Inf 0.5149447 -0.0349622
coeftest(model4)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.2701310 0.0796295 3.3923 0.000693 ***
## ar2 -0.3040776 0.0776356 -3.9167 8.976e-05 ***
## ma1 -0.9306814 0.0441885 -21.0616 < 2.2e-16 ***
## intercept 0.0037199 0.0036640 1.0153 0.309975
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5)
## Series: data.dif
## ARIMA(2,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 mean
## 0.6370 -0.4282 -1.3327 0.4194 0.0045
## s.e. 0.1393 0.0784 0.1408 0.1385 0.0055
##
## sigma^2 = 0.4027: log likelihood = -161.47
## AIC=334.95 AICc=335.47 BIC=353.73
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01157864 0.6251408 0.4734702 Inf Inf 0.4983565 -0.01152289
coeftest(model5)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.6370072 0.1392640 4.5741 4.783e-06 ***
## ar2 -0.4281611 0.0784342 -5.4589 4.792e-08 ***
## ma1 -1.3327314 0.1408451 -9.4624 < 2.2e-16 ***
## ma2 0.4194274 0.1384708 3.0290 0.002454 **
## intercept 0.0044537 0.0054683 0.8145 0.415381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model6)
## Series: data.dif
## ARIMA(2,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 mean
## -0.9739 -0.5326 0.3228 -0.5697 -0.7531 0.0023
## s.e. 0.1451 0.1784 0.1514 0.1668 0.0969 0.0012
##
## sigma^2 = 0.4025: log likelihood = -162.44
## AIC=338.88 AICc=339.58 BIC=360.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.03287781 0.6230307 0.4909989 Inf Inf 0.5168065 0.002433312
coeftest(model6)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.9739431 0.1450689 -6.7137 1.898e-11 ***
## ar2 -0.5326489 0.1783520 -2.9865 0.0028219 **
## ma1 0.3227545 0.1514439 2.1312 0.0330742 *
## ma2 -0.5696944 0.1668352 -3.4147 0.0006385 ***
## ma3 -0.7530521 0.0969064 -7.7709 7.792e-15 ***
## intercept 0.0023372 0.0011966 1.9532 0.0507924 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model7)
## Series: data.dif
## ARIMA(3,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 mean
## 0.1916 -0.2874 -0.1593 -0.8890 0.0042
## s.e. 0.0877 0.0760 0.0842 0.0514 0.0045
##
## sigma^2 = 0.4052: log likelihood = -162.02
## AIC=336.05 AICc=336.57 BIC=354.83
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01333594 0.6270353 0.4772887 Inf Inf 0.5023756 -0.01562436
coeftest(model7)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.1915576 0.0876566 2.1853 0.0288655 *
## ar2 -0.2873985 0.0759666 -3.7832 0.0001548 ***
## ar3 -0.1593131 0.0841786 -1.8926 0.0584162 .
## ma1 -0.8889775 0.0514115 -17.2914 < 2.2e-16 ***
## intercept 0.0042030 0.0045205 0.9298 0.3524941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model8)
## Series: data.dif
## ARIMA(3,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 mean
## -0.5889 -0.0732 -0.3350 -0.0738 -0.7772 0.0039
## s.e. 0.1299 0.1020 0.0769 0.1213 0.1173 0.0039
##
## sigma^2 = 0.4075: log likelihood = -162.05
## AIC=338.1 AICc=338.8 BIC=360.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01475585 0.6269483 0.4825039 Inf Inf 0.507865 -0.02097627
coeftest(model8)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.5889447 0.1298596 -4.5352 5.754e-06 ***
## ar2 -0.0731575 0.1020368 -0.7170 0.4734
## ar3 -0.3349900 0.0769099 -4.3556 1.327e-05 ***
## ma1 -0.0738185 0.1212756 -0.6087 0.5427
## ma2 -0.7772287 0.1172670 -6.6279 3.406e-11 ***
## intercept 0.0038855 0.0039337 0.9878 0.3233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model9)
## Series: data.dif
## ARIMA(3,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 mean
## -0.2959 0.1224 -0.4217 -0.4022 -0.7557 0.3314 0.0044
## s.e. 0.1646 0.1389 0.0816 0.1707 0.1362 0.1436 0.0054
##
## sigma^2 = 0.4036: log likelihood = -160.64
## AIC=337.29 AICc=338.19 BIC=362.33
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01148596 0.6219606 0.4660784 Inf Inf 0.4905762 -0.001067397
coeftest(model9)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.2959278 0.1645786 -1.7981 0.07216 .
## ar2 0.1223986 0.1389416 0.8809 0.37835
## ar3 -0.4216525 0.0815765 -5.1688 2.356e-07 ***
## ma1 -0.4021696 0.1707087 -2.3559 0.01848 *
## ma2 -0.7557424 0.1362474 -5.5468 2.909e-08 ***
## ma3 0.3313898 0.1435848 2.3080 0.02100 *
## intercept 0.0044029 0.0054045 0.8147 0.41526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model10)
## Series: data.dif
## ARIMA(5,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 mean
## -0.6369 -0.7780 -0.6969 -0.5467 -0.4594 0.0035
## s.e. 0.0684 0.0729 0.0778 0.0733 0.0683 0.0115
##
## sigma^2 = 0.3797: log likelihood = -156.02
## AIC=326.04 AICc=326.74 BIC=347.95
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.006282219 0.6051259 0.4519219 -Inf Inf 0.4756756 -0.03090491
coeftest(model10)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.6368975 0.0683826 -9.3137 < 2.2e-16 ***
## ar2 -0.7779711 0.0729434 -10.6654 < 2.2e-16 ***
## ar3 -0.6969115 0.0778069 -8.9569 < 2.2e-16 ***
## ar4 -0.5466870 0.0732836 -7.4599 8.660e-14 ***
## ar5 -0.4594229 0.0683392 -6.7227 1.784e-11 ***
## intercept 0.0035018 0.0114504 0.3058 0.7597
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Overfitting check: fit ARIMA(6,0,0) and ARIMA(5,0,1) around the chosen ARIMA(5,0,0)
model10b <- Arima(data.dif, order=c(6,0,0), method="ML")
model10c <- Arima(data.dif, order=c(5,0,1), method = "ML")
summary(model10b)
## Series: data.dif
## ARIMA(6,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 mean
## -0.6694 -0.8163 -0.7459 -0.6014 -0.5047 -0.0702 0.0035
## s.e. 0.0770 0.0841 0.0945 0.0947 0.0844 0.0772 0.0107
##
## sigma^2 = 0.3801: log likelihood = -155.61
## AIC=327.22 AICc=328.12 BIC=352.26
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.006370254 0.6035983 0.4528699 -Inf Inf 0.4766733 -0.00728627
coeftest(model10b)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.6693654 0.0770169 -8.6912 < 2.2e-16 ***
## ar2 -0.8163052 0.0841224 -9.7038 < 2.2e-16 ***
## ar3 -0.7458703 0.0945334 -7.8900 3.021e-15 ***
## ar4 -0.6014350 0.0947053 -6.3506 2.145e-10 ***
## ar5 -0.5047100 0.0843935 -5.9804 2.225e-09 ***
## ar6 -0.0701510 0.0772427 -0.9082 0.3638
## intercept 0.0034739 0.0106777 0.3253 0.7449
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model10c)
## Series: data.dif
## ARIMA(5,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 mean
## -0.4788 -0.7003 -0.6044 -0.4783 -0.4081 -0.2019 0.0035
## s.e. 0.1521 0.0989 0.1125 0.0944 0.0888 0.1677 0.0102
##
## sigma^2 = 0.3792: log likelihood = -155.41
## AIC=326.83 AICc=327.73 BIC=351.87
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.006561576 0.6028646 0.4529101 -Inf Inf 0.4767157 0.003862243
coeftest(model10c)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.4787572 0.1520740 -3.1482 0.001643 **
## ar2 -0.7002507 0.0988572 -7.0835 1.406e-12 ***
## ar3 -0.6044196 0.1125189 -5.3717 7.799e-08 ***
## ar4 -0.4782802 0.0944448 -5.0641 4.103e-07 ***
## ar5 -0.4080629 0.0887664 -4.5970 4.285e-06 ***
## ma1 -0.2019196 0.1677205 -1.2039 0.228626
## intercept 0.0034989 0.0102289 0.3421 0.732302
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The extra AR(6) and MA(1) coefficients in the overfitted models are not significant and the information criteria do not improve, so the ARIMA(5,0,0) model on the differenced series (ARIMA(5,1,0) on the original series) is retained.
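Tabulating the information criteria of all fitted candidates makes this comparison explicit; a small sketch using the model objects created above (the list name kandidat is illustrative):
# Collect AIC, AICc, and BIC from each fitted Arima object
kandidat <- list("ARIMA(0,0,2)" = model,   "ARIMA(0,0,3)" = model1, "ARIMA(1,0,2)" = model2,
                 "ARIMA(1,0,3)" = model3,  "ARIMA(2,0,1)" = model4, "ARIMA(2,0,2)" = model5,
                 "ARIMA(2,0,3)" = model6,  "ARIMA(3,0,1)" = model7, "ARIMA(3,0,2)" = model8,
                 "ARIMA(3,0,3)" = model9,  "ARIMA(5,0,0)" = model10,
                 "ARIMA(6,0,0)" = model10b, "ARIMA(5,0,1)" = model10c)
data.frame(AIC  = sapply(kandidat, function(m) m$aic),
           AICc = sapply(kandidat, function(m) m$aicc),
           BIC  = sapply(kandidat, function(m) m$bic))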
# Residual diagnostics
sisaan <- model10$residuals
# Normality of residuals
shapiro.test(sisaan)
##
## Shapiro-Wilk normality test
##
## data: sisaan
## W = 0.96676, p-value = 0.0004503
# Independence of residuals (Ljung-Box)
Box.test(sisaan, type = "Ljung")
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 0.1643, df = 1, p-value = 0.6852
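The Shapiro-Wilk test rejects residual normality (p ≈ 0.0005), while the lag-1 Ljung-Box test finds no autocorrelation (p ≈ 0.69). Since a single-lag test is a weak check, an additional (assumed) step is to run the Ljung-Box test over more lags, adjusting the degrees of freedom for the five estimated AR coefficients:
# Ljung-Box over a longer lag window, with df adjusted for the 5 AR parameters (sketch)
Box.test(sisaan, lag = 24, type = "Ljung-Box", fitdf = 5)
# checkresiduals(model10) from the forecast package produces the same diagnostics graphically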
# Forecasting
ramalan <- forecast(Arima(data.ts, order=c(5,1,0),method="ML",include.drift = TRUE),h = length(data.test))
data.ramalan <- ramalan$mean
data.ramalan.ts <- ts(data.ramalan, start = c(2023, 10), frequency = 12)
plot(ramalan,col="black",col.sub ="black",col.axis="black",
col.lab="black",col.main="black",lwd=2)
box(col="black",lwd=2)
hasilforecast<-matrix(data=c(data.ramalan[1:12]), nrow = 12, ncol = 1)
colnames(hasilforecast)<-c("Hasil Forecast")
head(hasilforecast)
## Hasil Forecast
## [1,] 8.168776
## [2,] 8.901080
## [3,] 8.986021
## [4,] 8.889284
## [5,] 8.912894
## [6,] 8.625313
# Back-transform to the original scale: exp(y) - 1 inverts the log(x + 1) transform applied earlier
hasilforecast <- matrix(data = exp(data.ramalan[1:15]) - 1, nrow = 15, ncol = 1)
colnames(hasilforecast) <- c("Hasil Forecast")
# Display the forecasts on the original passenger scale
hasilforecast
## Hasil Forecast
## [1,] 3529.023
## [2,] 7339.897
## [3,] 7990.602
## [4,] 7253.821
## [5,] 7427.125
## [6,] 5570.906
## [7,] 5054.168
## [8,] 5879.813
## [9,] 6602.481
## [10,] 6557.396
## [11,] 6511.253
## [12,] 6209.339
## [13,] 5923.222
## [14,] 5979.447
## [15,] 6207.540
The forecast covers 15 months, through the end of 2024; from early 2025 onward the forecast curve flattens into an essentially straight line.
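To see this flattening on the original passenger scale, the point forecasts can be back-transformed and plotted; a sketch in which exp(y) - 1 inverts the log(x + 1) transform (the name ramalan.asli is illustrative):
# Sketch: back-transform the point forecasts to passenger counts and plot them
ramalan.asli <- ts(exp(data.ramalan) - 1, start = c(2023, 10), frequency = 12)
plot(ramalan.asli, xlab = "Periode", ylab = "Jumlah Penumpang", main = "Forecast pada Skala Asli")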
# Example date in "dd/mm/yyyy" format
start_date <- as.Date("01/10/2023", format="%d/%m/%Y")
# Build a date vector for the forecasts, assuming the forecast starts in the month after the last observation
forecast_dates <- seq(from=start_date, length.out=15, by="month")
# Attach the date vector to the back-transformed forecasts (exp(y) - 1 inverts log(x + 1))
hasilforecast <- matrix(data=exp(data.ramalan[1:15]) - 1, nrow=15, ncol=1)
colnames(hasilforecast) <- c("Hasil Forecast")
hasilforecast <- cbind(ForecastDate = forecast_dates, hasilforecast)
# Convert the matrix to a data frame
hasilforecast_df <- as.data.frame(hasilforecast)
# Convert the numeric dates back to Date class
hasilforecast_df$ForecastDate <- as.Date(hasilforecast_df$ForecastDate, origin = "1970-01-01")
# Display the forecasts with the correct dates
print(hasilforecast_df)
## ForecastDate Hasil Forecast
## 1 2023-10-01 3529.023
## 2 2023-11-01 7339.897
## 3 2023-12-01 7990.602
## 4 2024-01-01 7253.821
## 5 2024-02-01 7427.125
## 6 2024-03-01 5570.906
## 7 2024-04-01 5054.168
## 8 2024-05-01 5879.813
## 9 2024-06-01 6602.481
## 10 2024-07-01 6557.396
## 11 2024-08-01 6511.253
## 12 2024-09-01 6209.339
## 13 2024-10-01 5923.222
## 14 2024-11-01 5979.447
## 15 2024-12-01 6207.540
# Convert the test data into a data frame
data_test_df <- data.frame(Actual = data.test)
# Convert the forecasts into a data frame, making sure the number of rows matches the test data
data_forecast_df <- data.frame(Forecast = head(data.ramalan, length(data.test)))
ramalan <- data.ramalan[1:43]
calculate_sMAPE <- function(actual, forecast) {
  # Make sure the actual and forecast vectors have the same length
  if (length(actual) != length(forecast)) {
    stop("The lengths of actual and forecast must be equal")
  }
  # Add a tiny constant to the denominator to avoid division by zero
  smape <- mean(2 * abs(forecast - actual) / (abs(actual) + abs(forecast) + .Machine$double.eps), na.rm = TRUE) * 100
  return(smape)
}
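A quick sanity check of the function on small made-up vectors (hypothetical numbers, for illustration only):
# Hypothetical example values, just to exercise the function
calculate_sMAPE(actual = c(100, 200, 300), forecast = c(110, 190, 330))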
# Model validation
## Compute the forecast errors
error <- data.frame(data.test) - data.frame(data.ramalan[1:43])
## SSE (Sum of Squared Errors)
SSE <- sum(error^2, na.rm = TRUE)
## MSE (Mean Squared Error)
MSE <- sapply(error^2, mean, na.rm = TRUE)
## RMSE (Root Mean Squared Error)
RMSE1 <- sqrt(MSE)
# Mean of the actual (test) values
mean_actual <- mean(data.test, na.rm = TRUE)
# RMSE expressed as a percentage of the mean actual value
RMSE <- (RMSE1 / mean_actual) * 100
## MAD (Mean Absolute Deviation)
MAD <- sapply(abs(error), mean, na.rm = TRUE)
## MAPE (Mean Absolute Percentage Error)
r.error <- (error / data.frame(data.test)) * 100 # relative error
#MAPE <- sapply(abs(r.error), mean, na.rm = T)
sMAPE <- calculate_sMAPE(data.test, ramalan)
akurasifarima <- data.frame(
  "Ukuran Keakuratan" = c("SSE", "MSE", "sMAPE", "RMSE", "MAD"),
  "Forecasting" = c(SSE, MSE, sMAPE, RMSE, MAD))
akurasifarima
## Ukuran.Keakuratan Forecasting
## 1 SSE 163.090388
## 2 MSE 3.792800
## 3 sMAPE 17.191577
## 4 RMSE 24.481428
## 5 MAD 1.192321
The sMAPE is about 17% and the relative RMSE is about 24% of the mean test value (both computed on the log scale).
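Because these measures are computed on the log(x + 1) scale, it can also be informative to recompute the sMAPE on the original passenger counts; a sketch using the same actual and forecast vectors as above (the names aktual.asli and ramalan.asli are illustrative):
# Sketch: sMAPE on the original passenger scale (exp(y) - 1 undoes the log(x + 1) transform)
aktual.asli  <- exp(as.numeric(data.test)) - 1
ramalan.asli <- exp(ramalan) - 1
calculate_sMAPE(aktual.asli, ramalan.asli)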