Data import

require(fpp2)
set.seed(123)

setwd("C:/Users/Ryu Uezato/Desktop/Monash/Forecasting/AS2")

dt = read.csv("Visitor Arrivals.csv", header = FALSE)

dt.1 = ts(dt[,-1], start = c(1978,1), end = c(2019,7), frequency = 12)

Phase1 Model Identification

Plotting Full Set

autoplot(dt.1)+
    stat_smooth(color = "#FC4E07", fill = "#FC4E07", method = "loess")+
    ylab("Number of Visitor Arrivals")+
    xlab("Years")+
    ggtitle("Number of Visitors Arrivals in Singapore from 1978 to 2019")+
    theme_light()+
    theme(plot.title = element_text(size = 13))

Correcting outliers

dt = dt[,-1]
dt[12*25+4] = round(mean(dt[c(12*24+4, 12*26+4)]), digits = 0)
dt[12*25+5] = round(mean(dt[c(12*24+5, 12*26+5)]), digits = 0)
dt[12*25+6] = round(mean(dt[c(12*24+6, 12*26+6)]), digits = 0)  #taking average of the same month of a year before and later of outliers month ie mean(2002,4 and 2004,4) etc

dt = ts(dt, start = c(1978,1), end = c(2019,7), frequency = 12) 


autoplot(dt)+
    stat_smooth(color = "#FC4E07", fill = "#FC4E07", method = "loess")+
    ylab("Number of Visitor Arrivals")+
    xlab("Years")+
    ggtitle("Number of Visitors Arrivals in Singapore from 1978 to 2019")+
    theme_light()+
    theme(plot.title = element_text(size = 20))

Chopping Data into train and test

train = head(dt, round(length(dt)*0.8)) %>%  as.ts()
h = length(dt) -length(train)
test = tail(dt, h) %>% as.ts()

autoplot(train, series = "Training Set")+autolayer(test, series = "Test Set")

Plotting trainig set to determine if transformation is needed

autoplot(train)+
    theme_light()+
    stat_smooth(color = "lightgreen", fill = "lightgreen", method = "loess")+
    ylab("Number of Visitor Arrivals")+
    xlab("Years")+
    ggtitle("Number of Visitors Arrivals in Singapore from 1978 to 2011(Training Set)")+
    theme_light()+
    theme(plot.title = element_text(size = 20))

Plotting Seasonal Plot of trainig set

ggseasonplot(train, year.labels = TRUE, year.labels.left = TRUE)+
    ylab("Number of Visitor Arrivals ") +
    xlab("Months") +
    ggtitle("Seasonal plot : Number of Visitors Arrivals in Singapore")  
ggsubseriesplot(train)+
    ylab("Number of Visitor Arrivals")+
    xlab("Years")+
    ggtitle("Subseries plot : Number of Visitors Arrivals in Singapore")
ggAcf(train, lag.max = 48)+
   ggtitle("ACF plot : Number of Visitors Arrivals in Singapore")

Variance and Mean stabilization

lambda.dt = BoxCox.lambda(train)

train.v = BoxCox(train, lambda = lambda.dt)

train.v %>% nsdiffs()
[1] 1
train.v %>% diff(lag = 12) %>% ndiffs()
[1] 0
train.v %>% diff(lag = 12) %>% autoplot()
train.v %>% diff(lag = 12) %>% ggtsdisplay(main = "Seasonally differenced Visitors Arrivals in Singapore ")

Possible Model List

Based on the ggtsdisplay above we may consider

- ARIMA(3,0,0)(3,1,0)[12] with constant - ARIMA(4,0,0)(3,1,0)[12] with constant - ARIMA(3,0,0)(2,1,0)[12] with constant - ARIMA(2,0,0)(2,1,0)[12] with contant


Phase2 Estimation and Testing

Estimating Potential Model

fit1 = Arima(train, order = c(3,0,0), seasonal = c(3,1,0), include.constant = TRUE, lambda=lambda.dt) 
summary(fit1)
Series: train 
ARIMA(3,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     sar1     sar2     sar3   drift
      0.5335  0.2545  0.1306  -0.5219  -0.3968  -0.2349  0.0032
s.e.  0.0505  0.0559  0.0508   0.0508   0.0533   0.0501  0.0008

sigma^2 estimated as 0.001073:  log likelihood=774.73
AIC=-1533.47   AICc=-1533.09   BIC=-1501.8

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -282.8799 27287.52 17644.32 -0.07659795 3.296094 0.4049379 -0.04304853
fit2 = Arima(train, order = c(4,0,0), seasonal = c(3,1,0), include.constant = TRUE, lambda=lambda.dt) 
summary(fit2)
Series: train 
ARIMA(4,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4     sar1     sar2     sar3   drift
      0.5113  0.2094  0.0414  0.1728  -0.5499  -0.4045  -0.2426  0.0033
s.e.  0.0502  0.0566  0.0564  0.0506   0.0510   0.0538   0.0500  0.0009

sigma^2 estimated as 0.001044:  log likelihood=780.47
AIC=-1542.93   AICc=-1542.45   BIC=-1507.31

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -234.7073 26988.03 17538.64 -0.08664446 3.258615 0.4025127 -0.01952831

AutoARIMA

arima.auto = auto.arima(train, stepwise = FALSE, lambda = lambda.dt)
summary(arima.auto)
Series: train 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ma1     sar1     sma1     sma2   drift
      0.9772  -0.466  -0.0497  -0.6165  -0.2078  0.0032
s.e.  0.0123   0.049   0.2772   0.2724   0.2131  0.0006

sigma^2 estimated as 0.0009618:  log likelihood=792.12
AIC=-1570.24   AICc=-1569.95   BIC=-1542.53

Training set error measures:
                  ME     RMSE      MAE         MPE     MAPE      MASE      ACF1
Training set -164.44 25788.03 16733.45 -0.03250302 3.150999 0.3840334 0.0098187
fitness.table = cbind(rbind(fit1[["aicc"]],fit2[["aicc"]], arima.auto[["aicc"]]),
                      rbind(summary(fit1)[,"MAPE"], summary(fit2)[,"MAPE"], summary(arima.auto)[,"MAPE"]),
                      rbind(summary(fit1)[,"MASE"], summary(fit2)[,"MASE"], summary(arima.auto)[,"MASE"]),
                      rbind(summary(fit1)[,"RMSE"], summary(fit2)[,"RMSE"], summary(arima.auto)[,"RMSE"]),
                      rbind("3", "2", "1"))
Series: train 
ARIMA(3,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     sar1     sar2     sar3   drift
      0.5335  0.2545  0.1306  -0.5219  -0.3968  -0.2349  0.0032
s.e.  0.0505  0.0559  0.0508   0.0508   0.0533   0.0501  0.0008

sigma^2 estimated as 0.001073:  log likelihood=774.73
AIC=-1533.47   AICc=-1533.09   BIC=-1501.8

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -282.8799 27287.52 17644.32 -0.07659795 3.296094 0.4049379 -0.04304853
Series: train 
ARIMA(4,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4     sar1     sar2     sar3   drift
      0.5113  0.2094  0.0414  0.1728  -0.5499  -0.4045  -0.2426  0.0033
s.e.  0.0502  0.0566  0.0564  0.0506   0.0510   0.0538   0.0500  0.0009

sigma^2 estimated as 0.001044:  log likelihood=780.47
AIC=-1542.93   AICc=-1542.45   BIC=-1507.31

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -234.7073 26988.03 17538.64 -0.08664446 3.258615 0.4025127 -0.01952831
Series: train 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ma1     sar1     sma1     sma2   drift
      0.9772  -0.466  -0.0497  -0.6165  -0.2078  0.0032
s.e.  0.0123   0.049   0.2772   0.2724   0.2131  0.0006

sigma^2 estimated as 0.0009618:  log likelihood=792.12
AIC=-1570.24   AICc=-1569.95   BIC=-1542.53

Training set error measures:
                  ME     RMSE      MAE         MPE     MAPE      MASE      ACF1
Training set -164.44 25788.03 16733.45 -0.03250302 3.150999 0.3840334 0.0098187
Series: train 
ARIMA(3,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     sar1     sar2     sar3   drift
      0.5335  0.2545  0.1306  -0.5219  -0.3968  -0.2349  0.0032
s.e.  0.0505  0.0559  0.0508   0.0508   0.0533   0.0501  0.0008

sigma^2 estimated as 0.001073:  log likelihood=774.73
AIC=-1533.47   AICc=-1533.09   BIC=-1501.8

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -282.8799 27287.52 17644.32 -0.07659795 3.296094 0.4049379 -0.04304853
Series: train 
ARIMA(4,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4     sar1     sar2     sar3   drift
      0.5113  0.2094  0.0414  0.1728  -0.5499  -0.4045  -0.2426  0.0033
s.e.  0.0502  0.0566  0.0564  0.0506   0.0510   0.0538   0.0500  0.0009

sigma^2 estimated as 0.001044:  log likelihood=780.47
AIC=-1542.93   AICc=-1542.45   BIC=-1507.31

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -234.7073 26988.03 17538.64 -0.08664446 3.258615 0.4025127 -0.01952831
Series: train 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ma1     sar1     sma1     sma2   drift
      0.9772  -0.466  -0.0497  -0.6165  -0.2078  0.0032
s.e.  0.0123   0.049   0.2772   0.2724   0.2131  0.0006

sigma^2 estimated as 0.0009618:  log likelihood=792.12
AIC=-1570.24   AICc=-1569.95   BIC=-1542.53

Training set error measures:
                  ME     RMSE      MAE         MPE     MAPE      MASE      ACF1
Training set -164.44 25788.03 16733.45 -0.03250302 3.150999 0.3840334 0.0098187
Series: train 
ARIMA(3,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     sar1     sar2     sar3   drift
      0.5335  0.2545  0.1306  -0.5219  -0.3968  -0.2349  0.0032
s.e.  0.0505  0.0559  0.0508   0.0508   0.0533   0.0501  0.0008

sigma^2 estimated as 0.001073:  log likelihood=774.73
AIC=-1533.47   AICc=-1533.09   BIC=-1501.8

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -282.8799 27287.52 17644.32 -0.07659795 3.296094 0.4049379 -0.04304853
Series: train 
ARIMA(4,0,0)(3,1,0)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4     sar1     sar2     sar3   drift
      0.5113  0.2094  0.0414  0.1728  -0.5499  -0.4045  -0.2426  0.0033
s.e.  0.0502  0.0566  0.0564  0.0506   0.0510   0.0538   0.0500  0.0009

sigma^2 estimated as 0.001044:  log likelihood=780.47
AIC=-1542.93   AICc=-1542.45   BIC=-1507.31

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -234.7073 26988.03 17538.64 -0.08664446 3.258615 0.4025127 -0.01952831
Series: train 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ma1     sar1     sma1     sma2   drift
      0.9772  -0.466  -0.0497  -0.6165  -0.2078  0.0032
s.e.  0.0123   0.049   0.2772   0.2724   0.2131  0.0006

sigma^2 estimated as 0.0009618:  log likelihood=792.12
AIC=-1570.24   AICc=-1569.95   BIC=-1542.53

Training set error measures:
                  ME     RMSE      MAE         MPE     MAPE      MASE      ACF1
Training set -164.44 25788.03 16733.45 -0.03250302 3.150999 0.3840334 0.0098187
colnames(fitness.table) = c("AICc", "MAPE", "MASE","RMSE", "Ranking") ; rownames(fitness.table) = c("ARIMA(3,0,0)(3,1,0)[12] with constant","ARIMA(4,0,0)(3,1,0)[12] with constant","ARIMA(1,0,1)(1,1,2)[12] with constant")

fitness.table
                                      AICc                MAPE               MASE               
ARIMA(3,0,0)(3,1,0)[12] with constant "-1533.08885564579" "3.29609407994998" "0.404937873265623"
ARIMA(4,0,0)(3,1,0)[12] with constant "-1542.45459694574" "3.25861482846603" "0.402512659770671"
ARIMA(1,0,1)(1,1,2)[12] with constant "-1569.94713575454" "3.15099869411926" "0.384033387016071"
                                      RMSE               Ranking
ARIMA(3,0,0)(3,1,0)[12] with constant "27287.5191750791" "3"    
ARIMA(4,0,0)(3,1,0)[12] with constant "26988.0313059203" "2"    
ARIMA(1,0,1)(1,1,2)[12] with constant "25788.0262804677" "1"    

Residual diagnosis

checkresiduals(fit2)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,0)(3,1,0)[12] with drift
Q* = 29.649, df = 16, p-value = 0.01991

Model df: 8.   Total lags used: 24
checkresiduals(arima.auto)

    Ljung-Box test

data:  Residuals from ARIMA(1,0,1)(1,1,2)[12] with drift
Q* = 30.113, df = 18, p-value = 0.03637

Model df: 6.   Total lags used: 24

Re-indentification

fit2 %>% resid() %>% ggtsdisplay(main = "Residuals from ARIMA(4,0,0)(3,1,0)[12] with constant")
arima.auto %>% residuals() %>% ggtsdisplay(main = "Residuals from ARIMA(1,0,1)(1,1,2)[12] with constant")

Based on ggtsdisplay of resid, increasing MA non

#ARIMA(4,0,0)(3,1,0)[12] with constant

fit2.2 = Arima(train, order = c(4,0,0), seasonal = c(3,1,3), include.constant = TRUE, lambda=lambda.dt)
checkresiduals(fit2.2)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,0)(3,1,3)[12] with drift
Q* = 27.52, df = 13, p-value = 0.01055

Model df: 11.   Total lags used: 24
ggtsdisplay(resid(fit2.2), main = "Residuals from ARIMA(4,0,0)(3,1,3)[12] with constant") 
summary(fit2.2)
Series: train 
ARIMA(4,0,0)(3,1,3)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4     sar1     sar2    sar3     sma1    sma2     sma3   drift
      0.5289  0.2011  0.0568  0.1701  -0.4740  -0.7426  0.0966  -0.1981  0.2757  -0.6989  0.0032
s.e.  0.0511  0.0579  0.0573  0.0507   0.4604   0.2849  0.1157   0.4554  0.4907   0.2419  0.0006

sigma^2 estimated as 0.0009604:  log likelihood=794.98
AIC=-1565.95   AICc=-1565.12   BIC=-1518.45

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE     MASE        ACF1
Training set -148.5519 25564.33 16820.18 -0.03497417 3.149243 0.386024 -0.02552786
fit2.3 = Arima(train, order = c(4,0,5), seasonal = c(3,1,3), include.constant = TRUE, lambda=lambda.dt) 
checkresiduals(fit2.3)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,5)(3,1,3)[12] with drift
Q* = 21.844, df = 8, p-value = 0.005213

Model df: 16.   Total lags used: 24
ggtsdisplay(resid(fit2.3), main = "Residuals from ARIMA(4,0,5)(3,1,3)[12] with constant")
summary(fit2.3)
Series: train 
ARIMA(4,0,5)(3,1,3)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
          ar1     ar2     ar3     ar4     ma1     ma2     ma3      ma4     ma5     sar1     sar2
      -0.3883  0.2151  0.3876  0.6888  0.9290  0.4842  0.1130  -0.3640  0.0781  -0.6467  -0.7309
s.e.   0.2087  0.1161  0.1986  0.1160  0.2164  0.1875  0.1298   0.0906  0.0648   0.5396   0.2061
        sar3     sma1    sma2     sma3   drift
      0.1020  -0.0136  0.1755  -0.7216  0.0032
s.e.  0.1202   0.5296  0.2699   0.2854  0.0006

sigma^2 estimated as 0.0009435:  log likelihood=799.51
AIC=-1565.03   AICc=-1563.37   BIC=-1497.73

Training set error measures:
                   ME     RMSE      MAE         MPE     MAPE      MASE         ACF1
Training set -127.131 25050.02 16393.95 -0.03786575 3.087644 0.3762419 -0.007796833
fit2.3.2 = Arima(train, order = c(6,0,5), seasonal = c(3,1,3), include.constant = TRUE, lambda=lambda.dt) 
checkresiduals(fit2.3.2)

    Ljung-Box test

data:  Residuals from ARIMA(6,0,5)(3,1,3)[12] with drift
Q* = 16.457, df = 6, p-value = 0.0115

Model df: 18.   Total lags used: 24
ggtsdisplay(resid(fit2.3.2), main = "Residuals from ARIMA(6,0,5)(3,1,3)[12] with constant")
summary(fit2.3.2)
Series: train 
ARIMA(6,0,5)(3,1,3)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1     ar2     ar3     ar4      ar5     ar6     ma1      ma2      ma3      ma4     ma5
      0.2823  0.5319  0.4120  0.1175  -0.6516  0.2486  0.2559  -0.2444  -0.4140  -0.2310  0.6826
s.e.  0.1342  0.1152  0.1656  0.1280   0.1413  0.1029  0.1163   0.1357   0.0901   0.1125  0.1113
         sar1     sar2    sar3     sma1    sma2     sma3   drift
      -0.4843  -0.7138  0.0780  -0.1673  0.2239  -0.6452  0.0031
s.e.   0.5420   0.1825  0.1127   0.5403  0.3854   0.2154  0.0005

sigma^2 estimated as 0.0009264:  log likelihood=803.7
AIC=-1569.41   AICc=-1567.34   BIC=-1494.2

Training set error measures:
                    ME     RMSE      MAE         MPE     MAPE      MASE        ACF1
Training set -211.9534 24866.09 16204.16 -0.01465611 3.037107 0.3718862 -0.02837137
fit2.4 = Arima(train, order = c(4,0,5), seasonal = c(3,1,0), include.constant = TRUE, lambda=lambda.dt) 
checkresiduals(fit2.4)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,5)(3,1,0)[12] with drift
Q* = 18.187, df = 11, p-value = 0.07735

Model df: 13.   Total lags used: 24
ggtsdisplay(resid(fit2.4), main = "Residuals from ARIMA(4,0,5)(3,1,0)[12] with constant")## Would be the best
summary(2.4)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    2.4     2.4     2.4     2.4     2.4     2.4 

Residuals Check

checkresiduals(fit2.4, lag = 24)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,5)(3,1,0)[12] with drift
Q* = 18.187, df = 11, p-value = 0.07735

Model df: 13.   Total lags used: 24
checkresiduals(fit2.4, lag = 36)

    Ljung-Box test

data:  Residuals from ARIMA(4,0,5)(3,1,0)[12] with drift
Q* = 23.828, df = 23, p-value = 0.4134

Model df: 13.   Total lags used: 36
checkresiduals(arima.auto, lag = 24)

    Ljung-Box test

data:  Residuals from ARIMA(1,0,1)(1,1,2)[12] with drift
Q* = 30.113, df = 18, p-value = 0.03637

Model df: 6.   Total lags used: 24
checkresiduals(arima.auto, lag = 36)

    Ljung-Box test

data:  Residuals from ARIMA(1,0,1)(1,1,2)[12] with drift
Q* = 34.717, df = 30, p-value = 0.2531

Model df: 6.   Total lags used: 36

We may select ARIMA(4,0,5)(3,1,0)[12] with drift and ARIMA(1,0,1)(1,1,2)[12] with drift

Information and AICc

in.sample.table = rbind(cbind(accuracy(fit2.4)[,"MASE"],accuracy(arima.auto)[,"MASE"]),cbind(accuracy(fit2.4)[,"MAPE"],accuracy(arima.auto)[,"MAPE"]),cbind(fit2.4[["aicc"]],arima.auto[["aicc"]]))

rownames(in.sample.table) = c("MASE", "MAPE", "AICc") ; colnames(in.sample.table) = c("ARIMA(4,0,5)(3,1,0)[12] with constant","ARIMA(1,0,1)(1,1,2)[12] with constant")

in.sample.table
     ARIMA(4,0,5)(3,1,0)[12] with constant ARIMA(1,0,1)(1,1,2)[12] with constant
MASE                              0.393111                             0.3840334
MAPE                              3.197522                             3.1509987
AICc                          -1548.023305                         -1569.9471358

Out-sample accuracy

arima.f = forecast(fit2.4, h = length(test))

arima.auto.f = forecast(arima.auto, h = length(test))

ac.out.table = rbind(cbind(accuracy(arima.f, test)[2,"MASE"], accuracy(arima.auto.f, test)[2,"MASE"]),
      cbind(accuracy(arima.f, test)[2,"MAPE"], accuracy(arima.auto.f, test)[2,"MAPE"]),
      cbind(accuracy(arima.f, test)[2,"RMSE"], accuracy(arima.auto.f, test)[2,"RMSE"]))

rownames(ac.out.table) = c("MASE", "MAPE", "RMSE"); colnames(ac.out.table) = c("ARIMA(4,0,5)(3,1,0)[12] with constant", "ARIMA(1,0,1)(1,1,2)[12] with constant")

ac.out.table
     ARIMA(4,0,5)(3,1,0)[12] with constant ARIMA(1,0,1)(1,1,2)[12] with constant
MASE                              1.713150                              1.561988
MAPE                              5.674618                              5.194949
RMSE                          92675.919367                          85807.839740

Estimating with full data set

arima.full = Arima(dt, order = c(1,0,1), seasonal = c(1,1,2), include.constant = TRUE, lambda=lambda.dt)
summary(arima.full)
Series: dt 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1      ma1    sar1     sma1     sma2   drift
      0.9713  -0.4539  0.0985  -0.7296  -0.1151  0.0031
s.e.  0.0125   0.0458  0.2213   0.2207   0.1706  0.0004

sigma^2 estimated as 0.0008879:  log likelihood=1017.37
AIC=-2020.73   AICc=-2020.5   BIC=-1991.41

Training set error measures:
                    ME    RMSE      MAE         MPE     MAPE      MASE       ACF1
Training set -116.3212 31300.3 21016.25 -0.02771271 3.097895 0.4053521 0.02570216

H=24

arima.fcast = forecast(arima.full, h = 24)
summary(arima.fcast)

Forecast method: ARIMA(1,0,1)(1,1,2)[12] with drift

Model Information:
Series: dt 
ARIMA(1,0,1)(1,1,2)[12] with drift 
Box Cox transformation: lambda= -0.02807425 

Coefficients:
         ar1      ma1    sar1     sma1     sma2   drift
      0.9713  -0.4539  0.0985  -0.7296  -0.1151  0.0031
s.e.  0.0125   0.0458  0.2213   0.2207   0.1706  0.0004

sigma^2 estimated as 0.0008879:  log likelihood=1017.37
AIC=-2020.73   AICc=-2020.5   BIC=-1991.41

Error measures:
                    ME    RMSE      MAE         MPE     MAPE      MASE       ACF1
Training set -116.3212 31300.3 21016.25 -0.02771271 3.097895 0.4053521 0.02570216

Forecasts:
         Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
Aug 2019        1733765 1637522 1835833 1588815 1892345
Sep 2019        1449643 1359807 1545592 1314588 1599004
Oct 2019        1541912 1437420 1654229 1385080 1717058
Nov 2019        1500204 1391235 1617967 1336878 1684114
Dec 2019        1728199 1594694 1873221 1528360 1954999
Jan 2020        1685566 1548951 1834599 1481293 1918912
Feb 2020        1571014 1438511 1716098 1373074 1798408
Mar 2020        1674959 1528228 1836213 1455970 1927953
Apr 2020        1665982 1515350 1832051 1441351 1926760
May 2020        1596291 1448017 1760218 1375333 1853906
Jun 2020        1624929 1470095 1796576 1394355 1894881
Jul 2020        1897680 1711989 2104140 1621358 2222647
Aug 2020        1828882 1636223 2044939 1542809 2169769
Sep 2020        1527538 1361367 1714633 1281041 1823058
Oct 2020        1635466 1451382 1843637 1362697 1964679
Nov 2020        1598244 1413262 1808210 1324396 1930640
Dec 2020        1844850 1625016 2095374 1519732 2241898
Jan 2021        1781932 1564923 2029997 1461238 2175424
Feb 2021        1666372 1459619 1903353 1361037 2042568
Mar 2021        1786135 1559988 2046123 1452407 2199201
Apr 2021        1763130 1536207 2024656 1428462 2178930
May 2021        1701513 1479366 1958098 1374066 2109711
Jun 2021        1722773 1494625 1986875 1386667 2143195
Jul 2021        2012332 1741328 2326885 1613351 2513446
autoplot(arima.fcast, level = 95)+
    xlab("Years")+
    ylab("Number of Visitor Arrivals")+
    theme_light()+
    theme(legend.title = element_text(colour = "black", size = 25),
          legend.text = element_text(colour = "black", size = 25))+
    theme(legend.position = c(0.8,0.2))+
    ggtitle("Forecasts of Number of Visitor Arrivals in Singapore for the Next 24 Months")+
    theme(plot.title = element_text(size = 30))

Comparison with an ETS model

ets.model = ets(dt, model = "ZZZ", lambda = lambda.dt)
summary(ets.model)
ETS(A,Ad,A) 

Call:
 ets(y = dt, model = "ZZZ", lambda = lambda.dt) 

  Box-Cox transformation: lambda= -0.0281 

  Smoothing parameters:
    alpha = 0.4465 
    beta  = 0.0121 
    gamma = 0.1769 
    phi   = 0.98 

  Initial states:
    l = 10.1692 
    b = 0.0083 
    s = 0.0572 0.0113 5e-04 -0.0413 0.0885 0.0388
           -0.0401 -0.0249 -0.0297 -0.0111 -0.0544 0.0052

  sigma:  0.0307

      AIC      AICc       BIC 
-359.1399 -357.7149 -283.3130 

Training set error measures:
                  ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
Training set 2757.32 33397.09 22515.4 0.2546606 3.224471 0.4342672 0.08679948
ets.fcast = forecast(ets.model, h = 24)
summary(ets.fcast)

Forecast method: ETS(A,Ad,A)

Model Information:
ETS(A,Ad,A) 

Call:
 ets(y = dt, model = "ZZZ", lambda = lambda.dt) 

  Box-Cox transformation: lambda= -0.0281 

  Smoothing parameters:
    alpha = 0.4465 
    beta  = 0.0121 
    gamma = 0.1769 
    phi   = 0.98 

  Initial states:
    l = 10.1692 
    b = 0.0083 
    s = 0.0572 0.0113 5e-04 -0.0413 0.0885 0.0388
           -0.0401 -0.0249 -0.0297 -0.0111 -0.0544 0.0052

  sigma:  0.0307

      AIC      AICc       BIC 
-359.1399 -357.7149 -283.3130 

Error measures:
                  ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
Training set 2757.32 33397.09 22515.4 0.2546606 3.224471 0.4342672 0.08679948

Forecasts:
         Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
Aug 2019        1711062 1613457 1814747 1564124 1872228
Sep 2019        1413012 1325068 1506968 1280811 1559282
Oct 2019        1524473 1421379 1635271 1369733 1697242
Nov 2019        1503998 1394708 1622113 1340192 1688458
Dec 2019        1755231 1618500 1903865 1550601 1987726
Jan 2020        1706815 1565772 1860954 1496015 1948273
Feb 2020        1572266 1435277 1722732 1367788 1808301
Mar 2020        1659466 1507061 1827762 1432284 1923859
Apr 2020        1633242 1475959 1807807 1399089 1907871
May 2020        1559453 1402567 1734436 1326180 1835116
Jun 2020        1577367 1411762 1763009 1331440 1870233
Jul 2020        1840986 1638975 2068682 1541407 2200746
Aug 2020        1753137 1548459 1985731 1450221 2121478
Sep 2020        1446870 1272931 1645337 1189706 1761525
Oct 2020        1560340 1366107 1783077 1273530 1913971
Nov 2020        1538654 1341058 1766306 1247213 1900564
Dec 2020        1795032 1556463 2071354 1443622 2234978
Jan 2021        1744705 1506145 2022284 1393702 2187224
Feb 2021        1606383 1380965 1869803 1275068 2026842
Mar 2021        1694802 1450106 1982149 1335583 2154090
Apr 2021        1667301 1420299 1958678 1305101 2133636
May 2021        1591274 1349788 1877397 1237542 2049774
Jun 2021        1608913 1358631 1906839 1242711 2086961
Jul 2021        1877219 1577011 2236491 1438539 2454589
autoplot(dt)+
    autolayer(arima.fcast$mean, lwd = 1.0, series = "ARIMA Forecast")+
    autolayer(ets.fcast[["mean"]], lwd = 1.0, lty = 2, series = "ETS Forecast")+
    theme_light()+
    theme(legend.title = element_text(colour = "black", size = 25),
          legend.text = element_text(colour = "black", size = 25))+
    theme(legend.position = c(0.8,0.2))+
    xlab("Years")+
    ylab("Number of Visitor Arrivals")+
    ggtitle("Forecasts from 2 models")+
    theme(plot.title = element_text(size = 30))

16/10/2019 consultation

  • autoARIMA better goodness of fit not white noise at lag = 24,
  • autoARIMA ggtsdisplay may not able to updata
  • Manual ARIMA sightly worse goodness of fit, but whitenoise(but complex model)