library(ggplot2)
library(forecast)
library(tseries)
library(gridExtra)
library(docstring)
## 
## Attaching package: 'docstring'
## The following object is masked from 'package:utils':
## 
##     ?
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(xts)
library(timeSeries)
## Loading required package: timeDate
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
library(zoo)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:timeSeries':
## 
##     outlier
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(psych)
library(expsmooth)
library(fma)
library(fpp2)
library(fGarch)
## Loading required package: fBasics
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:psych':
## 
##     tr
## The following object is masked from 'package:TTR':
## 
##     volatility

We use the “monthly air passenger miles in US” from 1960 to 1977, and evalute the multiple models selections includes Auto ARIMA, ETS models. Firstly, there are 216 variables(one month worth) and these datasets would be converted into time series for the further analysis.

setwd ("~/desktop/ADEC7460.02 Spring 2019 Predictive Analytics/Dataset")
airps= read.csv("monthly-us-air-passenger-miles-j.csv")
colnames(airps)<-c("Month","monthlypassenger")
airps<-airps[-217,]
airps_dataTS <- ts(airps$monthlypassenger, start=c(1960, 1), end=c(1977,12), frequency=12)
describe(airps)
##                   vars   n   mean    sd median trimmed   mad min max range
## Month*               1 216 108.50 62.50  108.5  108.50 80.06   1 216   215
## monthlypassenger*    2 216  99.59 53.17  104.5  100.93 68.94   2 185   183
##                    skew kurtosis   se
## Month*             0.00    -1.22 4.25
## monthlypassenger* -0.17    -1.23 3.62
ts.plot(airps_dataTS)

The historical data show the seasonality.

#Classical additive decomposition 
airps_dataTS %>% decompose(type="additive") %>% 
  autoplot() + xlab("Year") +
  ggtitle("Classical additive decomposition of Monthly Air Passenger Miles")

#Moving Average Order=5
ma(airps_dataTS,5)
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
## 1960    NA    NA  97.6 100.6 104.8 109.2 109.6 110.2 105.6 101.6  97.6
## 1961  75.8  78.0  78.8  83.4 105.2 109.0 109.6 111.2 106.8 105.6 103.8
## 1962 102.8 106.8 107.8 111.0 114.2 116.4 115.6 115.4 111.0 110.4 109.2
## 1963 109.0 113.6 116.4 119.8 125.6 128.8 129.0 130.2 127.6 127.2 126.0
## 1964 126.4 128.6 129.4 132.2 135.6 138.4 139.0 139.8 136.8 136.2 134.8
## 1965 134.4 137.6 138.4 140.4 144.6 147.6 148.2 149.4 147.6 147.4 146.4
## 1966 147.0 150.0 150.6 152.8 152.2 151.2 151.2 151.4 150.0 153.2 155.0
## 1967 155.6 156.8 156.6 159.0 162.6 165.0 165.8 166.4 164.4 163.6 161.6
## 1968 162.0 164.0 163.8 166.2 169.2 172.0 171.8 172.2 169.6 169.6 167.8
## 1969 167.8 170.0 169.6 171.0 174.2 176.6 176.2 175.8 173.8 173.4 138.0
## 1970 107.4 110.8  77.4  82.4  56.6  67.2  32.6  67.8  98.0  90.2  79.4
## 1971  77.2  44.2  43.8  47.6  22.0  33.0  32.8  33.8  29.2  23.0  15.4
## 1972  15.2  20.4  20.8  28.2  41.0  51.4  52.0  53.2  45.4  40.2  31.4
## 1973  30.0  35.6  36.2  43.0  55.8  64.2  63.8  62.2  53.4  46.6  38.4
## 1974  36.8  43.0  44.6  50.2  61.2  66.6  63.0  59.6  49.2  43.8  34.2
## 1975  32.4  34.2  32.6  39.0  52.2  59.6  62.0  64.0  56.4  51.8  46.0
## 1976  47.6  54.8  56.2  61.0  71.0  76.6  75.8  75.6  67.8  64.6  60.6
## 1977  59.2  65.4  65.6  68.8  78.4  81.6  82.2  83.8  80.0  79.2    NA
##        Dec
## 1960  76.8
## 1961 101.6
## 1962 107.0
## 1963 125.4
## 1964 134.0
## 1965 146.0
## 1966 154.2
## 1967 161.0
## 1968 166.8
## 1969 139.8
## 1970 113.4
## 1971  13.2
## 1972  27.8
## 1973  33.4
## 1974  29.6
## 1975  45.6
## 1976  56.8
## 1977    NA
autoplot(airps_dataTS, series="Data") +
  autolayer(ma(airps_dataTS,5), series="5-MA") +
  xlab("Year") + 
  ylab("Monthly Air Passenger Miles") +
    ggtitle("Monthly Air Passenger Miles From 1960 to 1977")
## Warning: Removed 4 rows containing missing values (geom_path).

#Moving Average Order=12 NOTE: For Even Order numnber, we need to apply moving average of moving average
ma(airps_dataTS,12,centre=TRUE)
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1960        NA        NA        NA        NA        NA        NA 101.75000
## 1961  94.33333  94.33333  94.37500  94.41667  94.70833  95.41667  96.45833
## 1962 108.45833 108.62500 109.12500 109.58333 110.00000 110.33333 110.75000
## 1963 115.58333 116.95833 118.08333 119.41667 120.91667 122.54167 123.95833
## 1964 130.08333 130.95833 131.75000 132.45833 133.16667 133.87500 134.66667
## 1965 138.95833 139.70833 140.54167 141.54167 142.50000 143.54167 144.45833
## 1966 149.62500 149.00000 149.08333 149.70833 150.50000 151.33333 152.00000
## 1967 156.04167 158.25000 159.62500 160.16667 160.66667 161.20833 161.79167
## 1968 165.29167 165.75000 166.20833 166.66667 167.08333 167.62500 168.29167
## 1969 170.75000 171.08333 171.41667 171.75000 172.12500 172.41667 165.66667
## 1970 116.62500 106.25000  94.50000  88.45833  89.70833  83.50000  76.70833
## 1971  62.33333  62.33333  62.45833  55.58333  41.04167  34.00000  35.00000
## 1972  28.16667  29.58333  31.00000  32.29167  33.58333  35.12500  36.50000
## 1973  43.54167  44.16667  44.95833  45.83333  46.33333  46.54167  46.95833
## 1974  50.20833  50.04167  49.62500  49.12500  48.79167  48.75000  48.58333
## 1975  43.75000  44.08333  44.37500  45.12500  46.16667  46.91667  48.20833
## 1976  58.66667  59.08333  60.04167  61.41667  62.50000  63.70833  64.79167
## 1977  68.58333  68.75000  69.50000  70.91667  72.62500  74.16667        NA
##            Aug       Sep       Oct       Nov       Dec
## 1960  97.83333  94.29167  94.41667  94.20833  94.25000
## 1961 100.83333 105.20833 106.33333 107.45833 108.25000
## 1962 111.37500 112.00000 112.70833 113.54167 114.37500
## 1963 125.41667 126.87500 127.66667 128.37500 129.25000
## 1964 135.37500 135.91667 136.70833 137.62500 138.25000
## 1965 145.45833 146.66667 147.70833 148.66667 149.58333
## 1966 152.58333 153.20833 153.70833 154.00000 154.54167
## 1967 162.41667 162.91667 163.50000 164.20833 164.79167
## 1968 168.70833 169.08333 169.54167 170.08333 170.50000
## 1969 159.58333 153.54167 147.20833 140.87500 128.00000
## 1970  76.79167  76.66667  69.54167  62.62500  62.45833
## 1971  28.25000  21.79167  23.16667  24.25000  26.16667
## 1972  37.37500  38.41667  39.83333  41.50000  42.79167
## 1973  47.58333  48.41667  49.29167  49.83333  50.20833
## 1974  47.95833  47.41667  46.04167  44.41667  43.75000
## 1975  50.16667  51.62500  53.62500  56.12500  57.70833
## 1976  65.41667  66.16667  67.12500  67.87500  68.37500
## 1977        NA        NA        NA        NA        NA
autoplot(airps_dataTS, series="Data") +
  autolayer(ma(airps_dataTS,12,centre=TRUE), series="12-MA") +
  xlab("Year") + 
  ylab("Monthly Air Passenger Miles") +
  ggtitle("Monthly Air Passenger Miles From 1960 to 1977")
## Warning: Removed 12 rows containing missing values (geom_path).

#Classical multiplicative decomposition
airps_dataTS %>% decompose(type="multiplicative") %>% 
  autoplot() + xlab("Year") +
  ggtitle("Classical multiplicative decomposition of Monthly Passengers Miles")

#Comparing Two Decompistion
mult<-decompose(airps_dataTS,type="multiplicative")
error.m<-(na.omit(mult$random)-1)*na.omit(mult$trend)

add<-decompose(airps_dataTS,type="additive")
error.a<-na.omit(add$random)

MAE.m<-sum(abs(error.m))
MAE.a<-sum(abs(error.a))

MAE.m
## [1] 3801.3
MAE.a
## [1] 2810.097
#Arima
airps.arima<-auto.arima(airps_dataTS)
summary(airps.arima)
## Series: airps_dataTS 
## ARIMA(0,1,2)(0,0,1)[12] 
## 
## Coefficients:
##           ma1     ma2    sma1
##       -0.8413  0.1155  0.2655
## s.e.   0.0663  0.0725  0.0628
## 
## sigma^2 estimated as 833:  log likelihood=-1027.37
## AIC=2062.74   AICc=2062.93   BIC=2076.22
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.197984 28.59294 15.60635 -79.85802 97.62649 0.8154957
##                      ACF1
## Training set -0.005199522
autoplot(forecast(airps.arima,h=24))

airps.arima2<-auto.arima(airps_dataTS, lambda = BoxCox.lambda(airps_dataTS))
summary(airps.arima2)
## Series: airps_dataTS 
## ARIMA(0,1,1)(0,0,1)[12] 
## Box Cox transformation: lambda= 1.999945 
## 
## Coefficients:
##           ma1    sma1
##       -0.7473  0.2263
## s.e.   0.0430  0.0644
## 
## sigma^2 estimated as 7118607:  log likelihood=-2000.94
## AIC=4007.89   AICc=4008   BIC=4018
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -3.16319 32.79692 17.43888 -103.5352 124.9229 0.911253
##                    ACF1
## Training set 0.09063563
autoplot(forecast(airps.arima2,h=24))

accuracy(airps.arima)
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.197984 28.59294 15.60635 -79.85802 97.62649 0.8154957
##                      ACF1
## Training set -0.005199522
accuracy(airps.arima2)
##                    ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -3.16319 32.79692 17.43888 -103.5352 124.9229 0.911253
##                    ACF1
## Training set 0.09063563

The results implies that two ARIMA models had a descent fit, but the ARIMA model without BoxCox has better performance. Next we try to work on ETS model.

#ETS 
airps.ETS<-ets(airps_dataTS,model="ZZZ")
summary(airps.ETS)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = airps_dataTS, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.2476 
## 
##   Initial states:
##     l = 99.0502 
## 
##   sigma:  30.1023
## 
##      AIC     AICc      BIC 
## 2635.839 2635.952 2645.964 
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
##                     ACF1
## Training set -0.09176193
plot(airps.ETS)

autoplot(forecast(airps.ETS,h=24))

airps.ETS2<-ets(airps_dataTS,model="ANN")
summary(airps.ETS2)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = airps_dataTS, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.2476 
## 
##   Initial states:
##     l = 99.0502 
## 
##   sigma:  30.1023
## 
##      AIC     AICc      BIC 
## 2635.839 2635.952 2645.964 
## 
## Training set error measures:
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
##                     ACF1
## Training set -0.09176193
plot(airps.ETS2)

autoplot(forecast(airps.ETS2,h=24))

airps.ETS3<-ets(airps_dataTS, lambda= BoxCox.lambda(airps_dataTS))
summary(airps.ETS3)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = airps_dataTS, lambda = BoxCox.lambda(airps_dataTS)) 
## 
##   Box-Cox transformation: lambda= 1.9999 
## 
##   Smoothing parameters:
##     alpha = 0.2632 
## 
##   Initial states:
##     l = 5051.8074 
## 
##   sigma:  2736.576
## 
##      AIC     AICc      BIC 
## 4584.099 4584.212 4594.225 
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -4.640254 32.49046 17.93155 -108.1264 120.9974 0.9369967
##                   ACF1
## Training set 0.1013107
plot(airps.ETS3)

autoplot(forecast(airps.ETS3,h=24))

accuracy(airps.ETS)
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
##                     ACF1
## Training set -0.09176193
accuracy(airps.ETS2)
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
##                     ACF1
## Training set -0.09176193
accuracy(airps.ETS3)
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -4.640254 32.49046 17.93155 -108.1264 120.9974 0.9369967
##                   ACF1
## Training set 0.1013107

Comparing three ETS models, we found that ‘ZZZ’ model is outperformed than others.

airps.GARCH <- garchFit(formula = ~ garch(1, 1),data =airps_dataTS)
## 
## Series Initialization:
##  ARMA Model:                arma
##  Formula Mean:              ~ arma(0, 0)
##  GARCH Model:               garch
##  Formula Variance:          ~ garch(1, 1)
##  ARMA Order:                0 0
##  Max ARMA Order:            0
##  GARCH Order:               1 1
##  Max GARCH Order:           1
##  Maximum Order:             1
##  Conditional Dist:          norm
##  h.start:                   2
##  llh.start:                 1
##  Length of Series:          216
##  Recursion Init:            mci
##  Series Scale:              53.16575
## 
## Parameter Initialization:
##  Initial Parameters:          $params
##  Limits of Transformations:   $U, $V
##  Which Parameters are Fixed?  $includes
##  Parameter Matrix:
##                       U        V  params includes
##     mu     -18.73160157  18.7316 1.87316     TRUE
##     omega    0.00000100 100.0000 0.10000     TRUE
##     alpha1   0.00000001   1.0000 0.10000     TRUE
##     gamma1  -0.99999999   1.0000 0.10000    FALSE
##     beta1    0.00000001   1.0000 0.80000     TRUE
##     delta    0.00000000   2.0000 2.00000    FALSE
##     skew     0.10000000  10.0000 1.00000    FALSE
##     shape    1.00000000  10.0000 4.00000    FALSE
##  Index List of Parameters to be Optimized:
##     mu  omega alpha1  beta1 
##      1      2      3      5 
##  Persistence:                  0.9 
## 
## 
## --- START OF TRACE ---
## Selected Algorithm: nlminb 
## 
## R coded nlminb Solver: 
## 
##   0:     286.27306:  1.87316 0.100000 0.100000 0.800000
##   1:     282.59333:  1.87940 0.0736107 0.104496 0.787136
##   2:     276.05944:  1.91915 0.00908923 0.201788 0.801696
##   3:     275.36967:  1.92640 0.0221006 0.200290 0.803029
##   4:     273.98431:  1.94567 0.0166375 0.195589 0.797506
##   5:     271.52909:  2.03363 0.0269211 0.173062 0.783538
##   6:     270.85782:  2.04172 0.0184543 0.175323 0.783511
##   7:     270.26796:  2.05466 0.0201054 0.180501 0.787783
##   8:     269.19086:  2.08395 0.00908984 0.184461 0.788426
##   9:     263.63340:  2.27226 1.00000e-06 0.221820 0.819263
##  10:     260.74689:  2.43440 4.86571e-05 0.241937 0.739646
##  11:     259.44504:  2.49851 0.00573378 0.253019 0.702134
##  12:     258.70040:  2.43402 0.00552145 0.243707 0.740364
##  13:     258.19380:  2.46061 1.00000e-06 0.287686 0.730112
##  14:     257.61939:  2.50635 0.00443809 0.327779 0.707524
##  15:     257.34639:  2.55322 0.00252667 0.364781 0.680421
##  16:     256.72188:  2.52495 1.00000e-06 0.357853 0.669149
##  17:     256.05833:  2.50948 0.00296849 0.349375 0.651116
##  18:     255.38815:  2.49763 0.00318699 0.411762 0.627037
##  19:     254.02348:  2.45078 0.00376548 0.521211 0.522748
##  20:     253.25722:  2.37491 0.00583310 0.677694 0.379545
##  21:     253.12874:  2.41548 0.00527806 0.645731 0.414282
##  22:     253.04236:  2.40296 0.00557239 0.674027 0.391591
##  23:     253.02683:  2.39741 0.00565866 0.690925 0.381153
##  24:     253.02050:  2.39716 0.00563567 0.698026 0.378880
##  25:     253.01686:  2.39766 0.00555720 0.703612 0.378846
##  26:     253.01660:  2.39747 0.00552358 0.703515 0.379429
##  27:     253.01600:  2.39667 0.00540361 0.702329 0.381249
##  28:     253.01580:  2.39627 0.00534416 0.701702 0.381701
##  29:     253.01574:  2.39614 0.00532265 0.701668 0.381438
##  30:     253.01574:  2.39616 0.00532586 0.701847 0.381206
##  31:     253.01574:  2.39617 0.00532732 0.701909 0.381152
##  32:     253.01574:  2.39617 0.00532740 0.701917 0.381147
## 
## Final Estimate of the Negative LLH:
##  LLH:  1111.273    norm LLH:  5.144784 
##          mu       omega      alpha1       beta1 
## 127.3943334  15.0584064   0.7019168   0.3811473 
## 
## R-optimhess Difference Approximated Hessian Matrix:
##                 mu       omega       alpha1         beta1
## mu     -0.19394135  0.02268628   -0.5547777   -0.01554201
## omega   0.02268628 -0.01060516   -0.3918318   -0.88824466
## alpha1 -0.55477766 -0.39183177 -197.2720823 -264.11758000
## beta1  -0.01554201 -0.88824466 -264.1175800 -429.25941590
## attr(,"time")
## Time difference of 0.004422188 secs
## 
## --- END OF TRACE ---
## 
## 
## Time to Estimate Parameters:
##  Time difference of 0.02553296 secs
summary(airps.GARCH)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = airps_dataTS) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x7fa2021b1300>
##  [data = airps_dataTS]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##        mu      omega     alpha1      beta1  
## 127.39433   15.05841    0.70192    0.38115  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu      127.3943      2.7275   46.707  < 2e-16 ***
## omega    15.0584     13.0659    1.153  0.24912    
## alpha1    0.7019      0.1771    3.963 7.39e-05 ***
## beta1     0.3811      0.1270    3.002  0.00269 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  -1111.273    normalized:  -5.144784 
## 
## Description:
##  Mon Apr 29 01:33:11 2019 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  5.906705  0.05216454  
##  Shapiro-Wilk Test  R    W      0.9092554 3.347033e-10
##  Ljung-Box Test     R    Q(10)  1067.192  0           
##  Ljung-Box Test     R    Q(15)  1515.225  0           
##  Ljung-Box Test     R    Q(20)  1814.848  0           
##  Ljung-Box Test     R^2  Q(10)  6.306484  0.7888902   
##  Ljung-Box Test     R^2  Q(15)  18.41377  0.2415374   
##  Ljung-Box Test     R^2  Q(20)  20.49078  0.4276293   
##  LM Arch Test       R    TR^2   13.30901  0.3469862   
## 
## Information Criterion Statistics:
##      AIC      BIC      SIC     HQIC 
## 10.32660 10.38911 10.32593 10.35186