library(ggplot2)
library(forecast)
library(tseries)
library(gridExtra)
library(docstring)
##
## Attaching package: 'docstring'
## The following object is masked from 'package:utils':
##
## ?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(xts)
library(timeSeries)
## Loading required package: timeDate
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
library(zoo)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:timeSeries':
##
## outlier
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(psych)
library(expsmooth)
library(fma)
library(fpp2)
library(fGarch)
## Loading required package: fBasics
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:psych':
##
## tr
## The following object is masked from 'package:TTR':
##
## volatility
We use the “monthly air passenger miles in US” from 1960 to 1977, and evalute the multiple models selections includes Auto ARIMA, ETS models. Firstly, there are 216 variables(one month worth) and these datasets would be converted into time series for the further analysis.
setwd ("~/desktop/ADEC7460.02 Spring 2019 Predictive Analytics/Dataset")
airps= read.csv("monthly-us-air-passenger-miles-j.csv")
colnames(airps)<-c("Month","monthlypassenger")
airps<-airps[-217,]
airps_dataTS <- ts(airps$monthlypassenger, start=c(1960, 1), end=c(1977,12), frequency=12)
describe(airps)
## vars n mean sd median trimmed mad min max range
## Month* 1 216 108.50 62.50 108.5 108.50 80.06 1 216 215
## monthlypassenger* 2 216 99.59 53.17 104.5 100.93 68.94 2 185 183
## skew kurtosis se
## Month* 0.00 -1.22 4.25
## monthlypassenger* -0.17 -1.23 3.62
ts.plot(airps_dataTS)
The historical data show the seasonality.
#Classical additive decomposition
airps_dataTS %>% decompose(type="additive") %>%
autoplot() + xlab("Year") +
ggtitle("Classical additive decomposition of Monthly Air Passenger Miles")
#Moving Average Order=5
ma(airps_dataTS,5)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1960 NA NA 97.6 100.6 104.8 109.2 109.6 110.2 105.6 101.6 97.6
## 1961 75.8 78.0 78.8 83.4 105.2 109.0 109.6 111.2 106.8 105.6 103.8
## 1962 102.8 106.8 107.8 111.0 114.2 116.4 115.6 115.4 111.0 110.4 109.2
## 1963 109.0 113.6 116.4 119.8 125.6 128.8 129.0 130.2 127.6 127.2 126.0
## 1964 126.4 128.6 129.4 132.2 135.6 138.4 139.0 139.8 136.8 136.2 134.8
## 1965 134.4 137.6 138.4 140.4 144.6 147.6 148.2 149.4 147.6 147.4 146.4
## 1966 147.0 150.0 150.6 152.8 152.2 151.2 151.2 151.4 150.0 153.2 155.0
## 1967 155.6 156.8 156.6 159.0 162.6 165.0 165.8 166.4 164.4 163.6 161.6
## 1968 162.0 164.0 163.8 166.2 169.2 172.0 171.8 172.2 169.6 169.6 167.8
## 1969 167.8 170.0 169.6 171.0 174.2 176.6 176.2 175.8 173.8 173.4 138.0
## 1970 107.4 110.8 77.4 82.4 56.6 67.2 32.6 67.8 98.0 90.2 79.4
## 1971 77.2 44.2 43.8 47.6 22.0 33.0 32.8 33.8 29.2 23.0 15.4
## 1972 15.2 20.4 20.8 28.2 41.0 51.4 52.0 53.2 45.4 40.2 31.4
## 1973 30.0 35.6 36.2 43.0 55.8 64.2 63.8 62.2 53.4 46.6 38.4
## 1974 36.8 43.0 44.6 50.2 61.2 66.6 63.0 59.6 49.2 43.8 34.2
## 1975 32.4 34.2 32.6 39.0 52.2 59.6 62.0 64.0 56.4 51.8 46.0
## 1976 47.6 54.8 56.2 61.0 71.0 76.6 75.8 75.6 67.8 64.6 60.6
## 1977 59.2 65.4 65.6 68.8 78.4 81.6 82.2 83.8 80.0 79.2 NA
## Dec
## 1960 76.8
## 1961 101.6
## 1962 107.0
## 1963 125.4
## 1964 134.0
## 1965 146.0
## 1966 154.2
## 1967 161.0
## 1968 166.8
## 1969 139.8
## 1970 113.4
## 1971 13.2
## 1972 27.8
## 1973 33.4
## 1974 29.6
## 1975 45.6
## 1976 56.8
## 1977 NA
autoplot(airps_dataTS, series="Data") +
autolayer(ma(airps_dataTS,5), series="5-MA") +
xlab("Year") +
ylab("Monthly Air Passenger Miles") +
ggtitle("Monthly Air Passenger Miles From 1960 to 1977")
## Warning: Removed 4 rows containing missing values (geom_path).
#Moving Average Order=12 NOTE: For Even Order numnber, we need to apply moving average of moving average
ma(airps_dataTS,12,centre=TRUE)
## Jan Feb Mar Apr May Jun Jul
## 1960 NA NA NA NA NA NA 101.75000
## 1961 94.33333 94.33333 94.37500 94.41667 94.70833 95.41667 96.45833
## 1962 108.45833 108.62500 109.12500 109.58333 110.00000 110.33333 110.75000
## 1963 115.58333 116.95833 118.08333 119.41667 120.91667 122.54167 123.95833
## 1964 130.08333 130.95833 131.75000 132.45833 133.16667 133.87500 134.66667
## 1965 138.95833 139.70833 140.54167 141.54167 142.50000 143.54167 144.45833
## 1966 149.62500 149.00000 149.08333 149.70833 150.50000 151.33333 152.00000
## 1967 156.04167 158.25000 159.62500 160.16667 160.66667 161.20833 161.79167
## 1968 165.29167 165.75000 166.20833 166.66667 167.08333 167.62500 168.29167
## 1969 170.75000 171.08333 171.41667 171.75000 172.12500 172.41667 165.66667
## 1970 116.62500 106.25000 94.50000 88.45833 89.70833 83.50000 76.70833
## 1971 62.33333 62.33333 62.45833 55.58333 41.04167 34.00000 35.00000
## 1972 28.16667 29.58333 31.00000 32.29167 33.58333 35.12500 36.50000
## 1973 43.54167 44.16667 44.95833 45.83333 46.33333 46.54167 46.95833
## 1974 50.20833 50.04167 49.62500 49.12500 48.79167 48.75000 48.58333
## 1975 43.75000 44.08333 44.37500 45.12500 46.16667 46.91667 48.20833
## 1976 58.66667 59.08333 60.04167 61.41667 62.50000 63.70833 64.79167
## 1977 68.58333 68.75000 69.50000 70.91667 72.62500 74.16667 NA
## Aug Sep Oct Nov Dec
## 1960 97.83333 94.29167 94.41667 94.20833 94.25000
## 1961 100.83333 105.20833 106.33333 107.45833 108.25000
## 1962 111.37500 112.00000 112.70833 113.54167 114.37500
## 1963 125.41667 126.87500 127.66667 128.37500 129.25000
## 1964 135.37500 135.91667 136.70833 137.62500 138.25000
## 1965 145.45833 146.66667 147.70833 148.66667 149.58333
## 1966 152.58333 153.20833 153.70833 154.00000 154.54167
## 1967 162.41667 162.91667 163.50000 164.20833 164.79167
## 1968 168.70833 169.08333 169.54167 170.08333 170.50000
## 1969 159.58333 153.54167 147.20833 140.87500 128.00000
## 1970 76.79167 76.66667 69.54167 62.62500 62.45833
## 1971 28.25000 21.79167 23.16667 24.25000 26.16667
## 1972 37.37500 38.41667 39.83333 41.50000 42.79167
## 1973 47.58333 48.41667 49.29167 49.83333 50.20833
## 1974 47.95833 47.41667 46.04167 44.41667 43.75000
## 1975 50.16667 51.62500 53.62500 56.12500 57.70833
## 1976 65.41667 66.16667 67.12500 67.87500 68.37500
## 1977 NA NA NA NA NA
autoplot(airps_dataTS, series="Data") +
autolayer(ma(airps_dataTS,12,centre=TRUE), series="12-MA") +
xlab("Year") +
ylab("Monthly Air Passenger Miles") +
ggtitle("Monthly Air Passenger Miles From 1960 to 1977")
## Warning: Removed 12 rows containing missing values (geom_path).
#Classical multiplicative decomposition
airps_dataTS %>% decompose(type="multiplicative") %>%
autoplot() + xlab("Year") +
ggtitle("Classical multiplicative decomposition of Monthly Passengers Miles")
#Comparing Two Decompistion
mult<-decompose(airps_dataTS,type="multiplicative")
error.m<-(na.omit(mult$random)-1)*na.omit(mult$trend)
add<-decompose(airps_dataTS,type="additive")
error.a<-na.omit(add$random)
MAE.m<-sum(abs(error.m))
MAE.a<-sum(abs(error.a))
MAE.m
## [1] 3801.3
MAE.a
## [1] 2810.097
#Arima
airps.arima<-auto.arima(airps_dataTS)
summary(airps.arima)
## Series: airps_dataTS
## ARIMA(0,1,2)(0,0,1)[12]
##
## Coefficients:
## ma1 ma2 sma1
## -0.8413 0.1155 0.2655
## s.e. 0.0663 0.0725 0.0628
##
## sigma^2 estimated as 833: log likelihood=-1027.37
## AIC=2062.74 AICc=2062.93 BIC=2076.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.197984 28.59294 15.60635 -79.85802 97.62649 0.8154957
## ACF1
## Training set -0.005199522
autoplot(forecast(airps.arima,h=24))
airps.arima2<-auto.arima(airps_dataTS, lambda = BoxCox.lambda(airps_dataTS))
summary(airps.arima2)
## Series: airps_dataTS
## ARIMA(0,1,1)(0,0,1)[12]
## Box Cox transformation: lambda= 1.999945
##
## Coefficients:
## ma1 sma1
## -0.7473 0.2263
## s.e. 0.0430 0.0644
##
## sigma^2 estimated as 7118607: log likelihood=-2000.94
## AIC=4007.89 AICc=4008 BIC=4018
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.16319 32.79692 17.43888 -103.5352 124.9229 0.911253
## ACF1
## Training set 0.09063563
autoplot(forecast(airps.arima2,h=24))
accuracy(airps.arima)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.197984 28.59294 15.60635 -79.85802 97.62649 0.8154957
## ACF1
## Training set -0.005199522
accuracy(airps.arima2)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.16319 32.79692 17.43888 -103.5352 124.9229 0.911253
## ACF1
## Training set 0.09063563
The results implies that two ARIMA models had a descent fit, but the ARIMA model without BoxCox has better performance. Next we try to work on ETS model.
#ETS
airps.ETS<-ets(airps_dataTS,model="ZZZ")
summary(airps.ETS)
## ETS(A,N,N)
##
## Call:
## ets(y = airps_dataTS, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.2476
##
## Initial states:
## l = 99.0502
##
## sigma: 30.1023
##
## AIC AICc BIC
## 2635.839 2635.952 2645.964
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
## ACF1
## Training set -0.09176193
plot(airps.ETS)
autoplot(forecast(airps.ETS,h=24))
airps.ETS2<-ets(airps_dataTS,model="ANN")
summary(airps.ETS2)
## ETS(A,N,N)
##
## Call:
## ets(y = airps_dataTS, model = "ANN")
##
## Smoothing parameters:
## alpha = 0.2476
##
## Initial states:
## l = 99.0502
##
## sigma: 30.1023
##
## AIC AICc BIC
## 2635.839 2635.952 2645.964
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
## ACF1
## Training set -0.09176193
plot(airps.ETS2)
autoplot(forecast(airps.ETS2,h=24))
airps.ETS3<-ets(airps_dataTS, lambda= BoxCox.lambda(airps_dataTS))
summary(airps.ETS3)
## ETS(A,N,N)
##
## Call:
## ets(y = airps_dataTS, lambda = BoxCox.lambda(airps_dataTS))
##
## Box-Cox transformation: lambda= 1.9999
##
## Smoothing parameters:
## alpha = 0.2632
##
## Initial states:
## l = 5051.8074
##
## sigma: 2736.576
##
## AIC AICc BIC
## 4584.099 4584.212 4594.225
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -4.640254 32.49046 17.93155 -108.1264 120.9974 0.9369967
## ACF1
## Training set 0.1013107
plot(airps.ETS3)
autoplot(forecast(airps.ETS3,h=24))
accuracy(airps.ETS)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
## ACF1
## Training set -0.09176193
accuracy(airps.ETS2)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3983409 29.96261 16.90762 -85.95385 102.6556 0.8834927
## ACF1
## Training set -0.09176193
accuracy(airps.ETS3)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.640254 32.49046 17.93155 -108.1264 120.9974 0.9369967
## ACF1
## Training set 0.1013107
Comparing three ETS models, we found that ‘ZZZ’ model is outperformed than others.
airps.GARCH <- garchFit(formula = ~ garch(1, 1),data =airps_dataTS)
##
## Series Initialization:
## ARMA Model: arma
## Formula Mean: ~ arma(0, 0)
## GARCH Model: garch
## Formula Variance: ~ garch(1, 1)
## ARMA Order: 0 0
## Max ARMA Order: 0
## GARCH Order: 1 1
## Max GARCH Order: 1
## Maximum Order: 1
## Conditional Dist: norm
## h.start: 2
## llh.start: 1
## Length of Series: 216
## Recursion Init: mci
## Series Scale: 53.16575
##
## Parameter Initialization:
## Initial Parameters: $params
## Limits of Transformations: $U, $V
## Which Parameters are Fixed? $includes
## Parameter Matrix:
## U V params includes
## mu -18.73160157 18.7316 1.87316 TRUE
## omega 0.00000100 100.0000 0.10000 TRUE
## alpha1 0.00000001 1.0000 0.10000 TRUE
## gamma1 -0.99999999 1.0000 0.10000 FALSE
## beta1 0.00000001 1.0000 0.80000 TRUE
## delta 0.00000000 2.0000 2.00000 FALSE
## skew 0.10000000 10.0000 1.00000 FALSE
## shape 1.00000000 10.0000 4.00000 FALSE
## Index List of Parameters to be Optimized:
## mu omega alpha1 beta1
## 1 2 3 5
## Persistence: 0.9
##
##
## --- START OF TRACE ---
## Selected Algorithm: nlminb
##
## R coded nlminb Solver:
##
## 0: 286.27306: 1.87316 0.100000 0.100000 0.800000
## 1: 282.59333: 1.87940 0.0736107 0.104496 0.787136
## 2: 276.05944: 1.91915 0.00908923 0.201788 0.801696
## 3: 275.36967: 1.92640 0.0221006 0.200290 0.803029
## 4: 273.98431: 1.94567 0.0166375 0.195589 0.797506
## 5: 271.52909: 2.03363 0.0269211 0.173062 0.783538
## 6: 270.85782: 2.04172 0.0184543 0.175323 0.783511
## 7: 270.26796: 2.05466 0.0201054 0.180501 0.787783
## 8: 269.19086: 2.08395 0.00908984 0.184461 0.788426
## 9: 263.63340: 2.27226 1.00000e-06 0.221820 0.819263
## 10: 260.74689: 2.43440 4.86571e-05 0.241937 0.739646
## 11: 259.44504: 2.49851 0.00573378 0.253019 0.702134
## 12: 258.70040: 2.43402 0.00552145 0.243707 0.740364
## 13: 258.19380: 2.46061 1.00000e-06 0.287686 0.730112
## 14: 257.61939: 2.50635 0.00443809 0.327779 0.707524
## 15: 257.34639: 2.55322 0.00252667 0.364781 0.680421
## 16: 256.72188: 2.52495 1.00000e-06 0.357853 0.669149
## 17: 256.05833: 2.50948 0.00296849 0.349375 0.651116
## 18: 255.38815: 2.49763 0.00318699 0.411762 0.627037
## 19: 254.02348: 2.45078 0.00376548 0.521211 0.522748
## 20: 253.25722: 2.37491 0.00583310 0.677694 0.379545
## 21: 253.12874: 2.41548 0.00527806 0.645731 0.414282
## 22: 253.04236: 2.40296 0.00557239 0.674027 0.391591
## 23: 253.02683: 2.39741 0.00565866 0.690925 0.381153
## 24: 253.02050: 2.39716 0.00563567 0.698026 0.378880
## 25: 253.01686: 2.39766 0.00555720 0.703612 0.378846
## 26: 253.01660: 2.39747 0.00552358 0.703515 0.379429
## 27: 253.01600: 2.39667 0.00540361 0.702329 0.381249
## 28: 253.01580: 2.39627 0.00534416 0.701702 0.381701
## 29: 253.01574: 2.39614 0.00532265 0.701668 0.381438
## 30: 253.01574: 2.39616 0.00532586 0.701847 0.381206
## 31: 253.01574: 2.39617 0.00532732 0.701909 0.381152
## 32: 253.01574: 2.39617 0.00532740 0.701917 0.381147
##
## Final Estimate of the Negative LLH:
## LLH: 1111.273 norm LLH: 5.144784
## mu omega alpha1 beta1
## 127.3943334 15.0584064 0.7019168 0.3811473
##
## R-optimhess Difference Approximated Hessian Matrix:
## mu omega alpha1 beta1
## mu -0.19394135 0.02268628 -0.5547777 -0.01554201
## omega 0.02268628 -0.01060516 -0.3918318 -0.88824466
## alpha1 -0.55477766 -0.39183177 -197.2720823 -264.11758000
## beta1 -0.01554201 -0.88824466 -264.1175800 -429.25941590
## attr(,"time")
## Time difference of 0.004422188 secs
##
## --- END OF TRACE ---
##
##
## Time to Estimate Parameters:
## Time difference of 0.02553296 secs
summary(airps.GARCH)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = airps_dataTS)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7fa2021b1300>
## [data = airps_dataTS]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 beta1
## 127.39433 15.05841 0.70192 0.38115
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 127.3943 2.7275 46.707 < 2e-16 ***
## omega 15.0584 13.0659 1.153 0.24912
## alpha1 0.7019 0.1771 3.963 7.39e-05 ***
## beta1 0.3811 0.1270 3.002 0.00269 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## -1111.273 normalized: -5.144784
##
## Description:
## Mon Apr 29 01:33:11 2019 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 5.906705 0.05216454
## Shapiro-Wilk Test R W 0.9092554 3.347033e-10
## Ljung-Box Test R Q(10) 1067.192 0
## Ljung-Box Test R Q(15) 1515.225 0
## Ljung-Box Test R Q(20) 1814.848 0
## Ljung-Box Test R^2 Q(10) 6.306484 0.7888902
## Ljung-Box Test R^2 Q(15) 18.41377 0.2415374
## Ljung-Box Test R^2 Q(20) 20.49078 0.4276293
## LM Arch Test R TR^2 13.30901 0.3469862
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## 10.32660 10.38911 10.32593 10.35186