Business Forecasting Final Exam

Namita Kadam | RUID - 174004689 | NetID - njk45

2017-04-27

About the Dataset:

US Department of Tourism maintains statistics on visitors to America. They have very details datasets. We are going to focus on just a general statistic that keeps track of monthly visitors. See details at http://travel.trade.gov/research/monthly/arrivals/index.asp


Import Data:

library(readr)
## Warning: package 'readr' was built under R version 3.3.3
library(lattice)
library(TTR)
library(fpp)
## Loading required package: forecast
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
## Loading required package: fma
## Loading required package: tseries
## Loading required package: expsmooth
## Loading required package: lmtest
US_tourism <- read_csv("D:/BF/FINAL/Final_Travel.csv")
## Parsed with column specification:
## cols(
##   Year = col_integer(),
##   Month = col_character(),
##   Value = col_integer()
## )
travel <- US_tourism$Value
plot(travel)

US_tourism_TS<- ts(travel,start=c(1999,1),frequency = 12)
# Subset of the data to remove values I do not want in my analysis: As we can see from the plot, there is a steep downfall in the tourism in the end of the year 2001 which may be beacause of the WTC 9/11 attack which happened in September 2011
# Therefore not considering this irregular component from my analysis
# Taking the data from January 2002 till August 2016
US_tourism_TSW <- window(US_tourism_TS, c(2002,1), c(2016,8), frequency = 12)

Plot and Inference

plot(US_tourism_TSW, ylab="US_Visitors", type="o", pch =20)


Central Tendency

summary(US_tourism_TSW)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 1197000 1735000 2056000 2206000 2569000 4075000
hist(US_tourism_TSW)

#the default percentile values
quantile(US_tourism_TSW)
##      0%     25%     50%     75%    100% 
## 1197397 1735330 2055772 2569408 4075384
cycle(US_tourism_TSW)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2002   1   2   3   4   5   6   7   8   9  10  11  12
## 2003   1   2   3   4   5   6   7   8   9  10  11  12
## 2004   1   2   3   4   5   6   7   8   9  10  11  12
## 2005   1   2   3   4   5   6   7   8   9  10  11  12
## 2006   1   2   3   4   5   6   7   8   9  10  11  12
## 2007   1   2   3   4   5   6   7   8   9  10  11  12
## 2008   1   2   3   4   5   6   7   8   9  10  11  12
## 2009   1   2   3   4   5   6   7   8   9  10  11  12
## 2010   1   2   3   4   5   6   7   8   9  10  11  12
## 2011   1   2   3   4   5   6   7   8   9  10  11  12
## 2012   1   2   3   4   5   6   7   8   9  10  11  12
## 2013   1   2   3   4   5   6   7   8   9  10  11  12
## 2014   1   2   3   4   5   6   7   8   9  10  11  12
## 2015   1   2   3   4   5   6   7   8   9  10  11  12
## 2016   1   2   3   4   5   6   7   8

This will print the cycle across the years.

cycle(US_tourism_TSW)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2002   1   2   3   4   5   6   7   8   9  10  11  12
## 2003   1   2   3   4   5   6   7   8   9  10  11  12
## 2004   1   2   3   4   5   6   7   8   9  10  11  12
## 2005   1   2   3   4   5   6   7   8   9  10  11  12
## 2006   1   2   3   4   5   6   7   8   9  10  11  12
## 2007   1   2   3   4   5   6   7   8   9  10  11  12
## 2008   1   2   3   4   5   6   7   8   9  10  11  12
## 2009   1   2   3   4   5   6   7   8   9  10  11  12
## 2010   1   2   3   4   5   6   7   8   9  10  11  12
## 2011   1   2   3   4   5   6   7   8   9  10  11  12
## 2012   1   2   3   4   5   6   7   8   9  10  11  12
## 2013   1   2   3   4   5   6   7   8   9  10  11  12
## 2014   1   2   3   4   5   6   7   8   9  10  11  12
## 2015   1   2   3   4   5   6   7   8   9  10  11  12
## 2016   1   2   3   4   5   6   7   8

This will aggregate the cycles and display a year on year trend.

boxplot(US_tourism_TSW~cycle(US_tourism_TSW))

Box plot across the months will give us a sense on seasonal effect

a = boxplot(US_tourism_TSW, las =2, col = "royalblue2")

text(a$stats[3],"Median", pos = 4, offset = 8.5)
text(max(a$stats),"Max", pos = 4, offset = 8.5)
text(min(a$stats),"Min", pos = 4, offset = 8.5)
text(a$stats[2], "1st Quartile", pos = 4, offset = 8.5)
text(a$stats[4], "3rd Quartile", pos = 4, offset = 8.5)
text(a$out[2], "Outlier", pos = 4, offset = 8.5)


Decomposition

STL_US_tourism_TSW <- stl(US_tourism_TSW, s.window ="periodic", robust = TRUE)
plot(STL_US_tourism_TSW)

decomp_US_tourism_TSW <- decompose(US_tourism_TSW, type = "additive")
decomp_US_tourism_TSW$seasonal
##             Jan        Feb        Mar        Apr        May        Jun
## 2002 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2003 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2004 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2005 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2006 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2007 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2008 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2009 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2010 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2011 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2012 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2013 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2014 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2015 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
## 2016 -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
##             Jul        Aug        Sep        Oct        Nov        Dec
## 2002  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2003  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2004  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2005  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2006  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2007  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2008  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2009  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2010  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2011  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2012  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2013  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2014  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2015  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
## 2016  503879.35  470083.42
# the values of the seasonal monthly indices:
decomp_US_tourism_TSW$figure
##  [1] -414382.74 -522276.94 -173650.06  -48804.42   30903.31   76409.67
##  [7]  503879.35  470083.42  171812.04  130449.21 -269600.93   45178.10
#the seasonal effects values for all the months. We can see that the seasonal effect values are repeated each year (month-wise) in the $seasonal object from January onwards.
plot(decomp_US_tourism_TSW$figure, main = "Seasonal Indices")

#month for which the value of time series is high: 
US_tourism[which.max(US_tourism_TS),]
## # A tibble: 1 × 3
##    Year Month   Value
##   <int> <chr>   <int>
## 1  2015  July 4075384
#month for which the value of time series is low: 
US_tourism[which.min(US_tourism_TS),]
## # A tibble: 1 × 3
##    Year    Month   Value
##   <int>    <chr>   <int>
## 1  2001 November 1118797
SA_US_tourism_TSW <- seasadj(STL_US_tourism_TSW)
plot(SA_US_tourism_TSW, main = "Time Series Adjusted for Seasonality")

plot(US_tourism_TSW)
lines(SA_US_tourism_TSW, col = "red")


Naïve Method

snaive_forecast <- snaive(US_tourism_TSW)
plot(snaive_forecast)

naive_forecast <- naive(US_tourism_TSW)
plot(naive_forecast)
lines(snaive_forecast$mean,col="black")

res <- residuals(naive_forecast)
fit <- fitted(naive_forecast)
act <- naive_forecast$x
plot(res, main="Residuals from the Naïve method Forecasts", 
ylab="", xlab="Time")

2 Do a Histogram plot of residuals. What does the plot indicate?

hist(res, xlab = "Residuals", main="Histogram of Residuals from the Naïve method Forecasts")

xyplot(res~fit, pch=16, cex = 1.5, abline=0)

xyplot(res~act, pch = 16, cex = 1.5, abline = 0)

Acf(res, main="ACF of residuals from the Naive method Forecasts")

accuracy_naive <- accuracy(naive_forecast)
accuracy_snaive <- accuracy(snaive_forecast)
accuracy_naive
##                    ME   RMSE      MAE        MPE     MAPE     MASE
## Training set 15081.85 301462 240545.6 -0.2643395 11.06543 1.397038
##                    ACF1
## Training set -0.2024705
accuracy_snaive
##                    ME     RMSE      MAE      MPE     MAPE MASE      ACF1
## Training set 115998.2 205853.8 172182.5 4.595344 7.741379    1 0.5732204
naive_forecast <- naive(US_tourism_TSW)
naive_forecast
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3836721 3450382 4223060 3245866 4427576
## Oct 2016        3836721 3450382 4223060 3245866 4427576
## Nov 2016        3836721 3450382 4223060 3245866 4427576
## Dec 2016        3836721 3450382 4223060 3245866 4427576
## Jan 2017        3836721 3450382 4223060 3245866 4427576
## Feb 2017        3836721 3450382 4223060 3245866 4427576
## Mar 2017        3836721 3450382 4223060 3245866 4427576
## Apr 2017        3836721 3450382 4223060 3245866 4427576
## May 2017        3836721 3450382 4223060 3245866 4427576
## Jun 2017        3836721 3450382 4223060 3245866 4427576
plot(naive_forecast)

snaive_forecast <- snaive(US_tourism_TSW,12)
snaive_forecast
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3551833 3288021 3815645 3148367 3955299
## Oct 2016        3500510 3236698 3764322 3097044 3903976
## Nov 2016        2822990 2559178 3086802 2419524 3226456
## Dec 2016        3145903 2882091 3409715 2742437 3549369
## Jan 2017        2645349 2381537 2909161 2241883 3048815
## Feb 2017        2405849 2142037 2669661 2002383 2809315
## Mar 2017        2897042 2633230 3160854 2493576 3300508
## Apr 2017        2895487 2631675 3159299 2492021 3298953
## May 2017        3213869 2950057 3477681 2810403 3617335
## Jun 2017        3293795 3029983 3557607 2890329 3697261
## Jul 2017        3931057 3667245 4194869 3527591 4334523
## Aug 2017        3836721 3572909 4100533 3433255 4240187
plot(snaive_forecast)

summary(naive_forecast)
## 
## Forecast method: Naive method
## 
## Model Information:
## $drift
## [1] 0
## 
## $drift.se
## [1] 0
## 
## $sd
## [1] 301948.4
## 
## $call
## naive(y = US_tourism_TSW)
## 
## 
## Error measures:
##                    ME   RMSE      MAE        MPE     MAPE     MASE
## Training set 15081.85 301462 240545.6 -0.2643395 11.06543 1.397038
##                    ACF1
## Training set -0.2024705
## 
## Forecasts:
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3836721 3450382 4223060 3245866 4427576
## Oct 2016        3836721 3450382 4223060 3245866 4427576
## Nov 2016        3836721 3450382 4223060 3245866 4427576
## Dec 2016        3836721 3450382 4223060 3245866 4427576
## Jan 2017        3836721 3450382 4223060 3245866 4427576
## Feb 2017        3836721 3450382 4223060 3245866 4427576
## Mar 2017        3836721 3450382 4223060 3245866 4427576
## Apr 2017        3836721 3450382 4223060 3245866 4427576
## May 2017        3836721 3450382 4223060 3245866 4427576
## Jun 2017        3836721 3450382 4223060 3245866 4427576
summary(snaive_forecast)
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## $drift
## [1] 0
## 
## $drift.se
## [1] 0
## 
## $sd
## [1] 170580.3
## 
## $call
## snaive(y = US_tourism_TSW, h = 12)
## 
## 
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE MASE      ACF1
## Training set 115998.2 205853.8 172182.5 4.595344 7.741379    1 0.5732204
## 
## Forecasts:
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3551833 3288021 3815645 3148367 3955299
## Oct 2016        3500510 3236698 3764322 3097044 3903976
## Nov 2016        2822990 2559178 3086802 2419524 3226456
## Dec 2016        3145903 2882091 3409715 2742437 3549369
## Jan 2017        2645349 2381537 2909161 2241883 3048815
## Feb 2017        2405849 2142037 2669661 2002383 2809315
## Mar 2017        2897042 2633230 3160854 2493576 3300508
## Apr 2017        2895487 2631675 3159299 2492021 3298953
## May 2017        3213869 2950057 3477681 2810403 3617335
## Jun 2017        3293795 3029983 3557607 2890329 3697261
## Jul 2017        3931057 3667245 4194869 3527591 4334523
## Aug 2017        3836721 3572909 4100533 3433255 4240187

Simple Moving Averages

plot(US_tourism_TSW)

ma3_forecast <- ma(US_tourism_TSW,order=3)
plot(US_tourism_TSW)
lines(ma3_forecast,col="Red", lwd = 2)

ma3_forecast <- ma(US_tourism_TSW,order=3)
plot(US_tourism_TSW)
lines(ma3_forecast,col="Red", lwd = 2)
ma6_forecast <- ma(US_tourism_TSW,order=6)
lines(ma6_forecast,col="Blue", lwd = 2)

ma3_forecast <- ma(US_tourism_TSW,order=3)
plot(US_tourism_TSW)
lines(ma3_forecast,col="Red", lwd = 2)
ma6_forecast <- ma(US_tourism_TSW,order=6)
lines(ma6_forecast,col="Blue", lwd = 2)
ma12_forecast <- ma(US_tourism_TSW,order=12)
lines(ma12_forecast,col="Green", lwd = 2)

ma1_forecast <- ma(US_tourism_TSW,order=1)
plot(US_tourism_TSW)
lines(ma1_forecast,col="Purple", lwd = 2)

MA_forecast <- forecast(ma1_forecast,h=12)
plot(MA_forecast)


Simple Smoothing

forecast_ets<-ets(US_tourism_TSW)
forecast_ets
## ETS(M,N,M) 
## 
## Call:
##  ets(y = US_tourism_TSW) 
## 
##   Smoothing parameters:
##     alpha = 0.4147 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 1615326.4368 
##     s=1.0256 0.8825 1.0596 1.0823 1.2087 1.2262
##            1.0286 1.0079 0.972 0.9245 0.7721 0.81
## 
##   sigma:  0.0469
## 
##      AIC     AICc      BIC 
## 4988.083 4991.083 5035.640
ets_forecast <- forecast.ets(forecast_ets, h=12)
ets_forecast
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3437539 3231023 3644055 3121700 3753378
## Oct 2016        3365511 3146587 3584435 3030695 3700327
## Nov 2016        2803153 2607859 2998448 2504476 3101831
## Dec 2016        3257644 3016566 3498721 2888948 3626340
## Jan 2017        2572956 2372011 2773901 2265637 2880275
## Feb 2017        2452432 2251354 2653511 2144909 2759955
## Mar 2017        2936402 2684727 3188077 2551498 3321306
## Apr 2017        3087290 2811679 3362901 2665780 3508800
## May 2017        3201511 2904728 3498294 2747620 3655401
## Jun 2017        3267131 2953460 3580802 2787413 3746850
## Jul 2017        3894858 3508463 4281252 3303918 4485797
## Aug 2017        3839291 3446509 4232072 3238584 4439998
plot(ets_forecast)

forecast_ets$initstate
##            l           s1           s2           s3           s4 
## 1.615326e+06 1.025606e+00 8.825179e-01 1.059568e+00 1.082254e+00 
##           s5           s6           s7           s8           s9 
## 1.208728e+00 1.226218e+00 1.028589e+00 1.007929e+00 9.719781e-01 
##          s10          s11          s12 
## 9.244650e-01 7.720981e-01 8.100486e-01
plot(forecast_ets$residuals, main="Residuals from Simple Smoothing method Forecasts", 
ylab="", xlab="Time")

2 Do a Histogram plot of residuals. What does the plot indicate?

hist(forecast_ets$residuals, xlab = "Residuals", main="Histogram of Residuals from Simple Smoothing method Forecasts")

xyplot(forecast_ets$residuals ~ forecast_ets$fitted, pch=16, cex = 1.5, abline=0)

xyplot(forecast_ets$residuals ~ forecast_ets$x, pch = 16, cex = 1.5, abline = 0)

acf(forecast_ets$residuals, main = "ACF of Residuals of Simple Smoothing")

accuracy_ets <- accuracy(ets_forecast)
accuracy_ets
##                    ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 21543.29 96382.89 72295.69 0.756152 3.456318 0.4198782
##                    ACF1
## Training set 0.02090026
ets_forecast
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3437539 3231023 3644055 3121700 3753378
## Oct 2016        3365511 3146587 3584435 3030695 3700327
## Nov 2016        2803153 2607859 2998448 2504476 3101831
## Dec 2016        3257644 3016566 3498721 2888948 3626340
## Jan 2017        2572956 2372011 2773901 2265637 2880275
## Feb 2017        2452432 2251354 2653511 2144909 2759955
## Mar 2017        2936402 2684727 3188077 2551498 3321306
## Apr 2017        3087290 2811679 3362901 2665780 3508800
## May 2017        3201511 2904728 3498294 2747620 3655401
## Jun 2017        3267131 2953460 3580802 2787413 3746850
## Jul 2017        3894858 3508463 4281252 3303918 4485797
## Aug 2017        3839291 3446509 4232072 3238584 4439998
plot(ets_forecast)

summary(ets_forecast)
## 
## Forecast method: ETS(M,N,M)
## 
## Model Information:
## ETS(M,N,M) 
## 
## Call:
##  ets(y = US_tourism_TSW) 
## 
##   Smoothing parameters:
##     alpha = 0.4147 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 1615326.4368 
##     s=1.0256 0.8825 1.0596 1.0823 1.2087 1.2262
##            1.0286 1.0079 0.972 0.9245 0.7721 0.81
## 
##   sigma:  0.0469
## 
##      AIC     AICc      BIC 
## 4988.083 4991.083 5035.640 
## 
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 21543.29 96382.89 72295.69 0.756152 3.456318 0.4198782
##                    ACF1
## Training set 0.02090026
## 
## Forecasts:
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3437539 3231023 3644055 3121700 3753378
## Oct 2016        3365511 3146587 3584435 3030695 3700327
## Nov 2016        2803153 2607859 2998448 2504476 3101831
## Dec 2016        3257644 3016566 3498721 2888948 3626340
## Jan 2017        2572956 2372011 2773901 2265637 2880275
## Feb 2017        2452432 2251354 2653511 2144909 2759955
## Mar 2017        2936402 2684727 3188077 2551498 3321306
## Apr 2017        3087290 2811679 3362901 2665780 3508800
## May 2017        3201511 2904728 3498294 2747620 3655401
## Jun 2017        3267131 2953460 3580802 2787413 3746850
## Jul 2017        3894858 3508463 4281252 3303918 4485797
## Aug 2017        3839291 3446509 4232072 3238584 4439998

Holt-Winters

HW_US_tourism_TSW <- HoltWinters(US_tourism_TSW)

HW_US_tourism_TSW
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = US_tourism_TSW)
## 
## Smoothing parameters:
##  alpha: 0.3272698
##  beta : 0.02480278
##  gamma: 0.7684698
## 
## Coefficients:
##            [,1]
## a   3009108.503
## b      9420.709
## s1   339813.279
## s2   249777.147
## s3  -387797.749
## s4     1381.222
## s5  -465808.327
## s6  -668332.884
## s7  -149157.471
## s8   -15436.641
## s9   313995.641
## s10  314927.905
## s11  929179.097
## s12  824051.837
plot(HW_US_tourism_TSW, lwd=2)

To make forecasts for future times not included in the original time series, we use the “forecast.HoltWinters()” function in the “forecast” package

HW_forecast_US_tourism_TSW <- forecast.HoltWinters(HW_US_tourism_TSW, h= 12)
plot(HW_forecast_US_tourism_TSW)

HW_US_tourism_TSW$coefficients
##           a           b          s1          s2          s3          s4 
## 3009108.503    9420.709  339813.279  249777.147 -387797.749    1381.222 
##          s5          s6          s7          s8          s9         s10 
## -465808.327 -668332.884 -149157.471  -15436.641  313995.641  314927.905 
##         s11         s12 
##  929179.097  824051.837
sd(complete.cases(HW_forecast_US_tourism_TSW$residuals))
## [1] 0.2527768
resHW <- residuals(HW_forecast_US_tourism_TSW)
fitHW <- fitted(HW_forecast_US_tourism_TSW)
actHW <- HW_forecast_US_tourism_TSW$x
plot(resHW, main="Residuals from the Holt-Winter method Forecasts", 
ylab="", xlab="Time")

2 Do a Histogram plot of residuals. What does the plot indicate?

hist(resHW, xlab = "Residuals", main="Histogram of Residuals from the Holt-Winter method Forecasts")

xyplot(resHW ~ fitHW, pch=16, cex = 1.5, abline=0)

xyplot(resHW ~ actHW, pch = 16, cex=1.5, abline = 0)

Acf(resHW, main = "ACF of Residuals of Holt-Winter Forecasts")

accuracy_HW <- accuracy(HW_forecast_US_tourism_TSW)
accuracy_HW
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 15175.03 123247.5 95446.26 0.4461066 4.438431 0.5543318
##                 ACF1
## Training set 0.20383
HW_forecast_US_tourism_TSW
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3358342 3201116 3515569 3117886 3598799
## Oct 2016        3277727 3111894 3443560 3024107 3531347
## Nov 2016        2649573 2475167 2823979 2382841 2916304
## Dec 2016        3048173 2865214 3231131 2768361 3327984
## Jan 2017        2590404 2398902 2781906 2297527 2883281
## Feb 2017        2397300 2197255 2597345 2091357 2703242
## Mar 2017        2925896 2717300 3134492 2606876 3244916
## Apr 2017        3069038 2851876 3286199 2736917 3401158
## May 2017        3407891 3182143 3633638 3062640 3753141
## Jun 2017        3418243 3183886 3652601 3059825 3776662
## Jul 2017        4041915 3798919 4284912 3670284 4413547
## Aug 2017        3946209 3694541 4197877 3561316 4331102
plot(HW_forecast_US_tourism_TSW)

summary(HW_forecast_US_tourism_TSW)
## 
## Forecast method: HoltWinters
## 
## Model Information:
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = US_tourism_TSW)
## 
## Smoothing parameters:
##  alpha: 0.3272698
##  beta : 0.02480278
##  gamma: 0.7684698
## 
## Coefficients:
##            [,1]
## a   3009108.503
## b      9420.709
## s1   339813.279
## s2   249777.147
## s3  -387797.749
## s4     1381.222
## s5  -465808.327
## s6  -668332.884
## s7  -149157.471
## s8   -15436.641
## s9   313995.641
## s10  314927.905
## s11  929179.097
## s12  824051.837
## 
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 15175.03 123247.5 95446.26 0.4461066 4.438431 0.5543318
##                 ACF1
## Training set 0.20383
## 
## Forecasts:
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3358342 3201116 3515569 3117886 3598799
## Oct 2016        3277727 3111894 3443560 3024107 3531347
## Nov 2016        2649573 2475167 2823979 2382841 2916304
## Dec 2016        3048173 2865214 3231131 2768361 3327984
## Jan 2017        2590404 2398902 2781906 2297527 2883281
## Feb 2017        2397300 2197255 2597345 2091357 2703242
## Mar 2017        2925896 2717300 3134492 2606876 3244916
## Apr 2017        3069038 2851876 3286199 2736917 3401158
## May 2017        3407891 3182143 3633638 3062640 3753141
## Jun 2017        3418243 3183886 3652601 3059825 3776662
## Jul 2017        4041915 3798919 4284912 3670284 4413547
## Aug 2017        3946209 3694541 4197877 3561316 4331102

ARIMA or Box-Jenkins

adf.test(US_tourism_TSW)
## Warning in adf.test(US_tourism_TSW): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  US_tourism_TSW
## Dickey-Fuller = -11.032, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
nsdiffs(US_tourism_TSW)
## [1] 1
tsdisplay(diff(US_tourism_TSW,12))

+ The residual plot obtained is not stationary; Hence doing the differetiation one more time
tsdisplay(diff(diff(US_tourism_TSW,12)))

US_tourism_TSW_diff <- diff(diff(US_tourism_TSW,12))
Acf(US_tourism_TSW_diff, lag.max = 36)

Pacf(US_tourism_TSW_diff, lag.max = 36)

arima_fit <- Arima(US_tourism_TSW, order=c(0,1,2), seasonal=c(2,1,2))
arima_fit
## Series: US_tourism_TSW 
## ARIMA(0,1,2)(2,1,2)[12]                    
## 
## Coefficients:
##           ma1     ma2     sar1     sar2    sma1    sma2
##       -0.5552  0.2355  -1.1425  -0.6561  0.6170  0.0465
## s.e.   0.0920  0.1152   0.1184   0.1433  0.1671  0.2162
## 
## sigma^2 estimated as 1.027e+10:  log likelihood=-2112.67
## AIC=4239.34   AICc=4240.06   BIC=4261
tsdisplay(residuals(arima_fit))

auto.arima(US_tourism_TSW, stepwise = FALSE, approximation = FALSE)
## Series: US_tourism_TSW 
## ARIMA(0,1,2)(2,1,1)[12]                    
## 
## Coefficients:
##           ma1     ma2     sar1     sar2    sma1
##       -0.5586  0.2425  -1.1258  -0.6276  0.5921
## s.e.   0.0908  0.1114   0.0910   0.0624  0.1163
## 
## sigma^2 estimated as 1.022e+10:  log likelihood=-2112.69
## AIC=4237.39   AICc=4237.93   BIC=4255.95
arima_fit1 <- Arima(US_tourism_TSW, order=c(0,1,2), seasonal=c(2,1,1))
arima_fit1    
## Series: US_tourism_TSW 
## ARIMA(0,1,2)(2,1,1)[12]                    
## 
## Coefficients:
##           ma1     ma2     sar1     sar2    sma1
##       -0.5586  0.2425  -1.1258  -0.6276  0.5921
## s.e.   0.0908  0.1114   0.0910   0.0624  0.1163
## 
## sigma^2 estimated as 1.022e+10:  log likelihood=-2112.69
## AIC=4237.39   AICc=4237.93   BIC=4255.95
arima_fit2 <- auto.arima(US_tourism_TSW, stepwise = FALSE, approximation = FALSE)
arima_fit2
## Series: US_tourism_TSW 
## ARIMA(0,1,2)(2,1,1)[12]                    
## 
## Coefficients:
##           ma1     ma2     sar1     sar2    sma1
##       -0.5586  0.2425  -1.1258  -0.6276  0.5921
## s.e.   0.0908  0.1114   0.0910   0.0624  0.1163
## 
## sigma^2 estimated as 1.022e+10:  log likelihood=-2112.69
## AIC=4237.39   AICc=4237.93   BIC=4255.95
res <- residuals(arima_fit2)
fit <- fitted(arima_fit2)
act <- arima_fit2$x
plot(res, main="Residuals of the Holt-Winter Forecasts", xlab="Time")

2 Do a Histogram plot of residuals. What does the plot indicate?

hist(res, xlab = "Residuals", main="Histogram of Residuals from the Arima Forecasts")

xyplot(res ~ fit, pch=16, cex = 1.5, abline=0)

xyplot(res ~ act, pch = 16, cex=1.5, abline = 0)

Acf(res, main = "ACF of Residuals of Arima Forecasts")

accuracy_arima <- accuracy(arima_fit2)
accuracy_arima
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 430.4661 95778.21 69782.74 -0.1387604 3.166028 0.4052835
##                    ACF1
## Training set 0.02986358
arima_forecast <- forecast(arima_fit2,h=12)
plot(arima_forecast)

+ Next two years. Show table and plot
arima_forecast <- forecast(arima_fit2,h=24)
plot(arima_forecast)

summary(arima_forecast)
## 
## Forecast method: ARIMA(0,1,2)(2,1,1)[12]
## 
## Model Information:
## Series: US_tourism_TSW 
## ARIMA(0,1,2)(2,1,1)[12]                    
## 
## Coefficients:
##           ma1     ma2     sar1     sar2    sma1
##       -0.5586  0.2425  -1.1258  -0.6276  0.5921
## s.e.   0.0908  0.1114   0.0910   0.0624  0.1163
## 
## sigma^2 estimated as 1.022e+10:  log likelihood=-2112.69
## AIC=4237.39   AICc=4237.93   BIC=4255.95
## 
## Error measures:
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 430.4661 95778.21 69782.74 -0.1387604 3.166028 0.4052835
##                    ACF1
## Training set 0.02986358
## 
## Forecasts:
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3424479 3294931 3554027 3226353 3622606
## Oct 2016        3387667 3246061 3529274 3171099 3604236
## Nov 2016        2759190 2592154 2926226 2503731 3014649
## Dec 2016        3164014 2974939 3353090 2874848 3453180
## Jan 2017        2717708 2508907 2926510 2398374 3037042
## Feb 2017        2426245 2199427 2653064 2079356 2773134
## Mar 2017        2888452 2644946 3131958 2516041 3260862
## Apr 2017        3186597 2927476 3445718 2790305 3582888
## May 2017        3390784 3116937 3664631 2971971 3809597
## Jun 2017        3372468 3084647 3660289 2932284 3812652
## Jul 2017        4048860 3747713 4350006 3588296 4509424
## Aug 2017        4012055 3698148 4325963 3531975 4492135
## Sep 2017        3561230 3213754 3908706 3029811 4092649
## Oct 2017        3489157 3123064 3855250 2929267 4049048
## Nov 2017        2864522 2476065 3252979 2270428 3458616
## Dec 2017        3260740 2851137 3670342 2634307 3887172
## Jan 2018        2746188 2316480 3175896 2089006 3403370
## Feb 2018        2548432 2099517 2997346 1861876 3234987
## Mar 2018        3076444 2609112 3543776 2361721 3791167
## Apr 2018        3179619 2694568 3664670 2437797 3921440
## May 2018        3571821 3069676 4073966 2803857 4339785
## Jun 2018        3515563 2996887 4034239 2722317 4308809
## Jul 2018        4235398 3700702 4770093 3417651 5053144
## Aug 2018        4115946 3565696 4666196 3274411 4957480

Accuracy Measures

final_accuracy <- rbind(accuracy_naive,accuracy_snaive,accuracy_ets,accuracy_HW, accuracy_arima)

rownames(final_accuracy) <- c("Naive Method", "Seasonal Naive","ETS", "Holt-Winter", "Arima")

final_accuracy
##                         ME      RMSE       MAE        MPE      MAPE
## Naive Method    15081.8514 301461.96 240545.59 -0.2643395 11.065429
## Seasonal Naive 115998.1585 205853.83 172182.52  4.5953436  7.741379
## ETS             21543.2855  96382.89  72295.69  0.7561520  3.456318
## Holt-Winter     15175.0336 123247.45  95446.26  0.4461066  4.438431
## Arima             430.4661  95778.21  69782.74 -0.1387604  3.166028
##                     MASE        ACF1
## Naive Method   1.3970383 -0.20247055
## Seasonal Naive 1.0000000  0.57322037
## ETS            0.4198782  0.02090026
## Holt-Winter    0.5543318  0.20382996
## Arima          0.4052835  0.02986358

Best:

+ ME:   Mean Error : 430.4661 - lowest -> Arima Method
+ RMSE: Root Mean Squared Error: 95778.21 -> (Penalizes large errors) lowest -> Arima Method
+ MAE:  Mean Absolute Error: 69782.74 -> lowest -> Arima Method
+ MPE:  Mean Percentage Error: -0.1387604 -> closest to zero -> Arima Method
+ MAPE: Mean Absolute Percentage Error: 3.166028 -> lowest -> Arima Method
+ MASE: Mean Absolute Scaled Error: 0.4052835 -> lowest -> Arima Method
+ ACF1: Autocorrelation of errors at lag 1: 0.02986358 -> lowest -> Arima Method

Worse:

+ ME:   Mean Error : 115998.1585 -> Seasonal Naive
+ RMSE: Root Mean Squared Error: 301461.96  -> Naive
+ MAE:  Mean Absolute Error: 240545.59 -> Naive
+ MPE:  Mean Percentage Error: 4.5953436  -> Seasonal Naive
+ MAPE: Mean Absolute Percentage Error: 11.065429 -> Naive Method
+ MASE: Mean Absolute Scaled Error: 1.3970383 -> Naive Method
+ ACF1: Autocorrelation of errors at lag 1: 0.57322037 -> Seasonal Naive Method

Based on all the accuracy measures that we see above, Forecasts performed by the ARIMA METHOD seems to be the best forecast method for the given dataset.


Conclusion

#Naive Method
snaive_forecast <- snaive(US_tourism_TSW, 24)
plot(snaive_forecast)

naive_forecast <- naive(US_tourism_TSW, 24)
plot(naive_forecast)
lines(snaive_forecast$mean,col="black")

#SimpleSmoothing
ma1_forecast <- ma(US_tourism_TSW,order=1)
forecast_ma <- forecast(ma1_forecast,h=24)
plot(forecast_ma)

#Holt-Winter
HW_forecast <- HoltWinters(US_tourism_TSW)
HW_forecast_US <- forecast.HoltWinters(HW_forecast, h= 24)
plot(HW_forecast_US)

#Arima
arima_forecast <- forecast(arima_fit2,h=24)
plot(arima_forecast)

arima_forecast
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Sep 2016        3424479 3294931 3554027 3226353 3622606
## Oct 2016        3387667 3246061 3529274 3171099 3604236
## Nov 2016        2759190 2592154 2926226 2503731 3014649
## Dec 2016        3164014 2974939 3353090 2874848 3453180
## Jan 2017        2717708 2508907 2926510 2398374 3037042
## Feb 2017        2426245 2199427 2653064 2079356 2773134
## Mar 2017        2888452 2644946 3131958 2516041 3260862
## Apr 2017        3186597 2927476 3445718 2790305 3582888
## May 2017        3390784 3116937 3664631 2971971 3809597
## Jun 2017        3372468 3084647 3660289 2932284 3812652
## Jul 2017        4048860 3747713 4350006 3588296 4509424
## Aug 2017        4012055 3698148 4325963 3531975 4492135
## Sep 2017        3561230 3213754 3908706 3029811 4092649
## Oct 2017        3489157 3123064 3855250 2929267 4049048
## Nov 2017        2864522 2476065 3252979 2270428 3458616
## Dec 2017        3260740 2851137 3670342 2634307 3887172
## Jan 2018        2746188 2316480 3175896 2089006 3403370
## Feb 2018        2548432 2099517 2997346 1861876 3234987
## Mar 2018        3076444 2609112 3543776 2361721 3791167
## Apr 2018        3179619 2694568 3664670 2437797 3921440
## May 2018        3571821 3069676 4073966 2803857 4339785
## Jun 2018        3515563 2996887 4034239 2722317 4308809
## Jul 2018        4235398 3700702 4770093 3417651 5053144
## Aug 2018        4115946 3565696 4666196 3274411 4957480

Rank forecasting methods that best forecast for this time series based on historical values.

  1. ARIMA
  2. Simple Smoothing (ETS)
  3. Holt-Winters
  4. Seasonal Naive Method
  5. Naive Method

Final Question