R Markdown : Looks into all methods employed to forecast Desktop visits for next 4 months. Essence of this excercise captured in our project final report.
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")
##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMonthly.csv")
##### Visualise data
#par(mfrow=c(2, 2))
desktopVisits.ts <- ts(eCommerceTrafficData$Desktop, start = c(2014,1), freq = 12)
plot(desktopVisits.ts, xlab = "", ylab = "Desktop Web Visits", bty = "l", col = 'blue')

## Partition of data of desktopVisits.ts
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(desktopVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
## Jan Feb Mar Apr May Jun Jul
## 2014 29074938 28901407 31936376 30644954 33099440 32994001 39602406
## 2015 60228320 54436830 59109077 61930299 64043529 60293663 60500117
## 2016 49748236 52965832 46989192 36968515 39092124 39318305 37847881
## Aug Sep Oct Nov Dec
## 2014 45560815 49464731 76709979 51124246 56054406
## 2015 60670910 53166927 66352323 46366841 48011526
## 2016 40305321 38408325 58636821 32401812 38368711
valid.ts <- window(desktopVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
## Jan Feb Mar Apr
## 2017 39370275 32754124 34762553 33833407
### Fist lets do the seasonal naive forecast
naive.pred <- snaive(train.ts, h = 4)
naive.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2017 49748236 24448256 75048216 11055255 88441217
## Feb 2017 52965832 27665852 78265812 14272851 91658813
## Mar 2017 46989192 21689212 72289172 8296211 85682173
## Apr 2017 36968515 11668535 62268495 -1724466 75661496
valid.ts
## Jan Feb Mar Apr
## 2017 39370275 32754124 34762553 33833407
plot(train.ts, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'blue')
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 245140.7 19741680 17513243 -6.334299 35.92816 1.0000000
## Test set -11487854.0 12995526 11487854 -33.126359 33.12636 0.6559524
## ACF1 Theil's U
## Training set 0.88825313 NA
## Test set -0.06372008 3.544568
# ############ Linear Trend Model
train.lm <- tslm(train.ts ~ trend)
# now train a model and plot it
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)
##
## Call:
## tslm(formula = train.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17233761 -10678930 -538363 10304702 29895173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45965259 4241852 10.836 1.43e-12 ***
## trend 84955 199926 0.425 0.674
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12460000 on 34 degrees of freedom
## Multiple R-squared: 0.005283, Adjusted R-squared: -0.02397
## F-statistic: 0.1806 on 1 and 34 DF, p-value: 0.6736
accuracy(train.lm.pred,valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.449482e-09 12110236 10465438 -6.912762 23.85774 0.5975728
## Test set -1.405592e+07 14291462 14055924 -40.646107 40.64611 0.8025883
## ACF1 Theil's U
## Training set 0.6246426 NA
## Test set -0.3188786 4.175378
############# Exponential Trend Moedl #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.expo.trend)
##
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41042 -0.23580 0.02536 0.23028 0.53968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.583304 0.090693 193.876 <2e-16 ***
## trend 0.003256 0.004275 0.762 0.452
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2664 on 34 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.227e+15 on 1 and 34 DF, p-value: < 2.2e-16
accuracy(train.lm.expo.trend.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 1538939 12243758 10570101 -3.383864 23.21705 0.6035490
## Test set -13884998 14133831 13884998 -40.167864 40.16786 0.7928285
## ACF1 Theil's U
## Training set 0.6277634 NA
## Test set -0.3024940 4.140457
################## Quadratic or polynomial -ve trend model
train.lm.poly.trend <- tslm(train.ts ~ (trend + I(trend^2)))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.poly.trend)
##
## Call:
## tslm(formula = train.ts ~ (trend + I(trend^2)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13059362 -4978613 -867608 2765997 26495628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23629928 4359029 5.421 5.33e-06 ***
## trend 3611586 543254 6.648 1.45e-07 ***
## I(trend^2) -95314 14241 -6.693 1.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8238000 on 33 degrees of freedom
## Multiple R-squared: 0.578, Adjusted R-squared: 0.5525
## F-statistic: 22.6 on 2 and 33 DF, p-value: 6.561e-07
accuracy(train.lm.poly.trend.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0 7887479 5580374 -2.461382 11.79518 0.3186374
## Test set 13902954 14272037 13902954 39.713807 39.71381 0.7938538
## ACF1 Theil's U
## Training set 0.2021654 NA
## Test set 0.1362109 4.102964
### check seasoanlity in this Monthly time series
fit <- tbats(desktopVisits.ts)
fit
## TBATS(0.001, {0,0}, 0.9, {<12,5>})
##
## Call: tbats(y = desktopVisits.ts)
##
## Parameters
## Lambda: 0.001299
## Alpha: 0.6182117
## Beta: 0.0878412
## Damping Parameter: 0.899772
## Gamma-1 Values: -0.001162561
## Gamma-2 Values: 0.01485927
##
## Seed States:
## [,1]
## [1,] 17.228572090
## [2,] 0.114912930
## [3,] 0.029554630
## [4,] -0.037937340
## [5,] 0.019912324
## [6,] 0.069292702
## [7,] 0.003278349
## [8,] -0.073323405
## [9,] 0.034711907
## [10,] 0.047568036
## [11,] -0.007281941
## [12,] -0.085306426
##
## Sigma: 0.08651201
## AIC: 1395.249
seasonal <- !is.null(fit$seasonal)
# this shows there is seasonlity present
seasonal
## [1] TRUE
## Season is true lets check model with seasonality
# Only Season
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
plot(train.lm.season.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.season.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.season)
##
## Call:
## tslm(formula = train.ts ~ season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17275560 -8598416 -173703 8682679 18749043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46350498 7405414 6.259 1.81e-06 ***
## season2 -915808 10472837 -0.087 0.9310
## season3 -338950 10472837 -0.032 0.9744
## season4 -3169242 10472837 -0.303 0.7648
## season5 -938800 10472837 -0.090 0.9293
## season6 -2148508 10472837 -0.205 0.8392
## season7 -367030 10472837 -0.035 0.9723
## season8 2495184 10472837 0.238 0.8137
## season9 662830 10472837 0.063 0.9501
## season10 20882543 10472837 1.994 0.0576 .
## season11 -3052865 10472837 -0.292 0.7732
## season12 1127716 10472837 0.108 0.9151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12830000 on 24 degrees of freedom
## Multiple R-squared: 0.2561, Adjusted R-squared: -0.08488
## F-statistic: 0.7511 on 11 and 24 DF, p-value: 0.6819
accuracy(train.lm.season.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.131632e-10 10472837 9210317 -5.508366 21.15450 0.5259058
## Test set -1.006441e+07 10288859 10064408 -29.108165 29.10816 0.5746742
## ACF1 Theil's U
## Training set 0.8896044 NA
## Test set -0.3184161 2.997776
### Additive Seasonal Model with trend
train.lm.tns <- tslm(train.ts ~ trend + season)
train.lm.tns.pred <- forecast(train.lm.tns, h = nValid, level = 0)
plot(train.lm.tns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tns.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.tns)
##
## Call:
## tslm(formula = train.ts ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17030419 -8843556 -173703 8866534 18749043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46084929 8099103 5.690 8.57e-06 ***
## trend 20428 222836 0.092 0.9278
## season2 -936237 10698452 -0.088 0.9310
## season3 -379806 10705412 -0.035 0.9720
## season4 -3230527 10717002 -0.301 0.7658
## season5 -1020514 10733206 -0.095 0.9251
## season6 -2250650 10754005 -0.209 0.8361
## season7 -489600 10779371 -0.045 0.9642
## season8 2352185 10809272 0.218 0.8297
## season9 499403 10843671 0.046 0.9637
## season10 20698688 10882525 1.902 0.0698 .
## season11 -3257149 10925787 -0.298 0.7683
## season12 903004 10973404 0.082 0.9351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13100000 on 23 degrees of freedom
## Multiple R-squared: 0.2564, Adjusted R-squared: -0.1316
## F-statistic: 0.6607 on 12 and 23 DF, p-value: 0.7697
accuracy(train.lm.tns.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.035813e-09 10470924 9223936 -5.493565 21.15385 0.5266835
## Test set -1.055469e+07 10768926 10554690 -30.508574 30.50857 0.6026691
## ACF1 Theil's U
## Training set 0.8905480 NA
## Test set -0.3184161 3.129804
### Additive Seasonal Model with quadratic trend
train.lm.qtns <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.qtns.pred <- forecast(train.lm.qtns, h = nValid, level = 0)
plot(train.lm.qtns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.qtns.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.qtns)
##
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2) + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10161706 -2842748 -471742 3842405 9468055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25202707 4514897 5.582 1.30e-05 ***
## trend 3597476 415388 8.661 1.54e-08 ***
## I(trend^2) -96677 10854 -8.907 9.49e-09 ***
## season2 -1903006 5097993 -0.373 0.7125
## season3 -2119992 5103894 -0.415 0.6819
## season4 -5550774 5112316 -1.086 0.2893
## season5 -3727469 5122418 -0.728 0.4745
## season6 -5150959 5133641 -1.003 0.3266
## season7 -3389909 5145701 -0.659 0.5169
## season8 -354770 5158593 -0.069 0.9458
## season9 -1820844 5172585 -0.352 0.7282
## season10 18958502 5188211 3.654 0.0014 **
## season11 -4223918 5206273 -0.811 0.4259
## season12 903004 5227827 0.173 0.8644
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6241000 on 22 degrees of freedom
## Multiple R-squared: 0.8386, Adjusted R-squared: 0.7432
## F-statistic: 8.79 on 13 and 22 DF, p-value: 6.013e-06
accuracy(train.lm.qtns.pred, valid.ts)
## ME RMSE MAE MPE MAPE
## Training set -5.175025e-10 4878792 3996013 -0.9752233 8.369716
## Test set 1.728827e+07 17994194 17288273 49.6413890 49.641389
## MASE ACF1 Theil's U
## Training set 0.2281709 0.7147569 NA
## Test set 0.9871543 0.2233240 5.333458
### multiplicative Seasonal Model with trend
train.lm.tnms <- tslm(train.ts ~ trend*season)
train.lm.tnms.pred <- forecast(train.lm.tnms, h = nValid, level = 0)
plot(train.lm.tnms.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tnms.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.tnms)
##
## Call:
## tslm(formula = train.ts ~ trend * season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9374522 -6938911 -2305702 3840306 18749043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35152462 14395403 2.442 0.0310 *
## trend 861387 884303 0.974 0.3492
## season2 -3755353 20870291 -0.180 0.8602
## season3 1451077 21406699 0.068 0.9471
## season4 3813087 21965621 0.174 0.8651
## season5 6014418 22545381 0.267 0.7942
## season6 4306300 23144414 0.186 0.8555
## season7 12220005 23761262 0.514 0.6164
## season8 18072799 24394574 0.741 0.4730
## season9 21535221 25043100 0.860 0.4067
## season10 48647641 25705689 1.892 0.0828 .
## season11 26087504 26381283 0.989 0.3423
## season12 30011448 27068906 1.109 0.2893
## trend:season2 141297 1250593 0.113 0.9119
## trend:season3 -234187 1250593 -0.187 0.8546
## trend:season4 -597906 1250593 -0.478 0.6412
## trend:season5 -611692 1250593 -0.489 0.6336
## trend:season6 -597875 1250593 -0.478 0.6412
## trend:season7 -934493 1250593 -0.747 0.4693
## trend:season8 -1080366 1250593 -0.864 0.4046
## trend:season9 -1322071 1250593 -1.057 0.3113
## trend:season10 -1614436 1250593 -1.291 0.2210
## trend:season11 -1641489 1250593 -1.313 0.2139
## trend:season12 -1598291 1250593 -1.278 0.2254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15010000 on 12 degrees of freedom
## Multiple R-squared: 0.4908, Adjusted R-squared: -0.4851
## F-statistic: 0.5029 on 23 and 12 DF, p-value: 0.9243
accuracy(train.lm.tnms.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.067190e-10 8664361 7023820 -3.52261 15.29524 0.4010576
## Test set -2.659293e+07 27623767 26592933 -76.10115 76.10115 1.5184471
## ACF1 Theil's U
## Training set 0.89035318 NA
## Test set 0.04916565 7.249066
### Seasonal Model only
# We have fins out that RMSE was lowest for this case hence used this model with season
# this will consist 13 predictors 11 for seasons and 2 for trends
### on the basis of MAPE, we will use seasonal to do forecasting while using model driven approach
Visits.lm.tns <- tslm(desktopVisits.ts ~ season)
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
plot(Visits.lm.tns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue")

summary(Visits.lm.tns.pred)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = desktopVisits.ts ~ season)
##
## Coefficients:
## (Intercept) season2 season3 season4 season5
## 44605442 -2340894 -1406143 -3761149 806255
## season6 season7 season8 season9 season10
## -403453 1378026 4240240 2407885 22627599
## season11 season12
## -1307809 2872772
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 3.734044e-10 10327237 9178878 -5.329291 21.04732 0.5512021
## ACF1
## Training set 0.8974096
##
## Forecasts:
## Point Forecast Lo 0 Hi 0
## May 2017 45411698 45411698 45411698
## Jun 2017 44201990 44201990 44201990
## Jul 2017 45983468 45983468 45983468
## Aug 2017 48845682 48845682 48845682
names(Visits.lm.tns.pred)
## [1] "model" "mean" "lower" "upper" "level"
## [6] "x" "method" "newdata" "residuals" "fitted"
round(Visits.lm.tns.pred$mean,0)
## May Jun Jul Aug
## 2017 45411698 44201990 45983468 48845682
accuracy(Visits.lm.tns.pred)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.734044e-10 10327237 9178878 -5.329291 21.04732 0.5512021
## ACF1
## Training set 0.8974096
# Calcualting Residuals of training period
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
res <- residuals(train.lm.season)
#par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")

Acf(res, lag.max = 12, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")

# Check Corelation , we chose Trend model becuase of low MAPE and Run AR model
train.res.arima <- Arima(train.lm.season$residuals, order = c(3,1,1))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)
plot(train.lm.season$residuals, ylab = "Residuals", xlab = "Time", bty = "l", xaxt = "n", main ="")
#axis(1, at = seq(20015, 2017, 1),labels = format(seq(2015, 2016, 1)))
lines(train.res.arima.pred$fitted, lwd = 2, col = "blue")

summary(train.res.arima)
## Series: train.lm.season$residuals
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## -0.0062 -0.0771 0.3052 0.2614
## s.e. 0.3631 0.1720 0.1620 0.3714
##
## sigma^2 estimated as 1.331e+13: log likelihood=-576.58
## AIC=1163.16 AICc=1165.23 BIC=1170.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 146878.7 3385608 2762126 -2.530912 87.46429 0.1577164
## ACF1
## Training set 0.01477983
# ACF plot of residuals of residuals
Acf(train.res.arima$residuals, lag.max = 12, main="ACF of residuals of residuals")

## After visualization of Residual-Residuals, it looks like we lefy with only noise
# now do adjusted forecast
modifedForecastValid <- train.res.arima.pred$mean + train.lm.season.pred$mean
accuracy(modifedForecastValid,valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -1450453 2342350 2111235 -4.459261 6.137639 -0.2893773 0.6802824
accuracy(train.lm.season)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.131632e-10 10472837 9210317 -5.508366 21.1545 0.8828569
modifedForecastValid
## Jan Feb Mar Apr
## 2017 38048711 36288366 37353387 34831708
# above results shows improve MAPE from 21 to 6.1
# Now do the forecast
Visits.lm.tns <- tslm(desktopVisits.ts ~ season)
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
visit.res.arima <- Arima(Visits.lm.tns$residuals, order = c(3,1,1))
visit.res.arima.pred <- forecast(visit.res.arima, h = 4)
ModifiedForecast <- visit.res.arima.pred$mean + Visits.lm.tns.pred$mean
ModifiedForecast
## May Jun Jul Aug
## 2017 36699704 36193520 38176419 40543531
################# Now use Smoothing Methods ###########################
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2014,nTrain+1), end = c(2014,totalRecords), freq = 12)
plot(train.ts, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black')
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2)
lines(valid.ts)

summary(ma.trailing.pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42590000 42590000 42590000 42590000 42590000 42590000
accuracy(ma.trailing.pred, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -7407500 7824881 7407500 -21.64459 21.64459 -0.3377797 2.364315
# using twice differencing
#Simple exponential smoothing
#The result is a series with no monthly seasonality.
diff.ts <- diff(desktopVisits.ts, lag = 12)
diff.ts
## Jan Feb Mar Apr May Jun Jul
## 2015 31153382 25535423 27172701 31285345 30944089 27299662 20897711
## 2016 -10480084 -1470998 -12119885 -24961784 -24951405 -20975358 -22652236
## 2017 -10377961 -20211708 -12226639 -3135108
## Aug Sep Oct Nov Dec
## 2015 15110095 3702196 -10357656 -4757405 -8042880
## 2016 -20365589 -14758602 -7715502 -13965029 -9642815
## 2017
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(diff.ts, end = c(2014,nTrain))
train.ts
## Jan Feb Mar Apr May Jun Jul
## 2015 31153382 25535423 27172701 31285345 30944089 27299662 20897711
## 2016 -10480084 -1470998 -12119885 -24961784 -24951405 -20975358 -22652236
## Aug Sep Oct Nov Dec
## 2015 15110095 3702196 -10357656 -4757405 -8042880
## 2016 -20365589 -14758602 -7715502 -13965029 -9642815
valid.ts <- window(diff.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
## Jan Feb Mar Apr
## 2017 -10377961 -20211708 -12226639 -3135108
#The ets function chooses the optimal ?? and initial level value by maximizing something called the likelihood over
#the training period. In the case of simple exponential smoothing model, maximizing the likelihood
#is equivalent to minimizing the RMSE in the training period
#(the RMSE in the training period is equal to ??, the standard deviation of the training residuals.)
ses <- ets(train.ts, model = "ANN", alpha = 0.2)
ses.pred <- forecast(ses, h = nValid, level = 0)
ses.pred
## Point Forecast Lo 0 Hi 0
## Jan 2017 -12897651 -12897651 -12897651
## Feb 2017 -12897651 -12897651 -12897651
## Mar 2017 -12897651 -12897651 -12897651
## Apr 2017 -12897651 -12897651 -12897651
plot(ses.pred, ylab = "Visits (Difference)", xlab = "Time", bty = "l", xaxt = "n", main ="", flty = 2)
lines(ses.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

accuracy(ses.pred,valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -7201209 13104268 10423493 45.96912 110.99282 0.3297824
## Test set 1409797 6237022 5066826 -76.24357 94.33719 0.1603061
## ACF1 Theil's U
## Training set 0.73670149 NA
## Test set -0.06372008 0.8405701
## Holt's Winter model for seasonality
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(desktopVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
## Jan Feb Mar Apr May Jun Jul
## 2014 29074938 28901407 31936376 30644954 33099440 32994001 39602406
## 2015 60228320 54436830 59109077 61930299 64043529 60293663 60500117
## 2016 49748236 52965832 46989192 36968515 39092124 39318305 37847881
## Aug Sep Oct Nov Dec
## 2014 45560815 49464731 76709979 51124246 56054406
## 2015 60670910 53166927 66352323 46366841 48011526
## 2016 40305321 38408325 58636821 32401812 38368711
valid.ts <- window(desktopVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
## Jan Feb Mar Apr
## 2017 39370275 32754124 34762553 33833407
ses.opt <- ets(train.ts, model = "MAA", restrict = FALSE)
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0)
accuracy(ses.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -7201209 13104268 10423493 45.96912 110.9928 0.3297824
## Test set 48077741 48143815 48077741 136.84006 136.8401 1.5211015
## ACF1 Theil's U
## Training set 0.7367015 NA
## Test set -0.3377797 12.69082
accuracy(ses.opt.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -708040.2 3846683 3052959 -1.558153 6.512519 0.1743229
## Test set -3745969.6 4614515 3768634 -11.156365 11.213932 0.2151877
## ACF1 Theil's U
## Training set 0.1351516 NA
## Test set -0.3544818 1.391771
ses.opt
## ETS(M,A,A)
##
## Call:
## ets(y = train.ts, model = "MAA", restrict = FALSE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0502
## gamma = 1e-04
##
## Initial states:
## l = 34650089.5582
## b = 1250888.1227
## s=-2233072 -5263891 19652168 -1991675 623300.7 -1509264
## -1937441 -934100.9 -3819095 -826233.8 -513868.5 -1246827
##
## sigma: 0.0777
##
## AIC AICc BIC
## 1250.322 1284.322 1277.242
plot(ses.opt.pred, ylab = "Visits (Differenced)", xlab = "Time", bty = "l", xaxt = "n", main ="", flty = 2)
lines(ses.opt.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

## Using MAA model we will forecast next 4 months of visits
desktopVisits <- ets(desktopVisits.ts, model = "MAA", restrict = FALSE)
desktopVisits.pred <- forecast(desktopVisits, h = nValid, level = 0)
desktopVisits.pred
## Point Forecast Lo 0 Hi 0
## May 2017 36779198 36779198 36779198
## Jun 2017 35837265 35837265 35837265
## Jul 2017 35022150 35022150 35022150
## Aug 2017 37748349 37748349 37748349
desktopVisits.pred$mean
## May Jun Jul Aug
## 2017 36779198 35837265 35022150 37748349
accuracy(desktopVisits.pred)
## ME RMSE MAE MPE MAPE MASE
## Training set -807458.8 3998663 3054776 -1.899861 6.813952 0.1834428
## ACF1
## Training set 0.09762681
desktopVisits
## ETS(M,A,A)
##
## Call:
## ets(y = desktopVisits.ts, model = "MAA", restrict = FALSE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0369
## gamma = 1e-04
##
## Initial states:
## l = 35889739.4354
## b = 1252668.9292
## s=-2076710 -5107529 20117253 -1917731 7385.388 -2658328
## -1781079 -777739.3 -3662733 -669872 -357507 -1115410
##
## sigma: 0.0846
##
## AIC AICc BIC
## 1394.474 1422.292 1423.185