R Markdown: This document walks through all methods employed to forecast Desktop visits for the next 4 months. The essence of this exercise is captured in our project final report.

if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")
##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMonthly.csv")
##### Visualise data
#par(mfrow=c(2, 2))
desktopVisits.ts <- ts(eCommerceTrafficData$Desktop, start = c(2014,1), freq = 12)
plot(desktopVisits.ts, xlab = "", ylab = "Desktop Web Visits", bty = "l", col = 'blue') 

## Partition desktopVisits.ts into training and validation periods
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(desktopVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 2014 29074938 28901407 31936376 30644954 33099440 32994001 39602406
## 2015 60228320 54436830 59109077 61930299 64043529 60293663 60500117
## 2016 49748236 52965832 46989192 36968515 39092124 39318305 37847881
##           Aug      Sep      Oct      Nov      Dec
## 2014 45560815 49464731 76709979 51124246 56054406
## 2015 60670910 53166927 66352323 46366841 48011526
## 2016 40305321 38408325 58636821 32401812 38368711
valid.ts <- window(desktopVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##           Jan      Feb      Mar      Apr
## 2017 39370275 32754124 34762553 33833407
### First, the seasonal naive forecast
naive.pred <- snaive(train.ts, h = 4)
naive.pred
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2017       49748236 24448256 75048216 11055255 88441217
## Feb 2017       52965832 27665852 78265812 14272851 91658813
## Mar 2017       46989192 21689212 72289172  8296211 85682173
## Apr 2017       36968515 11668535 62268495 -1724466 75661496
valid.ts
##           Jan      Feb      Mar      Apr
## 2017 39370275 32754124 34762553 33833407
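# Quick sanity check (a sketch, using only objects defined above): seasonal naive point
# forecasts simply repeat the last observed value for the same month, i.e. Jan-Apr 2016.
tail(train.ts, 12)[1:4]   # Jan-Apr 2016 of the training series
naive.pred$mean           # should match the values above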
plot(train.ts, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'blue') 
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set    245140.7 19741680 17513243  -6.334299 35.92816 1.0000000
## Test set     -11487854.0 12995526 11487854 -33.126359 33.12636 0.6559524
##                     ACF1 Theil's U
## Training set  0.88825313        NA
## Test set     -0.06372008  3.544568
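# Hand check (sketch): the test-set MAPE reported by accuracy() is the mean absolute
# percentage error of the point forecasts against the validation values.
mean(abs((valid.ts - naive.pred$mean) / valid.ts)) * 100   # ~33.1, as in the table above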
# ############ Linear Trend Model
train.lm <- tslm(train.ts ~ trend) 
# forecast with the fitted model and plot the results
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2) 
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)
## 
## Call:
## tslm(formula = train.ts ~ trend)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -17233761 -10678930   -538363  10304702  29895173 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 45965259    4241852  10.836 1.43e-12 ***
## trend          84955     199926   0.425    0.674    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12460000 on 34 degrees of freedom
## Multiple R-squared:  0.005283,   Adjusted R-squared:  -0.02397 
## F-statistic: 0.1806 on 1 and 34 DF,  p-value: 0.6736
accuracy(train.lm.pred,valid.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  1.449482e-09 12110236 10465438  -6.912762 23.85774 0.5975728
## Test set     -1.405592e+07 14291462 14055924 -40.646107 40.64611 0.8025883
##                    ACF1 Theil's U
## Training set  0.6246426        NA
## Test set     -0.3188786  4.175378
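# Sketch: the linear-trend point forecasts are simply intercept + slope * t evaluated at
# the validation time indices t = nTrain + 1, ..., nTrain + nValid.
unname(coef(train.lm)[1] + coef(train.lm)[2] * (nTrain + seq_len(nValid)))
train.lm.pred$mean   # should agree with the values above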
############# Exponential Trend Model #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.expo.trend)
## 
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41042 -0.23580  0.02536  0.23028  0.53968 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.583304   0.090693 193.876   <2e-16 ***
## trend        0.003256   0.004275   0.762    0.452    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2664 on 34 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.227e+15 on 1 and 34 DF,  p-value: < 2.2e-16
accuracy(train.lm.expo.trend.pred, valid.ts)
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   1538939 12243758 10570101  -3.383864 23.21705 0.6035490
## Test set     -13884998 14133831 13884998 -40.167864 40.16786 0.7928285
##                    ACF1 Theil's U
## Training set  0.6277634        NA
## Test set     -0.3024940  4.140457
################## Quadratic (polynomial) trend model
train.lm.poly.trend <- tslm(train.ts ~ (trend + I(trend^2)))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.poly.trend)
## 
## Call:
## tslm(formula = train.ts ~ (trend + I(trend^2)))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -13059362  -4978613   -867608   2765997  26495628 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 23629928    4359029   5.421 5.33e-06 ***
## trend        3611586     543254   6.648 1.45e-07 ***
## I(trend^2)    -95314      14241  -6.693 1.27e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8238000 on 33 degrees of freedom
## Multiple R-squared:  0.578,  Adjusted R-squared:  0.5525 
## F-statistic:  22.6 on 2 and 33 DF,  p-value: 6.561e-07
accuracy(train.lm.poly.trend.pred, valid.ts)
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set        0  7887479  5580374 -2.461382 11.79518 0.3186374
## Test set     13902954 14272037 13902954 39.713807 39.71381 0.7938538
##                   ACF1 Theil's U
## Training set 0.2021654        NA
## Test set     0.1362109  4.102964
### Check seasonality in this monthly time series
fit <- tbats(desktopVisits.ts)
fit
## TBATS(0.001, {0,0}, 0.9, {<12,5>})
## 
## Call: tbats(y = desktopVisits.ts)
## 
## Parameters
##   Lambda: 0.001299
##   Alpha: 0.6182117
##   Beta: 0.0878412
##   Damping Parameter: 0.899772
##   Gamma-1 Values: -0.001162561
##   Gamma-2 Values: 0.01485927
## 
## Seed States:
##               [,1]
##  [1,] 17.228572090
##  [2,]  0.114912930
##  [3,]  0.029554630
##  [4,] -0.037937340
##  [5,]  0.019912324
##  [6,]  0.069292702
##  [7,]  0.003278349
##  [8,] -0.073323405
##  [9,]  0.034711907
## [10,]  0.047568036
## [11,] -0.007281941
## [12,] -0.085306426
## 
## Sigma: 0.08651201
## AIC: 1395.249
seasonal <- !is.null(fit$seasonal)
# a non-NULL seasonal component indicates seasonality is present
seasonal
## [1] TRUE
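# A complementary check (sketch): an STL decomposition also exposes the monthly seasonal
# component of the series directly.
plot(stl(desktopVisits.ts, s.window = "periodic"))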
## Seasonality is present, so let's fit models with seasonal terms
# Season-only model
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
plot(train.lm.season.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.season.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.season)
## 
## Call:
## tslm(formula = train.ts ~ season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -17275560  -8598416   -173703   8682679  18749043 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 46350498    7405414   6.259 1.81e-06 ***
## season2      -915808   10472837  -0.087   0.9310    
## season3      -338950   10472837  -0.032   0.9744    
## season4     -3169242   10472837  -0.303   0.7648    
## season5      -938800   10472837  -0.090   0.9293    
## season6     -2148508   10472837  -0.205   0.8392    
## season7      -367030   10472837  -0.035   0.9723    
## season8      2495184   10472837   0.238   0.8137    
## season9       662830   10472837   0.063   0.9501    
## season10    20882543   10472837   1.994   0.0576 .  
## season11    -3052865   10472837  -0.292   0.7732    
## season12     1127716   10472837   0.108   0.9151    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12830000 on 24 degrees of freedom
## Multiple R-squared:  0.2561, Adjusted R-squared:  -0.08488 
## F-statistic: 0.7511 on 11 and 24 DF,  p-value: 0.6819
accuracy(train.lm.season.pred, valid.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -4.131632e-10 10472837  9210317  -5.508366 21.15450 0.5259058
## Test set     -1.006441e+07 10288859 10064408 -29.108165 29.10816 0.5746742
##                    ACF1 Theil's U
## Training set  0.8896044        NA
## Test set     -0.3184161  2.997776
### Additive seasonal model with trend
train.lm.tns <- tslm(train.ts ~ trend + season)
train.lm.tns.pred <- forecast(train.lm.tns, h = nValid, level = 0)
plot(train.lm.tns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tns)
## 
## Call:
## tslm(formula = train.ts ~ trend + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -17030419  -8843556   -173703   8866534  18749043 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 46084929    8099103   5.690 8.57e-06 ***
## trend          20428     222836   0.092   0.9278    
## season2      -936237   10698452  -0.088   0.9310    
## season3      -379806   10705412  -0.035   0.9720    
## season4     -3230527   10717002  -0.301   0.7658    
## season5     -1020514   10733206  -0.095   0.9251    
## season6     -2250650   10754005  -0.209   0.8361    
## season7      -489600   10779371  -0.045   0.9642    
## season8      2352185   10809272   0.218   0.8297    
## season9       499403   10843671   0.046   0.9637    
## season10    20698688   10882525   1.902   0.0698 .  
## season11    -3257149   10925787  -0.298   0.7683    
## season12      903004   10973404   0.082   0.9351    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13100000 on 23 degrees of freedom
## Multiple R-squared:  0.2564, Adjusted R-squared:  -0.1316 
## F-statistic: 0.6607 on 12 and 23 DF,  p-value: 0.7697
accuracy(train.lm.tns.pred, valid.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  1.035813e-09 10470924  9223936  -5.493565 21.15385 0.5266835
## Test set     -1.055469e+07 10768926 10554690 -30.508574 30.50857 0.6026691
##                    ACF1 Theil's U
## Training set  0.8905480        NA
## Test set     -0.3184161  3.129804
### Additive Seasonal Model with quadratic trend
train.lm.qtns <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.qtns.pred <- forecast(train.lm.qtns, h = nValid, level = 0)
plot(train.lm.qtns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.qtns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.qtns)
## 
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2) + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -10161706  -2842748   -471742   3842405   9468055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25202707    4514897   5.582 1.30e-05 ***
## trend        3597476     415388   8.661 1.54e-08 ***
## I(trend^2)    -96677      10854  -8.907 9.49e-09 ***
## season2     -1903006    5097993  -0.373   0.7125    
## season3     -2119992    5103894  -0.415   0.6819    
## season4     -5550774    5112316  -1.086   0.2893    
## season5     -3727469    5122418  -0.728   0.4745    
## season6     -5150959    5133641  -1.003   0.3266    
## season7     -3389909    5145701  -0.659   0.5169    
## season8      -354770    5158593  -0.069   0.9458    
## season9     -1820844    5172585  -0.352   0.7282    
## season10    18958502    5188211   3.654   0.0014 ** 
## season11    -4223918    5206273  -0.811   0.4259    
## season12      903004    5227827   0.173   0.8644    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6241000 on 22 degrees of freedom
## Multiple R-squared:  0.8386, Adjusted R-squared:  0.7432 
## F-statistic:  8.79 on 13 and 22 DF,  p-value: 6.013e-06
accuracy(train.lm.qtns.pred, valid.ts)
##                         ME     RMSE      MAE        MPE      MAPE
## Training set -5.175025e-10  4878792  3996013 -0.9752233  8.369716
## Test set      1.728827e+07 17994194 17288273 49.6413890 49.641389
##                   MASE      ACF1 Theil's U
## Training set 0.2281709 0.7147569        NA
## Test set     0.9871543 0.2233240  5.333458
### Multiplicative seasonal model (trend * season interaction)
train.lm.tnms <- tslm(train.ts ~ trend*season)
train.lm.tnms.pred <- forecast(train.lm.tnms, h = nValid, level = 0)
plot(train.lm.tnms.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tnms.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tnms)
## 
## Call:
## tslm(formula = train.ts ~ trend * season)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9374522 -6938911 -2305702  3840306 18749043 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    35152462   14395403   2.442   0.0310 *
## trend            861387     884303   0.974   0.3492  
## season2        -3755353   20870291  -0.180   0.8602  
## season3         1451077   21406699   0.068   0.9471  
## season4         3813087   21965621   0.174   0.8651  
## season5         6014418   22545381   0.267   0.7942  
## season6         4306300   23144414   0.186   0.8555  
## season7        12220005   23761262   0.514   0.6164  
## season8        18072799   24394574   0.741   0.4730  
## season9        21535221   25043100   0.860   0.4067  
## season10       48647641   25705689   1.892   0.0828 .
## season11       26087504   26381283   0.989   0.3423  
## season12       30011448   27068906   1.109   0.2893  
## trend:season2    141297    1250593   0.113   0.9119  
## trend:season3   -234187    1250593  -0.187   0.8546  
## trend:season4   -597906    1250593  -0.478   0.6412  
## trend:season5   -611692    1250593  -0.489   0.6336  
## trend:season6   -597875    1250593  -0.478   0.6412  
## trend:season7   -934493    1250593  -0.747   0.4693  
## trend:season8  -1080366    1250593  -0.864   0.4046  
## trend:season9  -1322071    1250593  -1.057   0.3113  
## trend:season10 -1614436    1250593  -1.291   0.2210  
## trend:season11 -1641489    1250593  -1.313   0.2139  
## trend:season12 -1598291    1250593  -1.278   0.2254  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15010000 on 12 degrees of freedom
## Multiple R-squared:  0.4908, Adjusted R-squared:  -0.4851 
## F-statistic: 0.5029 on 23 and 12 DF,  p-value: 0.9243
accuracy(train.lm.tnms.pred, valid.ts)
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  2.067190e-10  8664361  7023820  -3.52261 15.29524 0.4010576
## Test set     -2.659293e+07 27623767 26592933 -76.10115 76.10115 1.5184471
##                    ACF1 Theil's U
## Training set 0.89035318        NA
## Test set     0.04916565  7.249066
### Seasonal model only
# Among the models above, the season-only model had the lowest validation RMSE (and MAPE),
# so we use it for the model-driven forecast; it has 11 seasonal dummy predictors plus the intercept.


### On the basis of validation MAPE, we will use the season-only model for forecasting in the model-driven approach (see the comparison sketch below)
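# Sketch: collect the validation-set MAPE of every model fitted above in one table, to
# make the comparison behind this choice explicit.
data.frame(
  model = c("seasonal naive", "linear trend", "exponential trend", "quadratic trend",
            "season only", "trend + season", "quadratic trend + season", "trend * season"),
  test.MAPE = c(accuracy(naive.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.expo.trend.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.poly.trend.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.season.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.tns.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.qtns.pred, valid.ts)["Test set", "MAPE"],
                accuracy(train.lm.tnms.pred, valid.ts)["Test set", "MAPE"]))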
Visits.lm.tns <- tslm(desktopVisits.ts ~ season)
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
plot(Visits.lm.tns.pred, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue") 

summary(Visits.lm.tns.pred)
## 
## Forecast method: Linear regression model
## 
## Model Information:
## 
## Call:
## tslm(formula = desktopVisits.ts ~ season)
## 
## Coefficients:
## (Intercept)      season2      season3      season4      season5  
##    44605442     -2340894     -1406143     -3761149       806255  
##     season6      season7      season8      season9     season10  
##     -403453      1378026      4240240      2407885     22627599  
##    season11     season12  
##    -1307809      2872772  
## 
## 
## Error measures:
##                        ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 3.734044e-10 10327237 9178878 -5.329291 21.04732 0.5512021
##                   ACF1
## Training set 0.8974096
## 
## Forecasts:
##          Point Forecast     Lo 0     Hi 0
## May 2017       45411698 45411698 45411698
## Jun 2017       44201990 44201990 44201990
## Jul 2017       45983468 45983468 45983468
## Aug 2017       48845682 48845682 48845682
names(Visits.lm.tns.pred)
##  [1] "model"     "mean"      "lower"     "upper"     "level"    
##  [6] "x"         "method"    "newdata"   "residuals" "fitted"
round(Visits.lm.tns.pred$mean,0)
##           May      Jun      Jul      Aug
## 2017 45411698 44201990 45983468 48845682
accuracy(Visits.lm.tns.pred)
##                        ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 3.734044e-10 10327237 9178878 -5.329291 21.04732 0.5512021
##                   ACF1
## Training set 0.8974096
# Calculating residuals over the training period
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
res <- residuals(train.lm.season)
#par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")

Acf(res, lag.max = 12, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")

# Check autocorrelation of the residuals; we chose the seasonal model because of its low MAPE, and now fit an ARIMA model to its residuals
train.res.arima <- Arima(train.lm.season$residuals, order = c(3,1,1))
train.res.arima.pred <- forecast(train.res.arima, h = nValid) 
plot(train.lm.season$residuals, ylab = "Residuals", xlab = "Time", bty = "l", xaxt = "n", main ="") 
#axis(1, at = seq(2015, 2017, 1), labels = format(seq(2015, 2017, 1)))
lines(train.res.arima.pred$fitted, lwd = 2, col = "blue")

summary(train.res.arima)
## Series: train.lm.season$residuals 
## ARIMA(3,1,1)                    
## 
## Coefficients:
##           ar1      ar2     ar3     ma1
##       -0.0062  -0.0771  0.3052  0.2614
## s.e.   0.3631   0.1720  0.1620  0.3714
## 
## sigma^2 estimated as 1.331e+13:  log likelihood=-576.58
## AIC=1163.16   AICc=1165.23   BIC=1170.94
## 
## Training set error measures:
##                    ME    RMSE     MAE       MPE     MAPE      MASE
## Training set 146878.7 3385608 2762126 -2.530912 87.46429 0.1577164
##                    ACF1
## Training set 0.01477983
# ACF plot of residuals of residuals
Acf(train.res.arima$residuals, lag.max = 12, main="ACF of residuals of residuals")
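# Optional diagnostic (sketch): checkresiduals() from the forecast package bundles the
# residual plot, ACF and a Ljung-Box test for the ARIMA fit in one call.
checkresiduals(train.res.arima)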

## After visualising the residuals-of-residuals, it looks like only noise is left
# now compute the adjusted (two-level) forecast
modifedForecastValid <- train.res.arima.pred$mean + train.lm.season.pred$mean
accuracy(modifedForecastValid,valid.ts)
##                ME    RMSE     MAE       MPE     MAPE       ACF1 Theil's U
## Test set -1450453 2342350 2111235 -4.459261 6.137639 -0.2893773 0.6802824
accuracy(train.lm.season)
##                         ME     RMSE     MAE       MPE    MAPE      MASE
## Training set -4.131632e-10 10472837 9210317 -5.508366 21.1545 0.8828569
modifedForecastValid
##           Jan      Feb      Mar      Apr
## 2017 38048711 36288366 37353387 34831708
# The results above show the validation MAPE improving from about 21 to 6.1
# Now produce the final forecast on the full series
Visits.lm.tns <- tslm(desktopVisits.ts ~ season)
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
visit.res.arima <- Arima(Visits.lm.tns$residuals, order = c(3,1,1))
visit.res.arima.pred <- forecast(visit.res.arima, h = 4)
ModifiedForecast <- visit.res.arima.pred$mean + Visits.lm.tns.pred$mean
ModifiedForecast
##           May      Jun      Jul      Aug
## 2017 36699704 36193520 38176419 40543531
################# Now use Smoothing Methods ###########################
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1) 
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2014,nTrain+1), end = c(2014,totalRecords), freq = 12) 
plot(train.ts, xlab = "Monthly", ylab = "Desktop Web Visits", bty = "l", col = 'black') 
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2) 
lines(valid.ts)

summary(ma.trailing.pred)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 42590000 42590000 42590000 42590000 42590000 42590000
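# Sketch: the trailing-MA "forecast" is just the mean of the last 12 training months,
# repeated for each validation month, which is where the constant value above comes from.
mean(tail(train.ts, 12))   # equals last.ma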
accuracy(ma.trailing.pred, valid.ts)
##                ME    RMSE     MAE       MPE     MAPE       ACF1 Theil's U
## Test set -7407500 7824881 7407500 -21.64459 21.64459 -0.3377797  2.364315
# Remove the monthly seasonality with a lag-12 (seasonal) difference

# Simple exponential smoothing
# The differenced result is a series with no monthly seasonality.
diff.ts <- diff(desktopVisits.ts, lag = 12)
diff.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  31153382  25535423  27172701  31285345  30944089  27299662  20897711
## 2016 -10480084  -1470998 -12119885 -24961784 -24951405 -20975358 -22652236
## 2017 -10377961 -20211708 -12226639  -3135108                              
##            Aug       Sep       Oct       Nov       Dec
## 2015  15110095   3702196 -10357656  -4757405  -8042880
## 2016 -20365589 -14758602  -7715502 -13965029  -9642815
## 2017
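# Quick check (sketch): after the lag-12 difference, the ACF should show no strong spike
# at lag 12 if the monthly seasonality has been removed.
Acf(diff.ts, lag.max = 24, main = "ACF of seasonally differenced series")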
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(diff.ts,  end = c(2014,nTrain))
train.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  31153382  25535423  27172701  31285345  30944089  27299662  20897711
## 2016 -10480084  -1470998 -12119885 -24961784 -24951405 -20975358 -22652236
##            Aug       Sep       Oct       Nov       Dec
## 2015  15110095   3702196 -10357656  -4757405  -8042880
## 2016 -20365589 -14758602  -7715502 -13965029  -9642815
valid.ts <- window(diff.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##            Jan       Feb       Mar       Apr
## 2017 -10377961 -20211708 -12226639  -3135108
#The ets function chooses the optimal alpha and the initial level by maximizing the likelihood over
#the training period. In the case of a simple exponential smoothing model, maximizing the likelihood
#is equivalent to minimizing the RMSE in the training period
#(the RMSE in the training period equals sigma, the standard deviation of the training residuals).
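# Sketch: letting ets() choose alpha itself (instead of fixing alpha = 0.2 below)
# illustrates the likelihood-based estimation described above; ses.auto is only illustrative.
ses.auto <- ets(train.ts, model = "ANN")
ses.auto$par["alpha"]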
ses <- ets(train.ts, model = "ANN", alpha = 0.2) 
ses.pred <- forecast(ses, h = nValid, level = 0) 
ses.pred
##          Point Forecast      Lo 0      Hi 0
## Jan 2017      -12897651 -12897651 -12897651
## Feb 2017      -12897651 -12897651 -12897651
## Mar 2017      -12897651 -12897651 -12897651
## Apr 2017      -12897651 -12897651 -12897651
plot(ses.pred, ylab = "Visits (Difference)", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

accuracy(ses.pred,valid.ts)
##                    ME     RMSE      MAE       MPE      MAPE      MASE
## Training set -7201209 13104268 10423493  45.96912 110.99282 0.3297824
## Test set      1409797  6237022  5066826 -76.24357  94.33719 0.1603061
##                     ACF1 Theil's U
## Training set  0.73670149        NA
## Test set     -0.06372008 0.8405701
## Holt-Winters (ETS) model for trend and seasonality
totalRecords <- length(desktopVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(desktopVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 2014 29074938 28901407 31936376 30644954 33099440 32994001 39602406
## 2015 60228320 54436830 59109077 61930299 64043529 60293663 60500117
## 2016 49748236 52965832 46989192 36968515 39092124 39318305 37847881
##           Aug      Sep      Oct      Nov      Dec
## 2014 45560815 49464731 76709979 51124246 56054406
## 2015 60670910 53166927 66352323 46366841 48011526
## 2016 40305321 38408325 58636821 32401812 38368711
valid.ts <- window(desktopVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##           Jan      Feb      Mar      Apr
## 2017 39370275 32754124 34762553 33833407
ses.opt <- ets(train.ts, model = "MAA", restrict = FALSE)
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0) 
accuracy(ses.pred, valid.ts) 
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -7201209 13104268 10423493  45.96912 110.9928 0.3297824
## Test set     48077741 48143815 48077741 136.84006 136.8401 1.5211015
##                    ACF1 Theil's U
## Training set  0.7367015        NA
## Test set     -0.3377797  12.69082
accuracy(ses.opt.pred, valid.ts) 
##                      ME    RMSE     MAE        MPE      MAPE      MASE
## Training set  -708040.2 3846683 3052959  -1.558153  6.512519 0.1743229
## Test set     -3745969.6 4614515 3768634 -11.156365 11.213932 0.2151877
##                    ACF1 Theil's U
## Training set  0.1351516        NA
## Test set     -0.3544818  1.391771
ses.opt
## ETS(M,A,A) 
## 
## Call:
##  ets(y = train.ts, model = "MAA", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0502 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 34650089.5582 
##     b = 1250888.1227 
##     s=-2233072 -5263891 19652168 -1991675 623300.7 -1509264
##            -1937441 -934100.9 -3819095 -826233.8 -513868.5 -1246827
## 
##   sigma:  0.0777
## 
##      AIC     AICc      BIC 
## 1250.322 1284.322 1277.242
plot(ses.opt.pred, ylab = "Desktop Web Visits", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.opt.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)
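# Cross-check (sketch, not used further): the classical additive Holt-Winters fit via hw()
# from the forecast package gives a comparable seasonal smoothing model; hw.pred is illustrative.
hw.pred <- hw(train.ts, seasonal = "additive", h = nValid)
accuracy(hw.pred, valid.ts)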

## Using the ETS(M,A,A) model, we forecast the next 4 months of visits
desktopVisits <- ets(desktopVisits.ts, model = "MAA", restrict = FALSE)
desktopVisits.pred <- forecast(desktopVisits, h = nValid, level = 0) 
desktopVisits.pred
##          Point Forecast     Lo 0     Hi 0
## May 2017       36779198 36779198 36779198
## Jun 2017       35837265 35837265 35837265
## Jul 2017       35022150 35022150 35022150
## Aug 2017       37748349 37748349 37748349
desktopVisits.pred$mean
##           May      Jun      Jul      Aug
## 2017 36779198 35837265 35022150 37748349
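# Sketch: the same ETS(M,A,A) forecast with the default 80%/95% prediction intervals,
# which the level = 0 call above suppresses.
forecast(desktopVisits, h = 4)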
accuracy(desktopVisits.pred) 
##                     ME    RMSE     MAE       MPE     MAPE      MASE
## Training set -807458.8 3998663 3054776 -1.899861 6.813952 0.1834428
##                    ACF1
## Training set 0.09762681
desktopVisits
## ETS(M,A,A) 
## 
## Call:
##  ets(y = desktopVisits.ts, model = "MAA", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0369 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 35889739.4354 
##     b = 1252668.9292 
##     s=-2076710 -5107529 20117253 -1917731 7385.388 -2658328
##            -1781079 -777739.3 -3662733 -669872 -357507 -1115410
## 
##   sigma:  0.0846
## 
##      AIC     AICc      BIC 
## 1394.474 1422.292 1423.185