ISB CBA 7 Forecasting Analytics Project

R Markdown : Looks into all methods employed to forecast total( all channels) visits for next 4 months. Essence of this excercise captured in our project final report.

if (!require("pacman")) install.packages("pacman")

## Loading required package: pacman

pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")
##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMonthly.csv")
##### Visualise data
#par(mfrow=c(2, 2))
TotalVisits.ts <- ts(eCommerceTrafficData$TotalVisits, start = c(2014,1), freq = 12)
plot(TotalVisits.ts, xlab = "", ylab = "Total Visits", bty = "l", col = 'blue')

## Partition of data of TotalVisits.ts 
totalRecords <- length(TotalVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(TotalVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2014  31944459  32131287  36621256  39207562  45414351  47004826  56350300
## 2015 119822776 117816813 147750078 157721474 174790461 180707771 202712701
## 2016 213871408 216558235 193210173 271283170 369011753 241251452 226731320
##            Aug       Sep       Oct       Nov       Dec
## 2014  67423003  76981084 122308397  89350520 118632408
## 2015 210368309 173536455 281506763 205632740 201678208
## 2016 237626221 223646066 368585606 207938840 259179985

valid.ts <- window(TotalVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts

##            Jan       Feb       Mar       Apr
## 2017 271255935 232961354 257716308 253290742

### Fist lets do the seasonal naive forecast
naive.pred <- snaive(train.ts, h = 4)
naive.pred

##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Jan 2017      213871408  79796149 347946667   8820994 418921822
## Feb 2017      216558235  82482976 350633494  11507821 421608649
## Mar 2017      193210173  59134914 327285432 -11840241 398260587
## Apr 2017      271283170 137207911 405358429  66232756 476333584

valid.ts

##            Jan       Feb       Mar       Apr
## 2017 271255935 232961354 257716308 253290742

plot(train.ts, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'blue') 
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)

##                    ME      RMSE      MAE      MPE     MAPE      MASE
## Training set 94396866 104619480 94396866 46.60743 46.60743 1.0000000
## Test set     30075338  44852021 39071552 11.53067 15.08241 0.4139073
##                    ACF1 Theil's U
## Training set  0.4172954        NA
## Test set     -0.5643028  1.644198

# ############ Linear Trend Model
train.lm <- tslm(train.ts ~ trend) 
# now train a model and plot it
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2) 
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)

## 
## Call:
## tslm(formula = train.ts ~ trend)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -86355258 -16856671  -7258716  13320696 121467951 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21584039   13454549   1.604    0.118    
## trend        7791716     634136  12.287 4.66e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39530000 on 34 degrees of freedom
## Multiple R-squared:  0.8162, Adjusted R-squared:  0.8108 
## F-statistic:   151 on 1 and 34 DF,  p-value: 4.663e-14

accuracy(train.lm.pred,valid.ts)

##                         ME     RMSE      MAE        MPE     MAPE     MASE
## Training set  1.553316e-09 38411935 26875918  -6.090579 17.09837 0.284712
## Test set     -6.775902e+07 70089770 67759019 -27.113791 27.11379 0.717810
##                     ACF1 Theil's U
## Training set -0.01747662        NA
## Test set     -0.38459280  2.971831

############# Exponential Trend Moedl #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.expo.trend)

## 
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58890 -0.26274  0.04587  0.27694  0.51574 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.583163   0.101671  172.94  < 2e-16 ***
## trend        0.061671   0.004792   12.87 1.27e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2987 on 34 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.176e+18 on 1 and 34 DF,  p-value: < 2.2e-16

accuracy(train.lm.expo.trend.pred, valid.ts)

##                        ME      RMSE       MAE        MPE     MAPE
## Training set     728981.9  57563311  41206764  -4.327135 25.73511
## Test set     -212277060.7 215582827 212277061 -84.310693 84.31069
##                   MASE       ACF1 Theil's U
## Training set 0.4365268 0.45952272        NA
## Test set     2.2487723 0.02527446  8.985159

################## Quadratic or polynomial  trend model
train.lm.poly.trend <- tslm(train.ts ~ (trend + I(trend^2)))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.poly.trend)

## 
## Call:
## tslm(formula = train.ts ~ (trend + I(trend^2)))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -64328360 -17151624  -8866950  11297494 121780706 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9825514   19991007  -0.491   0.6263    
## trend       12751119    2491424   5.118  1.3e-05 ***
## I(trend^2)   -134038      65313  -2.052   0.0481 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37780000 on 33 degrees of freedom
## Multiple R-squared:  0.837,  Adjusted R-squared:  0.8271 
## F-statistic: 84.72 on 2 and 33 DF,  p-value: 1.003e-13

accuracy(train.lm.poly.trend.pred, valid.ts)

##                         ME     RMSE      MAE         MPE     MAPE
## Training set -7.241599e-10 36172881 24925354  -0.6620252 17.72114
## Test set     -2.844123e+07 32014324 28441229 -11.5528748 11.55287
##                   MASE       ACF1 Theil's U
## Training set 0.2640485 -0.1381656        NA
## Test set     0.3012942 -0.5560946   1.37739

### check seasoanlity in this Monthly time series
fit <- tbats(TotalVisits.ts)
fit

## BATS(0, {0,0}, 0.953, -)
## 
## Call: tbats(y = TotalVisits.ts)
## 
## Parameters
##   Lambda: 0
##   Alpha: 0.166763
##   Beta: 0.05184388
##   Damping Parameter: 0.953035
## 
## Seed States:
##            [,1]
## [1,] 16.9940603
## [2,]  0.1551613
## 
## Sigma: 0.1683346
## AIC: 1519.949

seasonal <- !is.null(fit$seasonal)
# this shows there is seasonlity present
seasonal

## [1] FALSE

## Season is false but lets check model with seasonality
# Only Season
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
plot(train.lm.season.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.season.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.season)

## 
## Call:
## tslm(formula = train.ts ~ season)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -150991171  -83114900   18685748   65648742  172606231 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 121879548   57919216   2.104    0.046 *
## season2        289231   81910141   0.004    0.997  
## season3       3980955   81910141   0.049    0.962  
## season4      34191188   81910141   0.417    0.680  
## season5      74525974   81910141   0.910    0.372  
## season6      34441802   81910141   0.420    0.678  
## season7      40051893   81910141   0.489    0.629  
## season8      49926297   81910141   0.610    0.548  
## season9      36174987   81910141   0.442    0.663  
## season10    135587374   81910141   1.655    0.111  
## season11     45761152   81910141   0.559    0.582  
## season12     71283986   81910141   0.870    0.393  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 100300000 on 24 degrees of freedom
## Multiple R-squared:  0.1642, Adjusted R-squared:  -0.2189 
## F-statistic: 0.4286 on 11 and 24 DF,  p-value: 0.9278

accuracy(train.lm.season.pred, valid.ts)

##                         ME      RMSE       MAE       MPE     MAPE
## Training set -1.672055e-09  81910141  69634653 -52.23028 80.81227
## Test set      1.223112e+08 123921329 122311194  48.04318 48.04318
##                   MASE       ACF1 Theil's U
## Training set 0.7376797  0.8697204        NA
## Test set     1.2957124 -0.4169193  4.469117

### Additive Seasonal Model with  trend
train.lm.tns <- tslm(train.ts ~ trend + season)
train.lm.tns.pred <- forecast(train.lm.tns, h = nValid, level = 0)
plot(train.lm.tns.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tns)

## 
## Call:
## tslm(formula = train.ts ~ trend + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -56594305 -21827872    821665  17507799  78209366 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19616277   21446035   0.915   0.3698    
## trend         7866406     590059  13.332 2.63e-12 ***
## season2      -7577175   28328988  -0.267   0.7915    
## season3     -11751856   28347418  -0.415   0.6823    
## season4      10591971   28378107   0.373   0.7124    
## season5      43060352   28421016   1.515   0.1434    
## season6      -4890225   28476089  -0.172   0.8652    
## season7      -7146540   28543257  -0.250   0.8045    
## season8      -5138542   28622434  -0.180   0.8591    
## season9     -26756256   28713521  -0.932   0.3611    
## season10     64789725   28816404   2.248   0.0344 *  
## season11    -32902902   28930959  -1.137   0.2671    
## season12    -15246474   29057047  -0.525   0.6048    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34690000 on 23 degrees of freedom
## Multiple R-squared:  0.9042, Adjusted R-squared:  0.8543 
## F-statistic:  18.1 on 12 and 23 DF,  p-value: 6.329e-09

accuracy(train.lm.tns.pred, valid.ts)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -2.089817e-10 27726505 21795572  -4.398126 17.45735 0.2308930
## Test set     -6.648254e+07 69400255 66482538 -26.565181 26.56518 0.7042876
##                    ACF1 Theil's U
## Training set  0.2829072        NA
## Test set     -0.4169193  2.919761

### Additive Seasonal Model with quadratic trend
train.lm.qtns <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.qtns.pred <- forecast(train.lm.qtns, h = nValid, level = 0)
plot(train.lm.qtns.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.qtns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.qtns)

## 
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2) + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -45758007 -14549072  -4991206  13818442  79757408 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8248489   22966646  -0.359   0.7229    
## trend        12639537    2113023   5.982 5.09e-06 ***
## I(trend^2)    -129004      55212  -2.337   0.0290 *  
## season2      -8867210   25932777  -0.342   0.7356    
## season3     -14073920   25962794  -0.542   0.5932    
## season4       7495886   26005634   0.288   0.7759    
## season5      39448253   26057023   1.514   0.1443    
## season6      -8760332   26114110  -0.335   0.7405    
## season7     -11016647   26175459  -0.421   0.6779    
## season8      -8750641   26241040  -0.333   0.7419    
## season9     -29852342   26312213  -1.135   0.2688    
## season10     62467661   26391704   2.367   0.0272 *  
## season11    -34192938   26483583  -1.291   0.2101    
## season12    -15246474   26593223  -0.573   0.5722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31750000 on 22 degrees of freedom
## Multiple R-squared:  0.9233, Adjusted R-squared:  0.8779 
## F-statistic: 20.36 on 13 and 22 DF,  p-value: 2.709e-09

accuracy(train.lm.qtns.pred, valid.ts)

##                         ME     RMSE      MAE         MPE     MAPE
## Training set -7.248546e-10 24817731 19596782   0.7711109 18.75613
## Test set     -2.932952e+07 33090826 29329517 -11.8540806 11.85408
##                   MASE       ACF1 Theil's U
## Training set 0.2075999  0.1058391        NA
## Test set     0.3107043 -0.6860664  1.403082

### multiplicative Seasonal Model with  trend
train.lm.tnms <- tslm(train.ts ~ trend*season)
train.lm.tnms.pred <- forecast(train.lm.tnms, h = nValid, level = 0)
plot(train.lm.tnms.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tnms.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tnms)

## 
## Call:
## tslm(formula = train.ts ~ trend * season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -21615061 -12063243  -3157054   9087888  40781261 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     23335784   28477094   0.819 0.428501    
## trend            7580290    1749334   4.333 0.000973 ***
## season2         -8749392   41285764  -0.212 0.835724    
## season3          4656646   42346892   0.110 0.914255    
## season4        -21982120   43452555  -0.506 0.622100    
## season5        -56145088   44599441  -1.259 0.232015    
## season6        -12699404   45784452  -0.277 0.786214    
## season7          3710683   47004705   0.079 0.938379    
## season8          6634046   48257527   0.137 0.892938    
## season9          6386892   49540447   0.129 0.899554    
## season10         8377030   50851186   0.165 0.871894    
## season11        30657776   52187649   0.587 0.567789    
## season12        29280173   53547910   0.547 0.594536    
## trend:season2     104167    2473932   0.042 0.967107    
## trend:season3   -1055751    2473932  -0.427 0.677117    
## trend:season4    2089528    2473932   0.845 0.414846    
## trend:season5    5902936    2473932   2.386 0.034382 *  
## trend:season6     513320    2473932   0.207 0.839106    
## trend:season7    -481080    2473932  -0.194 0.849069    
## trend:season8    -488489    2473932  -0.197 0.846777    
## trend:season9   -1469249    2473932  -0.594 0.563612    
## trend:season10   2681261    2473932   1.084 0.299747    
## trend:season11  -2639110    2473932  -1.067 0.307069    
## trend:season12  -1724141    2473932  -0.697 0.499130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29690000 on 12 degrees of freedom
## Multiple R-squared:  0.9634, Adjusted R-squared:  0.8933 
## F-statistic: 13.73 on 23 and 12 DF,  p-value: 1.72e-05

accuracy(train.lm.tnms.pred, valid.ts)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  9.313226e-10 17139903 13406819  -2.352404 10.47283 0.1420261
## Test set     -6.644341e+07 79497358 66443412 -26.611594 26.61159 0.7038731
##                    ACF1 Theil's U
## Training set  0.5916994        NA
## Test set     -0.4457964  3.373949

### on the basis of MAPE, we will use polynomial trend to do forecasting while using model driven approach 
Visits.lm.tns <- tslm(TotalVisits.ts ~ trend + I(trend^2))
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
plot(Visits.lm.tns.pred, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue")

summary(Visits.lm.tns.pred)

## 
## Forecast method: Linear regression model
## 
## Model Information:
## 
## Call:
## tslm(formula = TotalVisits.ts ~ trend + I(trend^2))
## 
## Coefficients:
## (Intercept)        trend   I(trend^2)  
##   -16214324     14008122      -174189  
## 
## 
## Error measures:
##                         ME     RMSE      MAE           MPE     MAPE
## Training set -3.731202e-10 35056957 23531453 -0.0009186393 17.38559
##                   MASE       ACF1
## Training set 0.2720612 -0.1161433
## 
## Forecasts:
##          Point Forecast      Lo 0      Hi 0
## May 2017      265306197 265306197 265306197
## Jun 2017      264856594 264856594 264856594
## Jul 2017      264058612 264058612 264058612
## Aug 2017      262912250 262912250 262912250

names(Visits.lm.tns.pred)

##  [1] "model"     "mean"      "lower"     "upper"     "level"    
##  [6] "x"         "method"    "newdata"   "residuals" "fitted"

round(Visits.lm.tns.pred$mean,0)

##            May       Jun       Jul       Aug
## 2017 265306197 264856594 264058612 262912250

accuracy(Visits.lm.tns.pred)

##                         ME     RMSE      MAE           MPE     MAPE
## Training set -3.731202e-10 35056957 23531453 -0.0009186393 17.38559
##                   MASE       ACF1
## Training set 0.2720612 -0.1161433

# Calcualting Residuals of training period
train.lm.qt <- tslm(train.ts ~ trend + I(trend^2))
train.lm.qt.pred <- forecast(train.lm.qt, h = nValid, level = 0)
res <- residuals(train.lm.qt)
#par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")

Acf(res, lag.max = 12, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")

## ACF plot shows that errors are random

################# Now use Smoothing Methods ###########################
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1) 
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2014,nTrain+1), end = c(2014,totalRecords), freq = 12) 
plot(train.ts, xlab = "Monthly", ylab = "Total Visits", bty = "l", col = 'black') 
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2) 
lines(valid.ts)

summary(ma.trailing.pred)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 252400000 252400000 252400000 252400000 252400000 252400000

accuracy(ma.trailing.pred, valid.ts)

##               ME     RMSE      MAE       MPE     MAPE       ACF1 Theil's U
## Test set 1398232 13805576 11121482 0.2523255 4.426086 -0.5927464 0.4241695

#Simple exponential smoothing
#The result is a series with no monthly seasonality.
diff.ts <- diff(TotalVisits.ts, lag = 12)
diff.ts

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  87878317  85685526 111128822 118513912 129376110 133702945 146362401
## 2016  94048632  98741422  45460095 113561696 194221292  60543681  24018619
## 2017  57384527  16403119  64506135 -17992428                              
##            Aug       Sep       Oct       Nov       Dec
## 2015 142945306  96555371 159198366 116282220  83045800
## 2016  27257912  50109611  87078843   2306100  57501777
## 2017

totalRecords <- length(TotalVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(diff.ts,  end = c(2014,nTrain))
train.ts

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  87878317  85685526 111128822 118513912 129376110 133702945 146362401
## 2016  94048632  98741422  45460095 113561696 194221292  60543681  24018619
##            Aug       Sep       Oct       Nov       Dec
## 2015 142945306  96555371 159198366 116282220  83045800
## 2016  27257912  50109611  87078843   2306100  57501777

valid.ts <- window(diff.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts

##            Jan       Feb       Mar       Apr
## 2017  57384527  16403119  64506135 -17992428

#The ets function chooses the optimal ?? and initial level value by maximizing something called the likelihood over 
#the training period. In the case of simple exponential smoothing model, maximizing the likelihood 
#is equivalent to minimizing the RMSE in the training period 
#(the RMSE in the training period is equal to ??, the standard deviation of the training residuals.)
ses <- ets(train.ts, model = "ANN", alpha = 0.2) 
ses.pred <- forecast(ses, h = nValid, level = 0) 
ses.pred

##          Point Forecast     Lo 0     Hi 0
## Jan 2017       60077281 60077281 60077281
## Feb 2017       60077281 60077281 60077281
## Mar 2017       60077281 60077281 60077281
## Apr 2017       60077281 60077281 60077281

plot(ses.pred, ylab = "Visits (Difference)", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

accuracy(ses.pred,valid.ts)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  -9960066 42150393 34282425 -167.3729 184.0797 0.5682420
## Test set     -30001943 44802839 32216370   42.4553 177.9292 0.5339964
##                    ACF1 Theil's U
## Training set  0.1618327        NA
## Test set     -0.5643028 0.4438489

## Holt's Winter model for trend
totalRecords <- length(TotalVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(TotalVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2014  31944459  32131287  36621256  39207562  45414351  47004826  56350300
## 2015 119822776 117816813 147750078 157721474 174790461 180707771 202712701
## 2016 213871408 216558235 193210173 271283170 369011753 241251452 226731320
##            Aug       Sep       Oct       Nov       Dec
## 2014  67423003  76981084 122308397  89350520 118632408
## 2015 210368309 173536455 281506763 205632740 201678208
## 2016 237626221 223646066 368585606 207938840 259179985

valid.ts <- window(TotalVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts

##            Jan       Feb       Mar       Apr
## 2017 271255935 232961354 257716308 253290742

ses.opt <- ets(train.ts, model = "MMA", restrict = FALSE)
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0) 
accuracy(ses.pred, valid.ts)

##                     ME      RMSE       MAE        MPE      MAPE     MASE
## Training set  -9960066  42150393  34282425 -167.37286 184.07973 0.568242
## Test set     193728803 194215056 193728803   76.25839  76.25839 3.211116
##                    ACF1 Theil's U
## Training set  0.1618327        NA
## Test set     -0.5927464  7.318424

accuracy(ses.opt.pred, valid.ts)

##                    ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -3486295 41193738 30125100 -19.047986 34.177373 0.3191324
## Test set      5747393 17817734 16638420   2.026787  6.447628 0.1762603
##                    ACF1 Theil's U
## Training set  0.2166489        NA
## Test set     -0.6826918 0.6571848

ses.opt

## ETS(M,Md,A) 
## 
## Call:
##  ets(y = train.ts, model = "MMA", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.5845 
##     beta  = 1e-04 
##     gamma = 0.0702 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 14915389.5956 
##     b = 3.0166 
##     s=-14752803 -16628065 49364406 -19187229 1554158 -177624.8
##            -6408161 59911998 10114577 -25721323 -22412503 -15657428
## 
##   sigma:  0.2802
## 
##      AIC     AICc      BIC 
## 1427.001 1467.236 1455.504

plot(ses.opt.pred, ylab = "Visits (Differenced)", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.opt.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

## Using MMA model we will forecast next 4 months of visits
TotalVisits <- ets(TotalVisits.ts, model = "MMA", restrict = FALSE)
TotalVisits.pred <- forecast(TotalVisits, h = 4, level = 0) 
TotalVisits.pred

##          Point Forecast      Lo 0      Hi 0
## May 2017      313084346 314800278 314800278
## Jun 2017      254727298 248824412 248824412
## Jul 2017      252242364 244468257 244468257
## Aug 2017      256032658 244604761 244604761

TotalVisits.pred$mean

##            May       Jun       Jul       Aug
## 2017 313084346 254727298 252242364 256032658

accuracy(TotalVisits.pred)

##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -2851494 39029433 29022224 -16.15458 31.10948 0.3355432
##                   ACF1
## Training set 0.2192869

TotalVisits

## ETS(M,Md,A) 
## 
## Call:
##  ets(y = TotalVisits.ts, model = "MMA", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.5632 
##     beta  = 1e-04 
##     gamma = 0.0974 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 15101049.7279 
##     b = 2.9139 
##     s=-14025835 -15901098 67595314 -25772931 -6161550 -9922877
##            -5681191 60638967 10841548 -24994353 -21685535 -14930458
## 
##   sigma:  0.2675
## 
##      AIC     AICc      BIC 
## 1585.867 1618.439 1616.267

ISB CBA 7 Forecasting Analytics Project | 29-Apr-2017

Team 24 Anurag Singhvi (71620007) Debanjan Paul (71620021) Soubhagya Rout (71620070) Vineet Garg (71620090

April 28, 2017

R Markdown : Looks into all methods employed to forecast total( all channels) visits for next 4 months. Essence of this excercise captured in our project final report.