R Markdown: walks through all the methods employed to forecast mobile app visits for the next 4 months. The essence of this exercise is captured in our project final report.

if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")
##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMonthly.csv")
##### Visualise data
#par(mfrow=c(2, 2))
appVisits.ts <- ts(eCommerceTrafficData$Mapp, start = c(2014,1), freq = 12)
plot(appVisits.ts, xlab = "", ylab = "Mobile App Visits", bty = "l", col = 'blue') 
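# Optional numeric summary of the series; skewness() and kurtosis() come from the "moments"
# package loaded above (a quick exploratory sketch, not part of the modelling pipeline).
summary(as.numeric(appVisits.ts))
skewness(as.numeric(appVisits.ts))
kurtosis(as.numeric(appVisits.ts))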

## Partition appVisits.ts into training and validation sets
totalRecords <- length(appVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(appVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2014   2869521   3229880   4684880   8562608  12314911  14010825  16747894
## 2015  59594456  63379983  88641001  95791175 110746932 120414108 142212584
## 2016 164123172 163592403 146220981 234314655 318261988 180043453 164339827
##            Aug       Sep       Oct       Nov       Dec
## 2014  21862188  27516353  45598418  38226274  62578002
## 2015 149697399 120369528 215154440 159265899 153666682
## 2016 170356543 156311048 257243995 143502849 178127276
valid.ts <- window(appVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##            Jan       Feb       Mar       Apr
## 2017 182350139 153941816 172790970 165909172
### First, let's do the seasonal naive forecast
naive.pred <- snaive(train.ts, h = 4)
naive.pred
##          Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## Jan 2017      164123172  38714513 289531831 -27672821 355919165
## Feb 2017      163592403  38183744 289001062 -28203590 355388396
## Mar 2017      146220981  20812322 271629640 -45575012 338016974
## Apr 2017      234314655 108905996 359723314  42518662 426110648
valid.ts
##            Jan       Feb       Mar       Apr
## 2017 182350139 153941816 172790970 165909172
plot(train.ts, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'blue') 
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 84093185 97856897 85406772 58.642109 59.55748 1.0000000
## Test set     -8314779 38113746 30713257 -5.531781 18.21805 0.3596115
##                    ACF1 Theil's U
## Training set  0.5203794        NA
## Test set     -0.3936157  2.152795
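# The seasonal naive forecast simply carries forward the same-month values from the last
# full year of the training data; a quick sanity check (sketch):
all.equal(as.numeric(naive.pred$mean),
          as.numeric(window(train.ts, start = c(2016, 1), end = c(2016, 4))))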
# ############ Linear Trend Model
train.lm <- tslm(train.ts ~ trend) 
# forecast over the validation period and plot the fitted model
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2) 
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)
## 
## Call:
## tslm(formula = train.ts ~ trend)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -80040587 -17740267  -4609969   9641164 135465921 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -14149552   12948413  -1.093    0.282    
## trend         6791228     610281  11.128 7.05e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38040000 on 34 degrees of freedom
## Multiple R-squared:  0.7846, Adjusted R-squared:  0.7782 
## F-statistic: 123.8 on 1 and 34 DF,  p-value: 7.05e-13
accuracy(train.lm.pred,valid.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  8.536113e-10 36966949 24400937  -3.835911 41.23754 0.2857026
## Test set     -7.856471e+07 79931000 78564711 -47.196241 47.19624 0.9198885
##                    ACF1 Theil's U
## Training set  0.2191944        NA
## Test set     -0.3316361  4.371476
############# Exponential Trend Model #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.expo.trend)
## 
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2543 -0.3711  0.1625  0.4911  0.7834 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16.015356   0.206429   77.58  < 2e-16 ***
## trend        0.108549   0.009729   11.16 6.58e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6064 on 34 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.428e+18 on 1 and 34 DF,  p-value: < 2.2e-16
accuracy(train.lm.expo.trend.pred, valid.ts)
##                      ME      RMSE       MAE        MPE      MAPE      MASE
## Training set   -7393697  85434099  55662537  -20.90667  59.56623 0.6517345
## Test set     -424921938 431610594 424921938 -253.82955 253.82955 4.9752722
##                   ACF1 Theil's U
## Training set 0.7047932        NA
## Test set     0.1836972  23.60487
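# In the lambda = 0 (log-linear) model, the trend coefficient can be read as a monthly
# growth rate: exp(0.1085) - 1 is roughly 0.115, i.e. about 11.5% per month
# (an interpretive sketch, not part of the original selection workflow).
round(exp(coef(train.lm.expo.trend)["trend"]) - 1, 3)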
################## Quadratic or polynomial trend model
train.lm.poly.trend <- tslm(train.ts ~ (trend + I(trend^2)))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.poly.trend)
## 
## Call:
## tslm(formula = train.ts ~ (trend + I(trend^2)))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -60213621 -22151192  -6681768   9505072 135747440 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -42422082   19392079  -2.188   0.0359 *  
## trend        11255312    2416781   4.657 5.05e-05 ***
## I(trend^2)    -120651      63356  -1.904   0.0656 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36650000 on 33 degrees of freedom
## Multiple R-squared:  0.8059, Adjusted R-squared:  0.7941 
## F-statistic: 68.51 on 2 and 33 DF,  p-value: 1.786e-12
accuracy(train.lm.poly.trend.pred, valid.ts)
##                         ME     RMSE      MAE      MPE    MAPE      MASE
## Training set -1.189215e-09 35089145 24174358  49.8526 87.8712 0.2830497
## Test set     -4.317378e+07 44634615 43173778 -26.0901 26.0901 0.5055077
##                    ACF1 Theil's U
## Training set  0.1449017        NA
## Test set     -0.5654056  2.455583
### Check seasonality in this monthly time series
fit <- tbats(appVisits.ts)
fit
## BATS(0, {0,0}, 0.947, -)
## 
## Call: tbats(y = appVisits.ts)
## 
## Parameters
##   Lambda: 0
##   Alpha: 0.2481564
##   Beta: 0.06782401
##   Damping Parameter: 0.946678
## 
## Seed States:
##            [,1]
## [1,] 14.8142911
## [2,]  0.3440338
## 
## Sigma: 0.2198401
## AIC: 1487.595
seasonal <- !is.null(fit$seasonal)
# check whether the fitted TBATS model contains a seasonal component
seasonal
## [1] FALSE
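# As a visual cross-check of the TBATS result, a month plot of the training series can be
# drawn with base R (a sketch; not required for the model comparison below):
monthplot(train.ts, ylab = "Mobile App Visits", xlab = "Month")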
## No seasonality was detected, but let's still fit models with seasonal terms
# Only Season
train.lm.season <- tslm(train.ts ~ season)
train.lm.season.pred <- forecast(train.lm.season, h = nValid, level = 0)
plot(train.lm.season.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.season.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.season)
## 
## Call:
## tslm(formula = train.ts ~ season)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -134793033  -73598812   17280932   55280179  171154044 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75529050   52851985   1.429    0.166
## season2      1205039   74743994   0.016    0.987
## season3      4319904   74743994   0.058    0.954
## season4     37360430   74743994   0.500    0.622
## season5     71578894   74743994   0.958    0.348
## season6     29293746   74743994   0.392    0.699
## season7     32237719   74743994   0.431    0.670
## season8     38442994   74743994   0.514    0.612
## season9     25869927   74743994   0.346    0.732
## season10    97136568   74743994   1.300    0.206
## season11    38135958   74743994   0.510    0.615
## season12    55928270   74743994   0.748    0.462
## 
## Residual standard error: 91540000 on 24 degrees of freedom
## Multiple R-squared:  0.1193, Adjusted R-squared:  -0.2843 
## F-statistic: 0.2957 on 11 and 24 DF,  p-value: 0.9803
accuracy(train.lm.season.pred, valid.ts)
##                    ME     RMSE      MAE        MPE      MAPE      MASE
## Training set        0 74743994 64578017 -296.09231 331.19869 0.7561229
## Test set     82497631 84883868 82497631   48.61995  48.61995 0.9659378
##                    ACF1 Theil's U
## Training set  0.8759955        NA
## Test set     -0.3078260  3.951817
### Additive Seasonal Model with trend
train.lm.tns <- tslm(train.ts ~ trend + season)
train.lm.tns.pred <- forecast(train.lm.tns, h = nValid, level = 0)
plot(train.lm.tns.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tns)
## 
## Call:
## tslm(formula = train.ts ~ trend + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -54255343 -18349290   1625161  15308228  87060860 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15571901   22843647  -0.682    0.502    
## trend         7007765     628513  11.150 9.44e-11 ***
## season2      -5802726   30175153  -0.192    0.849    
## season3      -9695627   30194784  -0.321    0.751    
## season4      16337134   30227473   0.540    0.594    
## season5      43547832   30273178   1.438    0.164    
## season6      -5745081   30331841  -0.189    0.851    
## season7      -9808874   30403386  -0.323    0.750    
## season8     -10611364   30487723  -0.348    0.731    
## season9     -30192197   30584746  -0.987    0.334    
## season10     34066679   30694334   1.110    0.279    
## season11    -31941696   30816354  -1.037    0.311    
## season12    -21157149   30950659  -0.684    0.501    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36950000 on 23 degrees of freedom
## Multiple R-squared:  0.8625, Adjusted R-squared:  0.7908 
## F-statistic: 12.02 on 12 and 23 DF,  p-value: 3.269e-07
accuracy(train.lm.tns.pred, valid.ts)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -9.832648e-10 29533407 23388827   2.893108 63.68492 0.2738521
## Test set     -8.568874e+07 87988475 85688738 -51.428443 51.42844 1.0033014
##                    ACF1 Theil's U
## Training set  0.3944743        NA
## Test set     -0.3078260   4.77675
### Additive Seasonal Model with quadratic trend
train.lm.qtns <- tslm(train.ts ~ trend + I(trend^2) + season)
train.lm.qtns.pred <- forecast(train.lm.qtns, h = nValid, level = 0)
plot(train.lm.qtns.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.qtns.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.qtns)
## 
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2) + season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -47428101 -18669535  -2525287  17621512  88444246 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -40472852   25346295  -1.597   0.1246    
## trend        11273206    2331960   4.834 7.88e-05 ***
## I(trend^2)    -115282      60933  -1.892   0.0717 .  
## season2      -6955548   28619756  -0.243   0.8102    
## season3     -11770706   28652883  -0.411   0.6852    
## season4      13570361   28700162   0.473   0.6410    
## season5      40319931   28756876   1.402   0.1748    
## season6      -9203547   28819877  -0.319   0.7525    
## season7     -13267339   28887583  -0.459   0.6505    
## season8     -13839265   28959959  -0.478   0.6375    
## season9     -32958969   29038507  -1.135   0.2686    
## season10     31991600   29126234   1.098   0.2839    
## season11    -33094518   29227633  -1.132   0.2697    
## season12    -21157149   29348633  -0.721   0.4786    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35040000 on 22 degrees of freedom
## Multiple R-squared:  0.8817, Adjusted R-squared:  0.8119 
## F-statistic: 12.62 on 13 and 22 DF,  p-value: 2.51e-07
accuracy(train.lm.qtns.pred, valid.ts)
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  1.396984e-09 27389176 21047477  52.22086 98.03125 0.2464380
## Test set     -5.248747e+07 54690985 52487470 -31.61438 31.61438 0.6145586
##                    ACF1 Theil's U
## Training set  0.3007832        NA
## Test set     -0.5454901  2.956618
### Multiplicative Seasonal Model with trend (trend*season adds trend:season interaction terms)
train.lm.tnms <- tslm(train.ts ~ trend*season)
train.lm.tnms.pred <- forecast(train.lm.tnms, h = nValid, level = 0)
plot(train.lm.tnms.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.tnms.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.tnms)
## 
## Call:
## tslm(formula = train.ts ~ trend * season)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -36361012 -17129455  -6095840  10491863  45600892 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    -11816678   33173232  -0.356  0.72787   
## trend            6718902    2037815   3.297  0.00637 **
## season2         -4994039   48094172  -0.104  0.91901   
## season3          3205569   49330289   0.065  0.94926   
## season4        -25795207   50618286  -0.510  0.61957   
## season5        -57787891   51954304  -1.112  0.28780   
## season6         -7884998   53334734  -0.148  0.88492   
## season7          2739833   54756219   0.050  0.96092   
## season8          2043426   56215643   0.036  0.97160   
## season9           520296   57710128   0.009  0.99295   
## season10        -9526150   59237020  -0.161  0.87492   
## season11        24591634   60793879   0.405  0.69296   
## season12        27724724   62378460   0.444  0.66462   
## trend:season2     -37130    2881906  -0.013  0.98993   
## trend:season3    -821565    2881906  -0.285  0.78045   
## trend:season4    2687433    2881906   0.933  0.36945   
## trend:season5    6028893    2881906   2.092  0.05837 . 
## trend:season6     199124    2881906   0.069  0.94605   
## trend:season7    -569238    2881906  -0.198  0.84673   
## trend:season8    -531637    2881906  -0.184  0.85672   
## trend:season9   -1352457    2881906  -0.469  0.64727   
## trend:season10   2099664    2881906   0.729  0.48024   
## trend:season11  -2332378    2881906  -0.809  0.43409   
## trend:season12  -1904349    2881906  -0.661  0.52123   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34580000 on 12 degrees of freedom
## Multiple R-squared:  0.9372, Adjusted R-squared:  0.8167 
## F-statistic: 7.781 on 23 and 12 DF,  p-value: 0.0003454
accuracy(train.lm.tnms.pred, valid.ts)
##                         ME      RMSE      MAE        MPE     MAPE
## Training set -2.328306e-09  19966432 17031787   6.777368 43.20576
## Test set     -8.972845e+07 102561537 89728449 -54.025813 54.02581
##                   MASE       ACF1 Theil's U
## Training set 0.1994196  0.6728281        NA
## Test set     1.0506011 -0.2950032  5.656306
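# Before choosing, it can help to gather the validation-set MAPE of each regression model in
# one place. A sketch, assuming the forecast objects above are still in the workspace;
# mapeOf is a small helper defined here for illustration.
mapeOf <- function(pred) accuracy(pred, valid.ts)["Test set", "MAPE"]
round(sapply(list(snaive = naive.pred, linear = train.lm.pred,
                  expo = train.lm.expo.trend.pred, quad = train.lm.poly.trend.pred,
                  season = train.lm.season.pred, trendSeason = train.lm.tns.pred,
                  quadSeason = train.lm.qtns.pred, multSeason = train.lm.tnms.pred),
             mapeOf), 2)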
### Among the regression models, the quadratic (polynomial) trend model has the lowest validation MAPE, so we will use it for the final regression forecast
Visits.lm.tns <- tslm(appVisits.ts ~ trend + I(trend^2))
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 4, level = 0)
plot(Visits.lm.tns.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue") 

summary(Visits.lm.tns.pred)
## 
## Forecast method: Linear regression model
## 
## Model Information:
## 
## Call:
## tslm(formula = appVisits.ts ~ trend + I(trend^2))
## 
## Coefficients:
## (Intercept)        trend   I(trend^2)  
##   -51912812     13128194      -180670  
## 
## 
## Error measures:
##                        ME     RMSE      MAE     MPE     MAPE      MASE
## Training set 3.633113e-09 34519251 23538175 57.9854 95.79685 0.3033527
##                  ACF1
## Training set 0.190379
## 
## Forecasts:
##          Point Forecast      Lo 0      Hi 0
## May 2017      182636375 182636375 182636375
## Jun 2017      180768934 180768934 180768934
## Jul 2017      178540153 178540153 178540153
## Aug 2017      175950031 175950031 175950031
names(Visits.lm.tns.pred)
##  [1] "model"     "mean"      "lower"     "upper"     "level"    
##  [6] "x"         "method"    "newdata"   "residuals" "fitted"
round(Visits.lm.tns.pred$mean,0)
##            May       Jun       Jul       Aug
## 2017 182636375 180768934 178540153 175950031
accuracy(Visits.lm.tns.pred)
##                        ME     RMSE      MAE     MPE     MAPE      MASE
## Training set 3.633113e-09 34519251 23538175 57.9854 95.79685 0.3033527
##                  ACF1
## Training set 0.190379
# Calculating residuals of the training period
train.lm.qt <- tslm(train.ts ~ trend + I(trend^2))
train.lm.qt.pred <- forecast(train.lm.qt, h = nValid, level = 0)
res <- residuals(train.lm.qt)
#par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")

Acf(res, lag.max = 12, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")

## The ACF plot suggests the residual autocorrelations are small, i.e. the errors look roughly random
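# A formal complement to the visual ACF check: the Ljung-Box test from base R
# (a sketch; a large p-value is consistent with uncorrelated residuals).
Box.test(res, lag = 12, type = "Ljung-Box")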

################# Now use Smoothing Methods ###########################
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
last.ma <- tail(ma.trailing, 1) 
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2014,nTrain+1), end = c(2014,totalRecords), freq = 12) 
plot(train.ts, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", col = 'black') 
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2) 
lines(valid.ts)

summary(ma.trailing.pred)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 189700000 189700000 189700000 189700000 189700000 189700000
accuracy(ma.trailing.pred, valid.ts)
##                 ME     RMSE      MAE       MPE     MAPE       ACF1
## Test set -20955158 23372636 20955158 -12.84802 12.84802 -0.6362683
##          Theil's U
## Test set    1.3043
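# The trailing moving-average forecast is just the mean of the last 12 training months,
# repeated over the validation horizon; a quick check (sketch):
all.equal(as.numeric(last.ma), mean(tail(as.numeric(train.ts), 12)))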
# Simple exponential smoothing on the lag-12 differenced series
# Differencing at lag 12 removes the monthly seasonality, leaving a series without it.
diff.ts <- diff(appVisits.ts, lag = 12)
diff.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  56724935  60150103  83956121  87228567  98432021 106403283 125464690
## 2016 104528716 100212420  57579980 138523480 207515056  59629345  22127243
## 2017  18226967  -9650587  26569989 -68405483                              
##            Aug       Sep       Oct       Nov       Dec
## 2015 127835211  92853175 169556022 121039625  91088680
## 2016  20659144  35941520  42089555 -15763050  24460594
## 2017
totalRecords <- length(appVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(diff.ts,  end = c(2014,nTrain))
train.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2015  56724935  60150103  83956121  87228567  98432021 106403283 125464690
## 2016 104528716 100212420  57579980 138523480 207515056  59629345  22127243
##            Aug       Sep       Oct       Nov       Dec
## 2015 127835211  92853175 169556022 121039625  91088680
## 2016  20659144  35941520  42089555 -15763050  24460594
valid.ts <- window(diff.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##            Jan       Feb       Mar       Apr
## 2017  18226967  -9650587  26569989 -68405483
# The ets function chooses the optimal alpha and initial level by maximizing the likelihood over
# the training period. For a simple exponential smoothing model, maximizing the likelihood
# is equivalent to minimizing the RMSE in the training period
# (the training RMSE equals sigma, the standard deviation of the training residuals).
# Here we first fix alpha = 0.2; a sketch letting ets choose alpha follows the accuracy output below.
ses <- ets(train.ts, model = "ANN", alpha = 0.2) 
ses.pred <- forecast(ses, h = nValid, level = 0) 
ses.pred
##          Point Forecast     Lo 0     Hi 0
## Jan 2017       44146557 44146557 44146557
## Feb 2017       44146557 44146557 44146557
## Mar 2017       44146557 44146557 44146557
## Apr 2017       44146557 44146557 44146557
plot(ses.pred, ylab = "Visits (Difference)", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

accuracy(ses.pred,valid.ts)
##                     ME     RMSE      MAE       MPE      MAPE      MASE
## Training set  -8597834 46992556 37747688 -26.44339  89.54415 0.4925129
## Test set     -52461335 64309516 52461335 128.40736 232.58565 0.6844892
##                    ACF1 Theil's U
## Training set  0.3676872        NA
## Test set     -0.3936157  1.013092
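# Letting ets() choose alpha itself, instead of fixing alpha = 0.2, illustrates the
# likelihood-based estimation described above (a sketch on the differenced training series;
# ses.auto is a name introduced here for illustration).
ses.auto <- ets(train.ts, model = "ANN")
ses.auto$par["alpha"]
sqrt(ses.auto$sigma2)   # should be close to the training RMSE
accuracy(forecast(ses.auto, h = nValid, level = 0), valid.ts)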
## Holt's model for trend (exponential smoothing with a trend component, ets model "MAN")
totalRecords <- length(appVisits.ts)
nValid <- 4
nTrain <- totalRecords - nValid
train.ts <- window(appVisits.ts,start = c (2014,1), end = c(2014,nTrain))
train.ts
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2014   2869521   3229880   4684880   8562608  12314911  14010825  16747894
## 2015  59594456  63379983  88641001  95791175 110746932 120414108 142212584
## 2016 164123172 163592403 146220981 234314655 318261988 180043453 164339827
##            Aug       Sep       Oct       Nov       Dec
## 2014  21862188  27516353  45598418  38226274  62578002
## 2015 149697399 120369528 215154440 159265899 153666682
## 2016 170356543 156311048 257243995 143502849 178127276
valid.ts <- window(appVisits.ts, start = c (2014,nTrain+1), end = c(2014,totalRecords))
valid.ts
##            Jan       Feb       Mar       Apr
## 2017 182350139 153941816 172790970 165909172
ses.opt <- ets(train.ts, model = "MAN", restrict = FALSE)
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0) 
# Note: ses.pred was fitted to the lag-12 differenced series, so comparing it against the
# raw (undifferenced) validation data mixes scales; its numbers are not directly comparable.
accuracy(ses.pred, valid.ts)
##                     ME      RMSE       MAE       MPE     MAPE      MASE
## Training set  -8597834  46992556  37747688 -26.44339 89.54415 0.4925129
## Test set     124601468 125030745 124601468  73.73870 73.73870 1.6257375
##                    ACF1 Theil's U
## Training set  0.3676872        NA
## Test set     -0.6362683  6.174715
accuracy(ses.opt.pred, valid.ts) 
##                    ME     RMSE      MAE       MPE      MAPE       MASE
## Training set  4505258 40662195 25982820 11.058455 25.761235 0.30422435
## Test set     -3214459 10310554  7996170 -2.272052  4.921869 0.09362455
##                    ACF1 Theil's U
## Training set -0.0901213        NA
## Test set     -0.6501759 0.5206197
ses.opt
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = train.ts, model = "MAN", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.4558 
##     beta  = 0.1247 
##     phi   = 0.8 
## 
##   Initial states:
##     l = -6483708.473 
##     b = 4040627.5949 
## 
##   sigma:  0.4331
## 
##      AIC     AICc      BIC 
## 1372.740 1375.636 1382.241
plot(ses.opt.pred, ylab = "Mobile App Visits", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
lines(ses.opt.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)
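# As a cross-check on the ETS(M,Ad,N) fit, forecast::holt() fits a damped-trend exponential
# smoothing model directly (a sketch; it uses additive rather than multiplicative errors,
# and holt.pred is a name introduced here for illustration).
holt.pred <- holt(train.ts, h = nValid, damped = TRUE)
accuracy(holt.pred, valid.ts)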

## Using the ETS(M,Ad,N) model fitted to the full series, we forecast the next 4 months of visits
appVisits <- ets(appVisits.ts, model = "MAN", restrict = FALSE)
appVisits.pred <- forecast(appVisits, h = 4, level = 0) 
appVisits.pred
##          Point Forecast      Lo 0      Hi 0
## May 2017      164937047 164937047 164937047
## Jun 2017      163971799 163971799 163971799
## Jul 2017      163199601 163199601 163199601
## Aug 2017      162581842 162581842 162581842
appVisits.pred$mean
##            May       Jun       Jul       Aug
## 2017 164937047 163971799 163199601 162581842
accuracy(appVisits.pred) 
##                   ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 4065329 38895955 24370173 9.847372 23.75835 0.3140753
##                    ACF1
## Training set -0.1122878
appVisits
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = appVisits.ts, model = "MAN", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.4922 
##     beta  = 0.1082 
##     phi   = 0.8 
## 
##   Initial states:
##     l = -6483708.6573 
##     b = 4040628.5953 
## 
##   sigma:  0.412
## 
##      AIC     AICc      BIC 
## 1532.177 1534.723 1542.311
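# A final visual: plot the ETS(M,Ad,N) forecast over the full series (a sketch, mirroring the
# plots used for the earlier models).
plot(appVisits.pred, xlab = "Monthly", ylab = "Mobile App Visits", bty = "l", flty = 2)
lines(appVisits.pred$fitted, lwd = 2, col = "blue")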