ISB CBA 7 Forecasting Analytics Project

R Markdown : Looks into all methods employed to forecast by mobile website visits in next 31 days. Essence of this excercise captured in our project final report.

if (!require("pacman")) install.packages("pacman")

## Loading required package: pacman

pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")

##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMSite.csv")
##### Visualise data
#par(mfrow=c(2, 2))
inds <- seq(as.Date("2016-06-01"), as.Date("2017-04-26"), by = "day")
msiteVisits.ts <- ts(eCommerceTrafficData$Msite, start = c(2016, as.numeric(format(inds[1], "%j"))),
                     frequency = 365)
## Create a time series object
#set.seed(25)
#myts <- ts(rnorm(length(inds)), start = c(2014, as.numeric(format(inds[1], "%j"))), frequency = 365)
plot(msiteVisits.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'blue')

## Partition of data of msiteVisits.ts 
totalRecords <- length(msiteVisits.ts)
nValid <- 30
nTrain <- totalRecords - nValid
train.ts <- window(msiteVisits.ts,   end = c(2016,nTrain+152))
train.ts

## Time Series:
## Start = c(2016, 153) 
## End = c(2017, 87) 
## Frequency = 365 
##   [1]  638924  621551  612524  649303  682003  650997  666294  643216
##   [9]  662607  649403  733549  754677  685341  714513  688101  691029
##  [17]  695338  760045  782677  726363  722317  716809  796145  906379
##  [25]  964083  871043  833100  806597  765161  799605  804905  848118
##  [33]  874755  948782  825695  814920  784606  742827  800109  782769
##  [41]  686708  777702  735898  732430  717800  752718  807568  795744
##  [49]  838381  765791  746786  726628  787495  828946  723347  738310
##  [57]  755544  781441  780184  895473  941232  836298  814334  824858
##  [65]  901296  788971  824617  829795  794045  875051 1064335  990604
##  [73]  959906  934911  955322  961230  809609  888793  828345  769793
##  [81]  826669  872168  775592  823425  857899  863461  810313  891521
##  [89]  965621  853565  864092  907918  846782  852926  865667  924585
##  [97]  864265  847157  899649  875631  866522  915443  995791  871876
## [105]  943203  892953  898668  912152  939879 1036236  955124 1112400
## [113] 1015003  927124  973250 1037797 1106680 1040786 1112751 1159553
## [121] 1049889 1186951 1635477 3310886 3168573 2627580 2396093 2341057
## [129] 1756836 1553280 1550617 1384788 1368609 1428759 1330636 1340249
## [137] 1354607 1400673 1599320 1668215 1681157 1439793 1384893 1488837
## [145] 1499969 1439539 1845368 1631772 1496598 1602927 1504939 1243709
## [153] 1229034 1171021 1158217 1186004 1192900 1205581 1204987 1128667
## [161] 1056104 1025816  974643  942471 1002697 1009399 1013037  964279
## [169] 1012953  983338  972978 1010457 1057662  965433  950138 1013368
## [177] 1065172 1073608 1126187 1174226 1122767 1138006 1132063 1135001
## [185] 1139198 1201561 1263487 1216613 1439936 1274099 1193751 1129309
## [193] 1194806 1215360 1136545 1180475 1213463 1234914 1350754 1410599
## [201] 2215635 1978298 1768547 1672310 1456865 1449475 1491856 1472283
## [209] 1372064 1436930 1401101 1376261 1401613 1260889 1358198 1514850
## [217] 1664532 1748482 1613290 1547275 1579180 1595381 1471066 1538218
## [225] 1597072 1467105 1398411 1455865 1664993 1451662 1561636 1526293
## [233] 1521937 1613314 1633250 1611917 1746487 1933836 1997426 1968933
## [241] 1575150 1625743 1605638 1565246 1383135 1411509 1414403 2043191
## [249] 2120504 1636371 1524758 1643197 1797452 1693548 1914091 1740283
## [257] 1736766 1576075 1570576 1632385 1531384 1552463 1538022 1568570
## [265] 1563765 1647921 1714686 1545015 1629682 1638858 1689618 1594829
## [273] 1595492 1743327 1618757 1651877 1644925 1735297 1594113 1519222
## [281] 1419551 1402060 1523682 1536577 1488831 1320252 1484997 1682487
## [289] 1644421 1516058 1565907 1631832 1548662 1623896 1976426 1883772
## [297] 1754834 1723123 1757045 1646646

valid.ts <- window(msiteVisits.ts, start = c (2016,nTrain+153), end = c(2016,totalRecords+152))
valid.ts

## Time Series:
## Start = c(2017, 88) 
## End = c(2017, 117) 
## Frequency = 365 
##  [1] 1661924 1715717 1542527 1604040 1980687 1917899 1708403 1636475
##  [9] 2441091 2147307 1960746 2015527 1974848 1717667 1703160 1696207
## [17] 1725137 1899645 1731911 1774245 1732232 1678414 1683333 1619003
## [25] 1635254 1605765 1616746 1614752 1602553 1589401

### Fist lets do the seasonal naive forecast
naive.pred <- naive(train.ts, h = 30)
naive.pred

##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## 2017.238        1646646 1443349 1849943 1335730 1957562
## 2017.241        1646646 1443349 1849943 1335730 1957562
## 2017.244        1646646 1443349 1849943 1335730 1957562
## 2017.247        1646646 1443349 1849943 1335730 1957562
## 2017.249        1646646 1443349 1849943 1335730 1957562
## 2017.252        1646646 1443349 1849943 1335730 1957562
## 2017.255        1646646 1443349 1849943 1335730 1957562
## 2017.258        1646646 1443349 1849943 1335730 1957562
## 2017.260        1646646 1443349 1849943 1335730 1957562
## 2017.263        1646646 1443349 1849943 1335730 1957562
## 2017.266        1646646 1443349 1849943 1335730 1957562
## 2017.268        1646646 1443349 1849943 1335730 1957562
## 2017.271        1646646 1443349 1849943 1335730 1957562
## 2017.274        1646646 1443349 1849943 1335730 1957562
## 2017.277        1646646 1443349 1849943 1335730 1957562
## 2017.279        1646646 1443349 1849943 1335730 1957562
## 2017.282        1646646 1443349 1849943 1335730 1957562
## 2017.285        1646646 1443349 1849943 1335730 1957562
## 2017.288        1646646 1443349 1849943 1335730 1957562
## 2017.290        1646646 1443349 1849943 1335730 1957562
## 2017.293        1646646 1443349 1849943 1335730 1957562
## 2017.296        1646646 1443349 1849943 1335730 1957562
## 2017.299        1646646 1443349 1849943 1335730 1957562
## 2017.301        1646646 1443349 1849943 1335730 1957562
## 2017.304        1646646 1443349 1849943 1335730 1957562
## 2017.307        1646646 1443349 1849943 1335730 1957562
## 2017.310        1646646 1443349 1849943 1335730 1957562
## 2017.312        1646646 1443349 1849943 1335730 1957562
## 2017.315        1646646 1443349 1849943 1335730 1957562
## 2017.318        1646646 1443349 1849943 1335730 1957562

valid.ts

## Time Series:
## Start = c(2017, 88) 
## End = c(2017, 117) 
## Frequency = 365 
##  [1] 1661924 1715717 1542527 1604040 1980687 1917899 1708403 1636475
##  [9] 2441091 2147307 1960746 2015527 1974848 1717667 1703160 1696207
## [17] 1725137 1899645 1731911 1774245 1732232 1678414 1683333 1619003
## [25] 1635254 1605765 1616746 1614752 1602553 1589401

plot(train.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'blue') 
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)

##                      ME     RMSE       MAE         MPE     MAPE MASE
## Training set   3370.308 158633.6  85010.49 -0.09560444 6.263054  NaN
## Test set     117774.533 227653.6 144437.47  5.68615230 7.363165  NaN
##                    ACF1 Theil's U
## Training set 0.08249566        NA
## Test set     0.48014505  1.103587

# ############ Linear Trend Model
#tslm function (which stands for time series linear model)
#op <- par(oma=c(4,6,1,1))
#par(op) 
train.lm <- tslm(train.ts ~ trend) 
# now train a model and plot it
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2) 
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)

## 
## Call:
## tslm(formula = train.ts ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -402593 -142882  -54449   42101 2176528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 683911.5    33139.4   20.64   <2e-16 ***
## trend         3632.6      190.9   19.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 286300 on 298 degrees of freedom
## Multiple R-squared:  0.5487, Adjusted R-squared:  0.5472 
## F-statistic: 362.3 on 1 and 298 DF,  p-value: < 2.2e-16

accuracy(train.lm.pred,valid.ts)

##                         ME     RMSE      MAE       MPE     MAPE MASE
## Training set  7.775517e-12 285322.6 165621.8 -3.719312 12.63214  NaN
## Test set     -6.558669e+04 218081.3 188453.2 -4.878744 10.60818  NaN
##                   ACF1 Theil's U
## Training set 0.8456486        NA
## Test set     0.5384365  1.120771

############# Exponential Trend Moedl #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2, main = "Exponential Trend Moedl")
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.expo.trend)

## 
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30670 -0.11450 -0.04445  0.05212  1.13103 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.348e+01  2.189e-02  616.01   <2e-16 ***
## trend       3.200e-03  1.261e-04   25.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1891 on 298 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 9.26e+14 on 1 and 298 DF,  p-value: < 2.2e-16

accuracy(train.lm.expo.trend.pred, valid.ts)

##                     ME     RMSE      MAE        MPE     MAPE MASE
## Training set   22495.4 294990.0 164554.4  -1.550321 11.95760  NaN
## Test set     -208057.5 303040.2 268127.8 -13.086905 15.73866  NaN
##                   ACF1 Theil's U
## Training set 0.8536558        NA
## Test set     0.5822136   1.58165

################## Quadratic or polynomial trend model
train.lm.poly.trend <- tslm(train.ts ~ trend + I(trend^2))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2, main = "Polynomial Trend Moedl")
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)

summary(train.lm.poly.trend)

## 
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -409587 -151156  -36232   65401 2137018 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 595854.428  49532.238  12.030  < 2e-16 ***
## trend         5382.112    759.908   7.083 1.03e-11 ***
## I(trend^2)      -5.812      2.445  -2.377   0.0181 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 284100 on 297 degrees of freedom
## Multiple R-squared:  0.5571, Adjusted R-squared:  0.5541 
## F-statistic: 186.8 on 2 and 297 DF,  p-value: < 2.2e-16

accuracy(train.lm.poly.trend.pred, valid.ts)

##                        ME     RMSE      MAE       MPE      MAPE MASE
## Training set 8.126288e-12 282646.2 167232.4 -3.465474 12.970419  NaN
## Test set     4.949529e+04 206388.5 137139.9  1.746836  7.188252  NaN
##                   ACF1 Theil's U
## Training set 0.8430400        NA
## Test set     0.5065894  1.018332

### check seasoanlity in this daily time series
fit <- tbats(msiteVisits.ts)
fit

## BATS(0, {0,0}, 0.8, -)
## 
## Call: tbats(y = msiteVisits.ts)
## 
## Parameters
##   Lambda: 8.1e-05
##   Alpha: 1.146764
##   Beta: -0.1869698
##   Damping Parameter: 0.800002
## 
## Seed States:
##              [,1]
## [1,] 1.335957e+01
## [2,] 4.394361e-04
## 
## Sigma: 0.09159712
## AIC: 9589.874

seasonal <- !is.null(fit$seasonal)
# this shows there is no seasonlity present
seasonal

## [1] FALSE

### on the basis of MAPE, we will use quadratic trend model to do forecasting while using model driven approach 
Visits.lm.tns <- tslm(msiteVisits.ts ~ trend + I(trend^2))
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 30, level = 0)
plot(Visits.lm.tns.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue")

summary(Visits.lm.tns.pred)

## 
## Forecast method: Linear regression model
## 
## Model Information:
## 
## Call:
## tslm(formula = msiteVisits.ts ~ trend + I(trend^2))
## 
## Coefficients:
## (Intercept)        trend   I(trend^2)  
##  600512.169     5245.582       -5.197  
## 
## 
## Error measures:
##                         ME     RMSE      MAE       MPE     MAPE MASE
## Training set -1.095114e-11 276428.2 164764.7 -3.257002 12.43757  NaN
##                   ACF1
## Training set 0.8275723
## 
## Forecasts:
##          Point Forecast    Lo 0    Hi 0
## 2017.321        1767421 1767421 1767421
## 2017.323        1769221 1769221 1769221
## 2017.326        1771011 1771011 1771011
## 2017.329        1772790 1772790 1772790
## 2017.332        1774559 1774559 1774559
## 2017.334        1776317 1776317 1776317
## 2017.337        1778066 1778066 1778066
## 2017.340        1779803 1779803 1779803
## 2017.342        1781530 1781530 1781530
## 2017.345        1783247 1783247 1783247
## 2017.348        1784954 1784954 1784954
## 2017.351        1786650 1786650 1786650
## 2017.353        1788336 1788336 1788336
## 2017.356        1790011 1790011 1790011
## 2017.359        1791676 1791676 1791676
## 2017.362        1793330 1793330 1793330
## 2017.364        1794975 1794975 1794975
## 2017.367        1796608 1796608 1796608
## 2017.370        1798232 1798232 1798232
## 2017.373        1799845 1799845 1799845
## 2017.375        1801447 1801447 1801447
## 2017.378        1803039 1803039 1803039
## 2017.381        1804621 1804621 1804621
## 2017.384        1806192 1806192 1806192
## 2017.386        1807753 1807753 1807753
## 2017.389        1809304 1809304 1809304
## 2017.392        1810844 1810844 1810844
## 2017.395        1812374 1812374 1812374
## 2017.397        1813893 1813893 1813893
## 2017.400        1815402 1815402 1815402

names(Visits.lm.tns.pred)

##  [1] "model"     "mean"      "lower"     "upper"     "level"    
##  [6] "x"         "method"    "newdata"   "residuals" "fitted"

round(Visits.lm.tns.pred$mean,0)

## Time Series:
## Start = c(2017, 118) 
## End = c(2017, 147) 
## Frequency = 365 
##       1       2       3       4       5       6       7       8       9 
## 1767421 1769221 1771011 1772790 1774559 1776317 1778066 1779803 1781530 
##      10      11      12      13      14      15      16      17      18 
## 1783247 1784954 1786650 1788336 1790011 1791676 1793330 1794975 1796608 
##      19      20      21      22      23      24      25      26      27 
## 1798232 1799845 1801447 1803039 1804621 1806192 1807753 1809304 1810844 
##      28      29      30 
## 1812374 1813893 1815402

accuracy(Visits.lm.tns.pred)

##                         ME     RMSE      MAE       MPE     MAPE MASE
## Training set -1.095114e-11 276428.2 164764.7 -3.257002 12.43757  NaN
##                   ACF1
## Training set 0.8275723

# Calcualting Residuals of training period
train.lm.poly.trend <- tslm(train.ts ~ trend + I(trend^2))
res <- residuals(train.lm.poly.trend)
par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")
Acf(res, lag.max = 30, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")

# Check Corelation , we chose Trend model becuase of low MAPE and Run AR model
train.res.arima <- Arima(train.lm.poly.trend$residuals, order = c(2,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid) 
plot(train.lm.poly.trend$residuals, ylab = "Residuals", xlab = "Time", bty = "l", xaxt = "n", main ="") 
#axis(1, at = seq(20015, 2017, 1),labels = format(seq(2015, 2016, 1))) 
lines(train.res.arima.pred$fitted, lwd = 2, col = "blue")

summary(train.res.arima)

## Series: train.lm.poly.trend$residuals 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2   intercept
##       0.9873  -0.1736   -268.5008
## s.e.  0.0567   0.0567  45874.8214
## 
## sigma^2 estimated as 2.263e+10:  log likelihood=-4001.17
## AIC=8010.34   AICc=8010.47   BIC=8025.15
## 
## Training set error measures:
##                     ME     RMSE      MAE     MPE     MAPE MASE       ACF1
## Training set -141.1313 149662.7 80801.86 -27.242 184.0771  NaN 0.01967882

# ACF plot of residuals of residuals
Acf(train.res.arima$residuals, lag.max = 30, main="ACF of residuals of residuals")
## After visualization of Residual-Residuals, it looks like we lefy with only noise
# now do adjusted forecast
train.res.arima.pred$mean

## Time Series:
## Start = c(2017, 88) 
## End = c(2017, 117) 
## Frequency = 365 
##  [1] -52699.7126 -45004.7502 -35331.2159 -27116.9249 -20686.8938
##  [6] -15765.0356 -12022.3361  -9181.9075  -7027.5159  -5393.7589
## [11]  -4154.8859  -3215.4677  -2503.1249  -1962.9698  -1553.3814
## [16]  -1242.7990  -1007.2908   -828.7099   -693.2957   -590.6139
## [21]   -512.7524   -453.7117   -408.9424   -374.9947   -349.2529
## [26]   -329.7334   -314.9322   -303.7088   -295.1983   -288.7449

train.lm.poly.trend$mean

## NULL

modifedForecastValid <- train.res.arima.pred$mean + train.lm.poly.trend.pred$mean
accuracy(modifedForecastValid,valid.ts)

##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 57882.45 208753.6 138533.9 2.234121 7.239291 0.5090777  1.029331

accuracy(train.lm.poly.trend)

##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 8.126288e-12 282646.2 167232.4 -3.465474 12.97042 0.4708253

# above results shows improve MAPe from 12.97 to 7.24

################# Now use Smoothing Methods ###########################
# SMA
ma.trailing <- rollmean(train.ts, k = 30, align = "right")
last.ma <- tail(ma.trailing, 1) 
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2016,nTrain+153), end = c(2016,totalRecords+152), freq = 365) 
plot(train.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black') 
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2) 
lines(valid.ts)

summary(ma.trailing.pred)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 1617000 1617000 1617000 1617000 1617000 1617000

accuracy(ma.trailing.pred, valid.ts)

##                ME     RMSE      MAE      MPE     MAPE     ACF1 Theil's U
## Test set 147136.7 244140.4 156816.9 7.367908 7.984772 0.480145  1.179378

# using twice differencing

#Simple exponential smoothing
#When both trend and seasonality exist, we can apply differencing twice to the series in order to 
#de-trend and deseasonalize it. We performed double differencing on the Amtrak data, 
#which contain both trend and seasonality. 
#The result is a series with no trend and no monthly seasonality.
diff.twice.ts <- diff(diff(msiteVisits.ts, lag = 7), lag = 1) 
totalRecords <- length(msiteVisits.ts)
nValid <- 30
nTrain <- totalRecords - nValid
train.ts <- window(diff.twice.ts,   end = c(2016,nTrain+152))
train.ts

## Time Series:
## Start = c(2016, 161) 
## End = c(2017, 87) 
## Frequency = 365 
##   [1]    36764    -4177    47367   -11572   -38330    13875    -3334
##   [8]   -16463    17513   -19439     1504    13022   -33218    20904
##  [15]    76408   105925    -7003  -115672    18371   -22457   -35928
##  [22]   -44892  -104934   -14491   119677   111970   -96584    30661
##  [29]   -64758   -47079    14069   -43977  -170088   214081   -31029
##  [36]    26846    27149   -22364    72190    84237   -48357   -30786
##  [43]   -15537    -5528    25949   -13399   -93775   -27674    89824
##  [50]    44902    18901    54422     4308      665   -36927    -6710
##  [57]    50541  -111068   -79643   -40581    69184   102970   178760
##  [64]  -150169    81627   -60641    15233    41658  -232627  -110100
##  [71]    13283   -27854    81871    25088  -102484   199454   -44710
##  [78]    66010     5404    24332    28601   -15480   -37306     9352
##  [85]   -66698    59292   -68467   -15182    51736   -27635     8666
##  [92]    37118   -15253    36180    21430   -63595    88435  -102742
##  [99]    29733    22593   -21194    16009    42803    85949   -47147
## [106]   -93594    32642    36820   -27474    15218   -85311   144199
## [113]   -21785    90936   383979  1606526   -76419  -612958  -278289
## [120]    54628  -721283  -652082 -1678072   -23516   524814   291637
## [127]   -43087   593834   217914    48729   364476    85074   -47208
## [134]  -143241   -64513    89586   -34934  -259077   336934  -226538
## [141]   106190   161229  -201932  -272362    45755  -463842   200792
## [148]   162961   -99433   110669   260636   -61645   -14550   -17484
## [155]   -78960   -39068    47545     7296    79958    23805    78962
## [162]    21558    21812   -22747    40503   -95867    33463    14556
## [169]    81419    18796    15100      834    40770    30534   -69173
## [176]   -48866    -4239     9784    13887     4585   208084  -159894
## [183]   -83286   -68639     3134   -41372   -31941  -179393   198825
## [190]   101799   180282    -5652   784482  -158522  -253681  -129225
## [197]  -236896  -123230   -17464  -824609   137118   274617    60408
## [204]   190605    32742  -183105   116882   256871    84816   119779
## [211]  -110352   -91367   172629   -81108  -280967   -82530   -25096
## [218]     5225    -2679    25549   192927   -89016    42822   -94197
## [225]   125611   160071   -37518  -230461   347901    77375    98933
## [232]   -24137  -485160    30657     1228  -174962  -369460   -35216
## [239]    31387  1022571    26720  -464028   -71221   300550   125881
## [246]  -106798  -408245  -251121   480616   -49078  -123938   -92446
## [253]     2903  -199464   159367    34065   155886    89655     4956
## [260]   -68670    63588    23617    20212   -89984   -83493    81070
## [267]    45101   -51547   -16128    39612   -46395   -75554  -247506
## [274]   107079    88502    19847  -138118   -27395   239636   297161
## [281]   -20575  -249985    36954   113671    85409   -89511   155040
## [288]   -54588     -575   -81560   -32003   -27229

valid.ts <- window(diff.twice.ts, start = c (2016,nTrain+153), end = c(2016,totalRecords+152))
valid.ts

## Time Series:
## Start = c(2017, 88) 
## End = c(2017, 117) 
## Frequency = 365 
##  [1]  -59956 -298737  -80536  190451  408358  -96710  -99097  -87206
##  [9]  750823 -120594 -248074 -321866   22109  -47685   57421 -811569
## [17]  322714  361069 -222515   83013  215168  -39311   11872  -93260
## [25] -158257  138245  -31353   40019   41619  -18071

#The ets function chooses the optimal ?? and initial level value by maximizing something called the likelihood over 
#the training period. In the case of simple exponential smoothing model, maximizing the likelihood 
#is equivalent to minimizing the RMSE in the training period 
#(the RMSE in the training period is equal to ??, the standard deviation of the training residuals.)
ses <- ets(train.ts, model = "ANN", alpha = 0.2) 
ses.pred <- forecast(ses, h = nValid, level = 0) 
ses.pred

##          Point Forecast      Lo 0      Hi 0
## 2017.238      -11940.84 -11940.84 -11940.84
## 2017.241      -11940.84 -11940.84 -11940.84
## 2017.244      -11940.84 -11940.84 -11940.84
## 2017.247      -11940.84 -11940.84 -11940.84
## 2017.249      -11940.84 -11940.84 -11940.84
## 2017.252      -11940.84 -11940.84 -11940.84
## 2017.255      -11940.84 -11940.84 -11940.84
## 2017.258      -11940.84 -11940.84 -11940.84
## 2017.260      -11940.84 -11940.84 -11940.84
## 2017.263      -11940.84 -11940.84 -11940.84
## 2017.266      -11940.84 -11940.84 -11940.84
## 2017.268      -11940.84 -11940.84 -11940.84
## 2017.271      -11940.84 -11940.84 -11940.84
## 2017.274      -11940.84 -11940.84 -11940.84
## 2017.277      -11940.84 -11940.84 -11940.84
## 2017.279      -11940.84 -11940.84 -11940.84
## 2017.282      -11940.84 -11940.84 -11940.84
## 2017.285      -11940.84 -11940.84 -11940.84
## 2017.288      -11940.84 -11940.84 -11940.84
## 2017.290      -11940.84 -11940.84 -11940.84
## 2017.293      -11940.84 -11940.84 -11940.84
## 2017.296      -11940.84 -11940.84 -11940.84
## 2017.299      -11940.84 -11940.84 -11940.84
## 2017.301      -11940.84 -11940.84 -11940.84
## 2017.304      -11940.84 -11940.84 -11940.84
## 2017.307      -11940.84 -11940.84 -11940.84
## 2017.310      -11940.84 -11940.84 -11940.84
## 2017.312      -11940.84 -11940.84 -11940.84
## 2017.315      -11940.84 -11940.84 -11940.84
## 2017.318      -11940.84 -11940.84 -11940.84

plot(ses.pred, ylab = "Ridership (Twice-Differenced)", xlab = "Time", bty = "l", xaxt = "n",  main ="", flty = 2) 
axis(1, at = seq(1991, 2006, 1), labels = format(seq(1991, 2006, 1))) 
lines(ses.pred$fitted, lwd = 2, col = "blue") 
lines(valid.ts)
accuracy(ses.pred,valid.ts)

##                     ME     RMSE      MAE       MPE      MAPE MASE
## Training set -360.3507 237417.2 133355.9 112.95622 188.71335  NaN
## Test set     5543.6413 266667.4 180997.2  99.94186  99.94186  NaN
##                    ACF1 Theil's U
## Training set  0.1130451        NA
## Test set     -0.1264115 0.8966885

## Automatic selection of model
ses.opt <- ets(train.ts, model = "ZZZ")
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0) 
accuracy(ses.pred, valid.ts)

##                     ME     RMSE      MAE       MPE      MAPE MASE
## Training set -360.3507 237417.2 133355.9 112.95622 188.71335  NaN
## Test set     5543.6413 266667.4 180997.2  99.94186  99.94186  NaN
##                    ACF1 Theil's U
## Training set  0.1130451        NA
## Test set     -0.1264115 0.8966885

accuracy(ses.opt.pred, valid.ts)

##                     ME     RMSE      MAE       MPE     MAPE MASE
## Training set -2087.820 222421.3 117578.1  97.51904 101.7140  NaN
## Test set     -8777.889 266754.3 182906.7 100.01159 100.0116  NaN
##                    ACF1 Theil's U
## Training set  0.1685516        NA
## Test set     -0.1264115 0.9237874

ses.opt

## ETS(A,N,N) 
## 
## Call:
##  ets(y = train.ts, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 2441.7086 
## 
##   sigma:  222421.3
## 
##      AIC     AICc      BIC 
## 8854.012 8854.095 8865.042

### This model is used by Hyndsight for daily forecast
# we dont have full year series so we will sstick to ets for forecasting
mvisits.ts <- ts(eCommerceTrafficData$Msite,frequency = 7)
mvisits.ts.ets <- ets(mvisits.ts)
mvisits.ts.ets.pred <- forecast(mvisits.ts.ets,h=30)
mvisits.ts.ets.pred

##          Point Forecast      Lo 80   Hi 80        Lo 95   Hi 95
## 48.14286        1517792 1325975.72 1709608  1224434.301 1811150
## 48.28571        1500512 1219262.15 1781762  1070377.554 1930646
## 48.42857        1536651 1167740.17 1905562   972450.441 2100852
## 48.57143        1613982 1147299.37 2080664   900252.686 2327711
## 48.71429        1501514  996434.43 2006594   729061.372 2273967
## 48.85714        1514621  935180.98 2094061   628444.090 2400797
## 49.00000        1533390  876906.63 2189874   529385.068 2537396
## 49.14286        1467175  772739.73 2161611   405127.759 2529223
## 49.28571        1453172  700037.59 2206305   301352.571 2604990
## 49.42857        1490801  651345.23 2330256   206964.265 2774637
## 49.57143        1568450  615080.78 2521819   110397.668 3026502
## 49.71429        1461476  507765.84 2415186     2902.133 2920050
## 49.85714        1476456  446928.51 2505984   -98070.537 3050983
## 50.00000        1496889  386113.73 2607664  -201895.095 3195673
## 50.14286        1434189  305670.72 2562707  -291730.723 3160109
## 50.28571        1422320  239315.89 2605325  -386928.871 3231570
## 50.42857        1460921  180134.16 2741707  -497873.328 3419715
## 50.57143        1538777  120409.03 2957146  -630429.872 3707985
## 50.71429        1435384   47491.97 2823276  -687213.662 3557982
## 50.85714        1451585  -17713.37 2920884  -795513.001 3698683
## 51.00000        1473101  -84900.29 3031103  -909656.611 3855860
## 51.14286        1412692 -145826.51 2971211  -970856.601 3796241
## 51.28571        1402216 -208913.19 3013344 -1061793.215 3866224
## 51.42857        1441449 -280997.13 3163895 -1192804.923 4075702
## 51.57143        1519441 -366340.22 3405222 -1364612.443 4403494
## 51.71429        1418381 -407769.77 3244531 -1374475.365 4211236
## 51.85714        1435377 -479591.75 3350347 -1493315.225 4364070
## 52.00000        1457600 -555373.58 3470574 -1620977.521 4536178
## 52.14286        1398684 -598920.84 3396289 -1656389.006 4453757
## 52.28571        1389114 -660782.72 3439011 -1745932.531 4524161

plot(mvisits.ts.ets.pred, xlab = "Daily", ylab = "Daily msite visits", main = "ETS")

accuracy(mvisits.ts.ets.pred)

##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 807.8417 159309.2 82445.43 -0.1713742 5.721994 0.4411183
##                    ACF1
## Training set 0.07174062

Acf(mvisits.ts.ets.pred$residuals, lag.max = 30, main="ACF of residuals")
mvisits.ts.ets.pred$mean

## Time Series:
## Start = c(48, 2) 
## End = c(52, 3) 
## Frequency = 7 
##  [1] 1517792 1500512 1536651 1613982 1501514 1514621 1533390 1467175
##  [9] 1453172 1490801 1568450 1461476 1476456 1496889 1434189 1422320
## [17] 1460921 1538777 1435384 1451585 1473101 1412692 1402216 1441449
## [25] 1519441 1418381 1435377 1457600 1398684 1389114

# next 30 days forecast

ISB CBA 7 Forecasting Analytics Project | 29-Apr-2017

Team 24 Anurag Singhvi (71620007) Debanjan Paul (71620021) Soubhagya Rout (71620070) Vineet Garg (71620090

April 28, 2017

R Markdown : Looks into all methods employed to forecast by mobile website visits in next 31 days. Essence of this excercise captured in our project final report.