R Markdown : Looks into all methods employed to forecast by mobile website visits in next 31 days. Essence of this excercise captured in our project final report.
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
pacman::p_load("moments","extRemes","stringi", "ggplot2", "TTR", "forecast", "zoo", "rts", "xts")
setwd("D:\\Google Drive\\FA\\_FAProject")
##### Step 1 Load Organised Data
eCommerceTrafficData <- read.csv("eCommerceTrafficDataMSite.csv")
##### Visualise data
#par(mfrow=c(2, 2))
inds <- seq(as.Date("2016-06-01"), as.Date("2017-04-26"), by = "day")
msiteVisits.ts <- ts(eCommerceTrafficData$Msite, start = c(2016, as.numeric(format(inds[1], "%j"))),
frequency = 365)
## Create a time series object
#set.seed(25)
#myts <- ts(rnorm(length(inds)), start = c(2014, as.numeric(format(inds[1], "%j"))), frequency = 365)
plot(msiteVisits.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'blue')

## Partition of data of msiteVisits.ts
totalRecords <- length(msiteVisits.ts)
nValid <- 30
nTrain <- totalRecords - nValid
train.ts <- window(msiteVisits.ts, end = c(2016,nTrain+152))
train.ts
## Time Series:
## Start = c(2016, 153)
## End = c(2017, 87)
## Frequency = 365
## [1] 638924 621551 612524 649303 682003 650997 666294 643216
## [9] 662607 649403 733549 754677 685341 714513 688101 691029
## [17] 695338 760045 782677 726363 722317 716809 796145 906379
## [25] 964083 871043 833100 806597 765161 799605 804905 848118
## [33] 874755 948782 825695 814920 784606 742827 800109 782769
## [41] 686708 777702 735898 732430 717800 752718 807568 795744
## [49] 838381 765791 746786 726628 787495 828946 723347 738310
## [57] 755544 781441 780184 895473 941232 836298 814334 824858
## [65] 901296 788971 824617 829795 794045 875051 1064335 990604
## [73] 959906 934911 955322 961230 809609 888793 828345 769793
## [81] 826669 872168 775592 823425 857899 863461 810313 891521
## [89] 965621 853565 864092 907918 846782 852926 865667 924585
## [97] 864265 847157 899649 875631 866522 915443 995791 871876
## [105] 943203 892953 898668 912152 939879 1036236 955124 1112400
## [113] 1015003 927124 973250 1037797 1106680 1040786 1112751 1159553
## [121] 1049889 1186951 1635477 3310886 3168573 2627580 2396093 2341057
## [129] 1756836 1553280 1550617 1384788 1368609 1428759 1330636 1340249
## [137] 1354607 1400673 1599320 1668215 1681157 1439793 1384893 1488837
## [145] 1499969 1439539 1845368 1631772 1496598 1602927 1504939 1243709
## [153] 1229034 1171021 1158217 1186004 1192900 1205581 1204987 1128667
## [161] 1056104 1025816 974643 942471 1002697 1009399 1013037 964279
## [169] 1012953 983338 972978 1010457 1057662 965433 950138 1013368
## [177] 1065172 1073608 1126187 1174226 1122767 1138006 1132063 1135001
## [185] 1139198 1201561 1263487 1216613 1439936 1274099 1193751 1129309
## [193] 1194806 1215360 1136545 1180475 1213463 1234914 1350754 1410599
## [201] 2215635 1978298 1768547 1672310 1456865 1449475 1491856 1472283
## [209] 1372064 1436930 1401101 1376261 1401613 1260889 1358198 1514850
## [217] 1664532 1748482 1613290 1547275 1579180 1595381 1471066 1538218
## [225] 1597072 1467105 1398411 1455865 1664993 1451662 1561636 1526293
## [233] 1521937 1613314 1633250 1611917 1746487 1933836 1997426 1968933
## [241] 1575150 1625743 1605638 1565246 1383135 1411509 1414403 2043191
## [249] 2120504 1636371 1524758 1643197 1797452 1693548 1914091 1740283
## [257] 1736766 1576075 1570576 1632385 1531384 1552463 1538022 1568570
## [265] 1563765 1647921 1714686 1545015 1629682 1638858 1689618 1594829
## [273] 1595492 1743327 1618757 1651877 1644925 1735297 1594113 1519222
## [281] 1419551 1402060 1523682 1536577 1488831 1320252 1484997 1682487
## [289] 1644421 1516058 1565907 1631832 1548662 1623896 1976426 1883772
## [297] 1754834 1723123 1757045 1646646
valid.ts <- window(msiteVisits.ts, start = c (2016,nTrain+153), end = c(2016,totalRecords+152))
valid.ts
## Time Series:
## Start = c(2017, 88)
## End = c(2017, 117)
## Frequency = 365
## [1] 1661924 1715717 1542527 1604040 1980687 1917899 1708403 1636475
## [9] 2441091 2147307 1960746 2015527 1974848 1717667 1703160 1696207
## [17] 1725137 1899645 1731911 1774245 1732232 1678414 1683333 1619003
## [25] 1635254 1605765 1616746 1614752 1602553 1589401
### Fist lets do the seasonal naive forecast
naive.pred <- naive(train.ts, h = 30)
naive.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2017.238 1646646 1443349 1849943 1335730 1957562
## 2017.241 1646646 1443349 1849943 1335730 1957562
## 2017.244 1646646 1443349 1849943 1335730 1957562
## 2017.247 1646646 1443349 1849943 1335730 1957562
## 2017.249 1646646 1443349 1849943 1335730 1957562
## 2017.252 1646646 1443349 1849943 1335730 1957562
## 2017.255 1646646 1443349 1849943 1335730 1957562
## 2017.258 1646646 1443349 1849943 1335730 1957562
## 2017.260 1646646 1443349 1849943 1335730 1957562
## 2017.263 1646646 1443349 1849943 1335730 1957562
## 2017.266 1646646 1443349 1849943 1335730 1957562
## 2017.268 1646646 1443349 1849943 1335730 1957562
## 2017.271 1646646 1443349 1849943 1335730 1957562
## 2017.274 1646646 1443349 1849943 1335730 1957562
## 2017.277 1646646 1443349 1849943 1335730 1957562
## 2017.279 1646646 1443349 1849943 1335730 1957562
## 2017.282 1646646 1443349 1849943 1335730 1957562
## 2017.285 1646646 1443349 1849943 1335730 1957562
## 2017.288 1646646 1443349 1849943 1335730 1957562
## 2017.290 1646646 1443349 1849943 1335730 1957562
## 2017.293 1646646 1443349 1849943 1335730 1957562
## 2017.296 1646646 1443349 1849943 1335730 1957562
## 2017.299 1646646 1443349 1849943 1335730 1957562
## 2017.301 1646646 1443349 1849943 1335730 1957562
## 2017.304 1646646 1443349 1849943 1335730 1957562
## 2017.307 1646646 1443349 1849943 1335730 1957562
## 2017.310 1646646 1443349 1849943 1335730 1957562
## 2017.312 1646646 1443349 1849943 1335730 1957562
## 2017.315 1646646 1443349 1849943 1335730 1957562
## 2017.318 1646646 1443349 1849943 1335730 1957562
valid.ts
## Time Series:
## Start = c(2017, 88)
## End = c(2017, 117)
## Frequency = 365
## [1] 1661924 1715717 1542527 1604040 1980687 1917899 1708403 1636475
## [9] 2441091 2147307 1960746 2015527 1974848 1717667 1703160 1696207
## [17] 1725137 1899645 1731911 1774245 1732232 1678414 1683333 1619003
## [25] 1635254 1605765 1616746 1614752 1602553 1589401
plot(train.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'blue')
lines(naive.pred$fitted, lwd = 2, col = "green")
lines(valid.ts, lwd = 2, col = "blue")

accuracy(naive.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 3370.308 158633.6 85010.49 -0.09560444 6.263054 NaN
## Test set 117774.533 227653.6 144437.47 5.68615230 7.363165 NaN
## ACF1 Theil's U
## Training set 0.08249566 NA
## Test set 0.48014505 1.103587
# ############ Linear Trend Model
#tslm function (which stands for time series linear model)
#op <- par(oma=c(4,6,1,1))
#par(op)
train.lm <- tslm(train.ts ~ trend)
# now train a model and plot it
train.lm.pred <- forecast(train.lm, h = nValid, level = 0)
plot(train.lm.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2)
lines(train.lm.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm)
##
## Call:
## tslm(formula = train.ts ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -402593 -142882 -54449 42101 2176528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 683911.5 33139.4 20.64 <2e-16 ***
## trend 3632.6 190.9 19.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 286300 on 298 degrees of freedom
## Multiple R-squared: 0.5487, Adjusted R-squared: 0.5472
## F-statistic: 362.3 on 1 and 298 DF, p-value: < 2.2e-16
accuracy(train.lm.pred,valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.775517e-12 285322.6 165621.8 -3.719312 12.63214 NaN
## Test set -6.558669e+04 218081.3 188453.2 -4.878744 10.60818 NaN
## ACF1 Theil's U
## Training set 0.8456486 NA
## Test set 0.5384365 1.120771
############# Exponential Trend Moedl #####
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda = 0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h = nValid, level = 0)
plot(train.lm.expo.trend.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2, main = "Exponential Trend Moedl")
lines(train.lm.expo.trend.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.expo.trend)
##
## Call:
## tslm(formula = train.ts ~ trend, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30670 -0.11450 -0.04445 0.05212 1.13103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.348e+01 2.189e-02 616.01 <2e-16 ***
## trend 3.200e-03 1.261e-04 25.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1891 on 298 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 9.26e+14 on 1 and 298 DF, p-value: < 2.2e-16
accuracy(train.lm.expo.trend.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 22495.4 294990.0 164554.4 -1.550321 11.95760 NaN
## Test set -208057.5 303040.2 268127.8 -13.086905 15.73866 NaN
## ACF1 Theil's U
## Training set 0.8536558 NA
## Test set 0.5822136 1.58165
################## Quadratic or polynomial trend model
train.lm.poly.trend <- tslm(train.ts ~ trend + I(trend^2))
train.lm.poly.trend.pred <- forecast(train.lm.poly.trend, h = nValid, level = 0)
plot(train.lm.poly.trend.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2, main = "Polynomial Trend Moedl")
lines(train.lm.poly.trend.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)

summary(train.lm.poly.trend)
##
## Call:
## tslm(formula = train.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -409587 -151156 -36232 65401 2137018
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 595854.428 49532.238 12.030 < 2e-16 ***
## trend 5382.112 759.908 7.083 1.03e-11 ***
## I(trend^2) -5.812 2.445 -2.377 0.0181 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 284100 on 297 degrees of freedom
## Multiple R-squared: 0.5571, Adjusted R-squared: 0.5541
## F-statistic: 186.8 on 2 and 297 DF, p-value: < 2.2e-16
accuracy(train.lm.poly.trend.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 8.126288e-12 282646.2 167232.4 -3.465474 12.970419 NaN
## Test set 4.949529e+04 206388.5 137139.9 1.746836 7.188252 NaN
## ACF1 Theil's U
## Training set 0.8430400 NA
## Test set 0.5065894 1.018332
### check seasoanlity in this daily time series
fit <- tbats(msiteVisits.ts)
fit
## BATS(0, {0,0}, 0.8, -)
##
## Call: tbats(y = msiteVisits.ts)
##
## Parameters
## Lambda: 8.1e-05
## Alpha: 1.146764
## Beta: -0.1869698
## Damping Parameter: 0.800002
##
## Seed States:
## [,1]
## [1,] 1.335957e+01
## [2,] 4.394361e-04
##
## Sigma: 0.09159712
## AIC: 9589.874
seasonal <- !is.null(fit$seasonal)
# this shows there is no seasonlity present
seasonal
## [1] FALSE
### on the basis of MAPE, we will use quadratic trend model to do forecasting while using model driven approach
Visits.lm.tns <- tslm(msiteVisits.ts ~ trend + I(trend^2))
Visits.lm.tns.pred <- forecast(Visits.lm.tns, h = 30, level = 0)
plot(Visits.lm.tns.pred, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black',flty = 2)
lines(Visits.lm.tns.pred$fitted, lwd = 2, col = "blue")

summary(Visits.lm.tns.pred)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = msiteVisits.ts ~ trend + I(trend^2))
##
## Coefficients:
## (Intercept) trend I(trend^2)
## 600512.169 5245.582 -5.197
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.095114e-11 276428.2 164764.7 -3.257002 12.43757 NaN
## ACF1
## Training set 0.8275723
##
## Forecasts:
## Point Forecast Lo 0 Hi 0
## 2017.321 1767421 1767421 1767421
## 2017.323 1769221 1769221 1769221
## 2017.326 1771011 1771011 1771011
## 2017.329 1772790 1772790 1772790
## 2017.332 1774559 1774559 1774559
## 2017.334 1776317 1776317 1776317
## 2017.337 1778066 1778066 1778066
## 2017.340 1779803 1779803 1779803
## 2017.342 1781530 1781530 1781530
## 2017.345 1783247 1783247 1783247
## 2017.348 1784954 1784954 1784954
## 2017.351 1786650 1786650 1786650
## 2017.353 1788336 1788336 1788336
## 2017.356 1790011 1790011 1790011
## 2017.359 1791676 1791676 1791676
## 2017.362 1793330 1793330 1793330
## 2017.364 1794975 1794975 1794975
## 2017.367 1796608 1796608 1796608
## 2017.370 1798232 1798232 1798232
## 2017.373 1799845 1799845 1799845
## 2017.375 1801447 1801447 1801447
## 2017.378 1803039 1803039 1803039
## 2017.381 1804621 1804621 1804621
## 2017.384 1806192 1806192 1806192
## 2017.386 1807753 1807753 1807753
## 2017.389 1809304 1809304 1809304
## 2017.392 1810844 1810844 1810844
## 2017.395 1812374 1812374 1812374
## 2017.397 1813893 1813893 1813893
## 2017.400 1815402 1815402 1815402
names(Visits.lm.tns.pred)
## [1] "model" "mean" "lower" "upper" "level"
## [6] "x" "method" "newdata" "residuals" "fitted"
round(Visits.lm.tns.pred$mean,0)
## Time Series:
## Start = c(2017, 118)
## End = c(2017, 147)
## Frequency = 365
## 1 2 3 4 5 6 7 8 9
## 1767421 1769221 1771011 1772790 1774559 1776317 1778066 1779803 1781530
## 10 11 12 13 14 15 16 17 18
## 1783247 1784954 1786650 1788336 1790011 1791676 1793330 1794975 1796608
## 19 20 21 22 23 24 25 26 27
## 1798232 1799845 1801447 1803039 1804621 1806192 1807753 1809304 1810844
## 28 29 30
## 1812374 1813893 1815402
accuracy(Visits.lm.tns.pred)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.095114e-11 276428.2 164764.7 -3.257002 12.43757 NaN
## ACF1
## Training set 0.8275723
# Calcualting Residuals of training period
train.lm.poly.trend <- tslm(train.ts ~ trend + I(trend^2))
res <- residuals(train.lm.poly.trend)
par(mfrow=c(1,2))
plot(res, ylab="Residuals",xlab="Year")
Acf(res, lag.max = 30, main="ACF of residuals")

hist(res, ylab = "Frequency", xlab = "Forecast Error", bty = "l", main ="")
# Check Corelation , we chose Trend model becuase of low MAPE and Run AR model
train.res.arima <- Arima(train.lm.poly.trend$residuals, order = c(2,0,0))
train.res.arima.pred <- forecast(train.res.arima, h = nValid)
plot(train.lm.poly.trend$residuals, ylab = "Residuals", xlab = "Time", bty = "l", xaxt = "n", main ="")
#axis(1, at = seq(20015, 2017, 1),labels = format(seq(2015, 2016, 1)))
lines(train.res.arima.pred$fitted, lwd = 2, col = "blue")

summary(train.res.arima)
## Series: train.lm.poly.trend$residuals
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 intercept
## 0.9873 -0.1736 -268.5008
## s.e. 0.0567 0.0567 45874.8214
##
## sigma^2 estimated as 2.263e+10: log likelihood=-4001.17
## AIC=8010.34 AICc=8010.47 BIC=8025.15
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -141.1313 149662.7 80801.86 -27.242 184.0771 NaN 0.01967882
# ACF plot of residuals of residuals
Acf(train.res.arima$residuals, lag.max = 30, main="ACF of residuals of residuals")
## After visualization of Residual-Residuals, it looks like we lefy with only noise
# now do adjusted forecast
train.res.arima.pred$mean
## Time Series:
## Start = c(2017, 88)
## End = c(2017, 117)
## Frequency = 365
## [1] -52699.7126 -45004.7502 -35331.2159 -27116.9249 -20686.8938
## [6] -15765.0356 -12022.3361 -9181.9075 -7027.5159 -5393.7589
## [11] -4154.8859 -3215.4677 -2503.1249 -1962.9698 -1553.3814
## [16] -1242.7990 -1007.2908 -828.7099 -693.2957 -590.6139
## [21] -512.7524 -453.7117 -408.9424 -374.9947 -349.2529
## [26] -329.7334 -314.9322 -303.7088 -295.1983 -288.7449
train.lm.poly.trend$mean
## NULL
modifedForecastValid <- train.res.arima.pred$mean + train.lm.poly.trend.pred$mean
accuracy(modifedForecastValid,valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 57882.45 208753.6 138533.9 2.234121 7.239291 0.5090777 1.029331
accuracy(train.lm.poly.trend)
## ME RMSE MAE MPE MAPE MASE
## Training set 8.126288e-12 282646.2 167232.4 -3.465474 12.97042 0.4708253
# above results shows improve MAPe from 12.97 to 7.24
################# Now use Smoothing Methods ###########################
# SMA
ma.trailing <- rollmean(train.ts, k = 30, align = "right")
last.ma <- tail(ma.trailing, 1)
ma.trailing.pred <- ts(rep(last.ma, nValid), start = c (2016,nTrain+153), end = c(2016,totalRecords+152), freq = 365)
plot(train.ts, xlab = "Daily", ylab = "Mobile Web Visits", bty = "l", col = 'black')
lines(ma.trailing, lwd = 2, col = "blue")
lines(ma.trailing.pred, lwd = 2, col = "blue", lty = 2)
lines(valid.ts)

summary(ma.trailing.pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1617000 1617000 1617000 1617000 1617000 1617000
accuracy(ma.trailing.pred, valid.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 147136.7 244140.4 156816.9 7.367908 7.984772 0.480145 1.179378
# using twice differencing
#Simple exponential smoothing
#When both trend and seasonality exist, we can apply differencing twice to the series in order to
#de-trend and deseasonalize it. We performed double differencing on the Amtrak data,
#which contain both trend and seasonality.
#The result is a series with no trend and no monthly seasonality.
diff.twice.ts <- diff(diff(msiteVisits.ts, lag = 7), lag = 1)
totalRecords <- length(msiteVisits.ts)
nValid <- 30
nTrain <- totalRecords - nValid
train.ts <- window(diff.twice.ts, end = c(2016,nTrain+152))
train.ts
## Time Series:
## Start = c(2016, 161)
## End = c(2017, 87)
## Frequency = 365
## [1] 36764 -4177 47367 -11572 -38330 13875 -3334
## [8] -16463 17513 -19439 1504 13022 -33218 20904
## [15] 76408 105925 -7003 -115672 18371 -22457 -35928
## [22] -44892 -104934 -14491 119677 111970 -96584 30661
## [29] -64758 -47079 14069 -43977 -170088 214081 -31029
## [36] 26846 27149 -22364 72190 84237 -48357 -30786
## [43] -15537 -5528 25949 -13399 -93775 -27674 89824
## [50] 44902 18901 54422 4308 665 -36927 -6710
## [57] 50541 -111068 -79643 -40581 69184 102970 178760
## [64] -150169 81627 -60641 15233 41658 -232627 -110100
## [71] 13283 -27854 81871 25088 -102484 199454 -44710
## [78] 66010 5404 24332 28601 -15480 -37306 9352
## [85] -66698 59292 -68467 -15182 51736 -27635 8666
## [92] 37118 -15253 36180 21430 -63595 88435 -102742
## [99] 29733 22593 -21194 16009 42803 85949 -47147
## [106] -93594 32642 36820 -27474 15218 -85311 144199
## [113] -21785 90936 383979 1606526 -76419 -612958 -278289
## [120] 54628 -721283 -652082 -1678072 -23516 524814 291637
## [127] -43087 593834 217914 48729 364476 85074 -47208
## [134] -143241 -64513 89586 -34934 -259077 336934 -226538
## [141] 106190 161229 -201932 -272362 45755 -463842 200792
## [148] 162961 -99433 110669 260636 -61645 -14550 -17484
## [155] -78960 -39068 47545 7296 79958 23805 78962
## [162] 21558 21812 -22747 40503 -95867 33463 14556
## [169] 81419 18796 15100 834 40770 30534 -69173
## [176] -48866 -4239 9784 13887 4585 208084 -159894
## [183] -83286 -68639 3134 -41372 -31941 -179393 198825
## [190] 101799 180282 -5652 784482 -158522 -253681 -129225
## [197] -236896 -123230 -17464 -824609 137118 274617 60408
## [204] 190605 32742 -183105 116882 256871 84816 119779
## [211] -110352 -91367 172629 -81108 -280967 -82530 -25096
## [218] 5225 -2679 25549 192927 -89016 42822 -94197
## [225] 125611 160071 -37518 -230461 347901 77375 98933
## [232] -24137 -485160 30657 1228 -174962 -369460 -35216
## [239] 31387 1022571 26720 -464028 -71221 300550 125881
## [246] -106798 -408245 -251121 480616 -49078 -123938 -92446
## [253] 2903 -199464 159367 34065 155886 89655 4956
## [260] -68670 63588 23617 20212 -89984 -83493 81070
## [267] 45101 -51547 -16128 39612 -46395 -75554 -247506
## [274] 107079 88502 19847 -138118 -27395 239636 297161
## [281] -20575 -249985 36954 113671 85409 -89511 155040
## [288] -54588 -575 -81560 -32003 -27229
valid.ts <- window(diff.twice.ts, start = c (2016,nTrain+153), end = c(2016,totalRecords+152))
valid.ts
## Time Series:
## Start = c(2017, 88)
## End = c(2017, 117)
## Frequency = 365
## [1] -59956 -298737 -80536 190451 408358 -96710 -99097 -87206
## [9] 750823 -120594 -248074 -321866 22109 -47685 57421 -811569
## [17] 322714 361069 -222515 83013 215168 -39311 11872 -93260
## [25] -158257 138245 -31353 40019 41619 -18071
#The ets function chooses the optimal ?? and initial level value by maximizing something called the likelihood over
#the training period. In the case of simple exponential smoothing model, maximizing the likelihood
#is equivalent to minimizing the RMSE in the training period
#(the RMSE in the training period is equal to ??, the standard deviation of the training residuals.)
ses <- ets(train.ts, model = "ANN", alpha = 0.2)
ses.pred <- forecast(ses, h = nValid, level = 0)
ses.pred
## Point Forecast Lo 0 Hi 0
## 2017.238 -11940.84 -11940.84 -11940.84
## 2017.241 -11940.84 -11940.84 -11940.84
## 2017.244 -11940.84 -11940.84 -11940.84
## 2017.247 -11940.84 -11940.84 -11940.84
## 2017.249 -11940.84 -11940.84 -11940.84
## 2017.252 -11940.84 -11940.84 -11940.84
## 2017.255 -11940.84 -11940.84 -11940.84
## 2017.258 -11940.84 -11940.84 -11940.84
## 2017.260 -11940.84 -11940.84 -11940.84
## 2017.263 -11940.84 -11940.84 -11940.84
## 2017.266 -11940.84 -11940.84 -11940.84
## 2017.268 -11940.84 -11940.84 -11940.84
## 2017.271 -11940.84 -11940.84 -11940.84
## 2017.274 -11940.84 -11940.84 -11940.84
## 2017.277 -11940.84 -11940.84 -11940.84
## 2017.279 -11940.84 -11940.84 -11940.84
## 2017.282 -11940.84 -11940.84 -11940.84
## 2017.285 -11940.84 -11940.84 -11940.84
## 2017.288 -11940.84 -11940.84 -11940.84
## 2017.290 -11940.84 -11940.84 -11940.84
## 2017.293 -11940.84 -11940.84 -11940.84
## 2017.296 -11940.84 -11940.84 -11940.84
## 2017.299 -11940.84 -11940.84 -11940.84
## 2017.301 -11940.84 -11940.84 -11940.84
## 2017.304 -11940.84 -11940.84 -11940.84
## 2017.307 -11940.84 -11940.84 -11940.84
## 2017.310 -11940.84 -11940.84 -11940.84
## 2017.312 -11940.84 -11940.84 -11940.84
## 2017.315 -11940.84 -11940.84 -11940.84
## 2017.318 -11940.84 -11940.84 -11940.84
plot(ses.pred, ylab = "Ridership (Twice-Differenced)", xlab = "Time", bty = "l", xaxt = "n", main ="", flty = 2)
axis(1, at = seq(1991, 2006, 1), labels = format(seq(1991, 2006, 1)))
lines(ses.pred$fitted, lwd = 2, col = "blue")
lines(valid.ts)
accuracy(ses.pred,valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -360.3507 237417.2 133355.9 112.95622 188.71335 NaN
## Test set 5543.6413 266667.4 180997.2 99.94186 99.94186 NaN
## ACF1 Theil's U
## Training set 0.1130451 NA
## Test set -0.1264115 0.8966885
## Automatic selection of model
ses.opt <- ets(train.ts, model = "ZZZ")
ses.opt.pred <- forecast(ses.opt, h = nValid, level = 0)
accuracy(ses.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -360.3507 237417.2 133355.9 112.95622 188.71335 NaN
## Test set 5543.6413 266667.4 180997.2 99.94186 99.94186 NaN
## ACF1 Theil's U
## Training set 0.1130451 NA
## Test set -0.1264115 0.8966885
accuracy(ses.opt.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -2087.820 222421.3 117578.1 97.51904 101.7140 NaN
## Test set -8777.889 266754.3 182906.7 100.01159 100.0116 NaN
## ACF1 Theil's U
## Training set 0.1685516 NA
## Test set -0.1264115 0.9237874
ses.opt
## ETS(A,N,N)
##
## Call:
## ets(y = train.ts, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 1e-04
##
## Initial states:
## l = 2441.7086
##
## sigma: 222421.3
##
## AIC AICc BIC
## 8854.012 8854.095 8865.042
### This model is used by Hyndsight for daily forecast
# we dont have full year series so we will sstick to ets for forecasting
mvisits.ts <- ts(eCommerceTrafficData$Msite,frequency = 7)
mvisits.ts.ets <- ets(mvisits.ts)
mvisits.ts.ets.pred <- forecast(mvisits.ts.ets,h=30)
mvisits.ts.ets.pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 48.14286 1517792 1325975.72 1709608 1224434.301 1811150
## 48.28571 1500512 1219262.15 1781762 1070377.554 1930646
## 48.42857 1536651 1167740.17 1905562 972450.441 2100852
## 48.57143 1613982 1147299.37 2080664 900252.686 2327711
## 48.71429 1501514 996434.43 2006594 729061.372 2273967
## 48.85714 1514621 935180.98 2094061 628444.090 2400797
## 49.00000 1533390 876906.63 2189874 529385.068 2537396
## 49.14286 1467175 772739.73 2161611 405127.759 2529223
## 49.28571 1453172 700037.59 2206305 301352.571 2604990
## 49.42857 1490801 651345.23 2330256 206964.265 2774637
## 49.57143 1568450 615080.78 2521819 110397.668 3026502
## 49.71429 1461476 507765.84 2415186 2902.133 2920050
## 49.85714 1476456 446928.51 2505984 -98070.537 3050983
## 50.00000 1496889 386113.73 2607664 -201895.095 3195673
## 50.14286 1434189 305670.72 2562707 -291730.723 3160109
## 50.28571 1422320 239315.89 2605325 -386928.871 3231570
## 50.42857 1460921 180134.16 2741707 -497873.328 3419715
## 50.57143 1538777 120409.03 2957146 -630429.872 3707985
## 50.71429 1435384 47491.97 2823276 -687213.662 3557982
## 50.85714 1451585 -17713.37 2920884 -795513.001 3698683
## 51.00000 1473101 -84900.29 3031103 -909656.611 3855860
## 51.14286 1412692 -145826.51 2971211 -970856.601 3796241
## 51.28571 1402216 -208913.19 3013344 -1061793.215 3866224
## 51.42857 1441449 -280997.13 3163895 -1192804.923 4075702
## 51.57143 1519441 -366340.22 3405222 -1364612.443 4403494
## 51.71429 1418381 -407769.77 3244531 -1374475.365 4211236
## 51.85714 1435377 -479591.75 3350347 -1493315.225 4364070
## 52.00000 1457600 -555373.58 3470574 -1620977.521 4536178
## 52.14286 1398684 -598920.84 3396289 -1656389.006 4453757
## 52.28571 1389114 -660782.72 3439011 -1745932.531 4524161
plot(mvisits.ts.ets.pred, xlab = "Daily", ylab = "Daily msite visits", main = "ETS")

accuracy(mvisits.ts.ets.pred)
## ME RMSE MAE MPE MAPE MASE
## Training set 807.8417 159309.2 82445.43 -0.1713742 5.721994 0.4411183
## ACF1
## Training set 0.07174062
Acf(mvisits.ts.ets.pred$residuals, lag.max = 30, main="ACF of residuals")
mvisits.ts.ets.pred$mean
## Time Series:
## Start = c(48, 2)
## End = c(52, 3)
## Frequency = 7
## [1] 1517792 1500512 1536651 1613982 1501514 1514621 1533390 1467175
## [9] 1453172 1490801 1568450 1461476 1476456 1496889 1434189 1422320
## [17] 1460921 1538777 1435384 1451585 1473101 1412692 1402216 1441449
## [25] 1519441 1418381 1435377 1457600 1398684 1389114
# next 30 days forecast
