library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.3
library(xts)
## Warning: package 'xts' was built under R version 3.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(readr)
# Load the FB price table from a local CSV export (columns per the parse
# message: Date, Open, High, Low, Close, Adj Close, Volume).
# NOTE(review): the path is user-specific; the script only runs where this
# exact file exists.
fbdata <- read_csv(
  file = "~/Dropbox/Boston College/Predictive Analytics/FB.csv"
)
## Parsed with column specification:
## cols(
##   Date = col_character(),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   `Adj Close` = col_double(),
##   Volume = col_integer()
## )
# Turn the adjusted-close column into a time series with 252 observations per
# period (presumably trading days per year -- confirm), labelling the first
# observation as period 1 of 2013, then plot it.
myts <- ts(
  data = fbdata[["Adj Close"]],
  start = c(2013, 1),
  frequency = 252
)
plot(myts, ylab = "Adj Close")

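The series shows a sharp drop at the very end of the sample. Because that drop falls inside the held-out test period (the last 252 observations), it will be interesting to see how it affects forecast accuracy.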

# Train and test set
# Positional split: the first 1008 observations are used for fitting and the
# following 252 (positions 1009-1260) are held out for evaluation.
# Indexing past the end of a vector silently yields NA in R, so stop with an
# explicit error if the series is shorter than the split assumes.
stopifnot(length(myts) >= 1260)
# Note: subsetting a ts object with `[` and an index drops its time-series
# attributes, so `train` and `test` are plain numeric vectors.
train <- myts[1:1008]
test <- myts[1009:1260]

Use trading volume as an external regressor, split at the same positions as the price series.

# Volume regressor, split at the same positions as the price series
# (rows 1-1008 for training, rows 1009-1260 for the hold-out period).
# Fitting on raw share counts printed an xreg coefficient of 0 with NaN
# standard errors ("NaNs produced" in sqrt(diag(x$var.coef))), which usually
# points to a badly scaled regressor (raw counts are presumably in the tens of
# millions -- confirm against the data). Expressing volume in millions of
# shares puts the coefficient on a workable scale. A linear rescaling does not
# change the model in exact arithmetic, but it may change which ARIMA order
# auto.arima selects, so any previously printed results must be regenerated.
train.vol <- as.numeric(fbdata$Volume[1:1008]) / 1e6
test.vol <- as.numeric(fbdata$Volume[1009:1260]) / 1e6

First, fit an ARIMA model with volume as the regressor.

# Regression with ARIMA errors: let auto.arima choose the ARIMA order for the
# training prices, with training-period volume as the external regressor, and
# print the fitted model.
myfit1 <- auto.arima(train, xreg = train.vol)
print(myfit1)
## Series: train 
## Regression with ARIMA(0,1,0) errors 
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##        drift  xreg
##       0.1123     0
## s.e.     NaN   NaN
## 
## sigma^2 estimated as 2.274:  log likelihood=-1841.5
## AIC=3688.99   AICc=3689.02   BIC=3703.74

Forecast

# Forecast the hold-out period from the regression model and score it against
# the held-out prices (training- and test-set error measures).
# NOTE(review): the future regressor values are the observed test-period
# volumes, which would not be known at forecast time -- confirm this is the
# intended comparison.
forecast1 <- forecast(myfit1, h = 252, xreg = test.vol)
accuracy1 <- accuracy(forecast1, test)
print(accuracy1)
##                        ME      RMSE       MAE       MPE     MAPE
## Training set 2.755312e-05  1.505704  1.043122 -0.022958 1.361275
## Test set     1.291977e+01 15.638577 14.088724  7.399928 8.148390
##                    MASE      ACF1
## Training set  0.9972878 0.0284358
## Test set     13.4696801        NA
# Plot the forecast from the model with the volume regressor.
plot(x = forecast1)

Now fit an ARIMA model without the regressor for comparison.

# Baseline: let auto.arima choose an ARIMA model for the training prices
# alone (no external regressor) and print the fitted model.
myfit2 <- auto.arima(train)
print(myfit2)
## Series: train 
## ARIMA(2,1,4) with drift 
## 
## Coefficients:
##           ar1     ar2     ma1      ma2      ma3      ma4   drift
##       -0.5101  0.4071  0.5363  -0.4180  -0.1180  -0.1238  0.1127
## s.e.   0.2655  0.2657  0.2649   0.2723   0.0409   0.0405  0.0374
## 
## sigma^2 estimated as 2.241:  log likelihood=-1831.8
## AIC=3679.59   AICc=3679.73   BIC=3718.91

Forecast

# Forecast 252 steps ahead from the baseline model and score the forecast
# against the held-out prices (training- and test-set error measures).
forecast2 <- forecast(myfit2, h = 252)
accuracy2 <- accuracy(forecast2, test)
print(accuracy2)
##                         ME      RMSE       MAE        MPE     MAPE
## Training set -0.0001940347  1.491104  1.038689 -0.0275405 1.363181
## Test set     12.6790085282 15.420789 13.869541  7.2571026 8.019694
##                    MASE         ACF1
## Training set  0.9930499 -0.001307305
## Test set     13.2601279           NA
# Plot the forecast from the baseline model without a regressor.
plot(x = forecast2)

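The second model (without the regressor) performed slightly better on the test set than the first (with the regressor): RMSE 15.42 vs. 15.64 and MAPE 8.02% vs. 8.15%. This was interesting to see, but note that the volume coefficient in the first model was estimated as 0 with NaN standard errors, so the regressor contributed essentially nothing to that fit and the comparison should be read with caution.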