library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.3
library(xts)
## Warning: package 'xts' was built under R version 3.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readr)
# Read the FB price history CSV into a tibble. readr guesses the column types
# and prints the specification it settled on.
fbdata <- read_csv(
  file = "~/Dropbox/Boston College/Predictive Analytics/FB.csv"
)
## Parsed with column specification:
## cols(
## Date = col_character(),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_integer()
## )
# Turn the adjusted close column into a ts object with 252 observations per
# period, anchoring the first observation at period 1 of 2013, then plot it.
myts <- ts(fbdata[["Adj Close"]], start = c(2013, 1), frequency = 252)
plot(myts, ylab = "Adj Close")
The series shows a sharp dip in its most recent observations. It will be interesting to see how this affects our forecast.
# Train and test set: the first 1008 observations are used for fitting and
# observations 1009-1260 are held out for evaluation.
# Note: `[` indexing returns plain numeric vectors, so `train` and `test` do
# not carry the start/frequency attributes of `myts`.
train <- myts[seq_len(1008)]
test <- myts[seq(from = 1009, to = 1260)]
Volume as external regressor
# Trading volume as the external regressor, split on the same row ranges as
# the train/test price series (rows 1-1008 and rows 1009-1260).
#
# Volume is rescaled from raw share counts to millions of shares. Fitting on
# the raw counts reported an xreg coefficient printed as 0 with NaN standard
# errors (warning from sqrt(diag(x$var.coef))), which is the usual symptom of
# a regressor on a very different scale from the response. The rescaling is
# intended to give the optimiser a better-conditioned problem; the xreg
# coefficient is then read as "per million shares".
# Both splits must use the same scaling so the fitted coefficient applies
# unchanged to the forecast-period regressor.
train.vol <- as.numeric(fbdata$Volume[1:1008]) / 1e6
test.vol <- as.numeric(fbdata$Volume[1009:1260]) / 1e6
Arima with regressor first
# Regress the training series on training-period volume and let auto.arima()
# select the ARIMA structure of the errors; then print the chosen model.
myfit1 <- auto.arima(train, xreg = train.vol)
myfit1
## Series: train
## Regression with ARIMA(0,1,0) errors
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## drift xreg
## 0.1123 0
## s.e. NaN NaN
##
## sigma^2 estimated as 2.274: log likelihood=-1841.5
## AIC=3688.99 AICc=3689.02 BIC=3703.74
Forecast
# Forecast over the 252-observation hold-out window, passing the test-period
# volume as the future regressor values, then score against the held-out data.
# NOTE(review): this feeds realised future volume into the forecast, which
# would not be available when forecasting for real -- confirm this is intended.
forecast1 <- forecast(myfit1, xreg = test.vol, h = 252)
accuracy1 <- accuracy(forecast1, test)
accuracy1
## ME RMSE MAE MPE MAPE
## Training set 2.755312e-05 1.505704 1.043122 -0.022958 1.361275
## Test set 1.291977e+01 15.638577 14.088724 7.399928 8.148390
## MASE ACF1
## Training set 0.9972878 0.0284358
## Test set 13.4696801 NA
# Plot the forecast produced by the model with the volume regressor.
plot(forecast1)
Now fit an ARIMA model without the regressor for comparison
# Baseline for comparison: auto.arima() on the training series alone, with no
# external regressor; then print the chosen model.
myfit2 <- auto.arima(train)
myfit2
## Series: train
## ARIMA(2,1,4) with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 drift
## -0.5101 0.4071 0.5363 -0.4180 -0.1180 -0.1238 0.1127
## s.e. 0.2655 0.2657 0.2649 0.2723 0.0409 0.0405 0.0374
##
## sigma^2 estimated as 2.241: log likelihood=-1831.8
## AIC=3679.59 AICc=3679.73 BIC=3718.91
Forecast
# Forecast the same 252-observation hold-out window from the regressor-free
# model and score it against the held-out data.
forecast2 <- forecast(myfit2, h = 252)
accuracy2 <- accuracy(forecast2, test)
accuracy2
## ME RMSE MAE MPE MAPE
## Training set -0.0001940347 1.491104 1.038689 -0.0275405 1.363181
## Test set 12.6790085282 15.420789 13.869541 7.2571026 8.019694
## MASE ACF1
## Training set 0.9930499 -0.001307305
## Test set 13.2601279 NA
# Plot the forecast produced by the model without the regressor.
plot(forecast2)
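The second model (without the regressor) performed slightly better on the test set than the first (with the regressor): RMSE 15.42 vs. 15.64 and MAPE 8.02% vs. 8.15%. This was interesting to see, and it is consistent with the volume coefficient being estimated as 0 in the first model.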