ARIMA Prediction Modelling

#load the required libraries
library(quantmod)
library(forecast)
library(tseries)
library(timeSeries)
library(xts)
library(lmtest)

##Data Importing

# Download the price history for the "FB" ticker and return it as an object
# (auto.assign = FALSE) instead of having getSymbols() create a variable in
# the calling environment.
# NOTE(review): Facebook's ticker was renamed to META in 2022 -- confirm that
# the data source still resolves "FB".
FB <- getSymbols("FB", auto.assign = FALSE)

##Data Preprocessing

# Select only the close price. Cl() picks the column by name, so the result
# does not depend on the column order of the downloaded object.
FB_Close_Prices <- Cl(FB)

# Work on the log scale to achieve lower variance.
FB.log <- log(FB_Close_Prices)

# Daily log returns: first difference of the log prices.
FB_New <- diff(FB.log, lag = 1)

# The first row has no previous price to difference against, so it is NA;
# drop it.
FB_New <- FB_New[!is.na(FB_New)]

##Plotting

# Plot the daily log-return series to inspect it visually before modelling.
plot(FB_New)

##Modelling the data

# Run the Augmented Dickey-Fuller test on the log returns. The alternative
# hypothesis is stationarity, so a small p-value supports modelling the
# series without further differencing.
adf.test(FB_New)
## Warning in adf.test(FB_New): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  FB_New
## Dickey-Fuller = -13.363, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
# Hold out the final 400 observations for forecasting; rows 1..splitpoint are
# used for model identification and fitting.
# Plain integer subtraction is used rather than floor(n * ((n - 400) / n)):
# that floating-point round trip can land just below n - 400, in which case
# floor() yields one training row too few (and a 401-row test set).
splitpoint <- nrow(FB_New) - 400L
# Fail early if the series is too short to leave any training data.
stopifnot(splitpoint >= 1L)

# To determine the AR and MA parameters for the ARIMA model, plot the ACF
# (Autocorrelation Function) and PACF (Partial Autocorrelation Function) of
# the training rows, stacked in a 2 x 1 layout.
old_par <- par(mfrow = c(2, 1))
train_rows <- seq_len(splitpoint)
acf.FB <- acf(FB_New[train_rows, ], main = "ACF Plot", lag.max = 25)
pacf.FB <- pacf(FB_New[train_rows, ], main = "PACF Plot", lag.max = 25)
# Restore the previous graphics layout so later plots are not squeezed into
# the 2 x 1 grid.
par(old_par)

The final ARIMA model order is (1, 0, 1). Because the log-return series is already stationary, no differencing is applied, so the parameter d is 0.

# Split the log returns chronologically: the first `splitpoint` rows form the
# training set and the remaining rows form the test set. head()/tail() avoid
# the `1:n` / `(n + 1):m` sequences, which count backwards when a bound is
# out of range.
FB_train <- head(FB_New, splitpoint)
FB_test <- tail(FB_New, -splitpoint)

# Fit an ARMA(1, 1) model (d = 0, no differencing) without a mean term.
fit <- arima(FB_train, order = c(1, 0, 1), include.mean = FALSE)

Call summary() on the fitted model to read the coefficients, standard errors, etc.

# Print the fitted model's coefficients, standard errors, sigma^2,
# log likelihood, AIC and training-set error measures.
summary(fit)
## 
## Call:
## arima(x = FB_train, order = c(1, 0, 1), include.mean = FALSE)
## 
## Coefficients:
##           ar1     ma1
##       -0.2580  0.2307
## s.e.   0.5068  0.5053
## 
## sigma^2 estimated as 0.0005468:  log likelihood = 4925.89,  aic = -9845.78
## 
## Training set error measures:
##                        ME       RMSE        MAE MPE MAPE      MASE        ACF1
## Training set 0.0009379521 0.02338409 0.01531847 NaN  Inf 0.6749818 0.001297893
# z tests of the fitted coefficients (estimate, standard error, z value,
# p-value). In the recorded output both p-values exceed 0.6, so neither the
# ar1 nor the ma1 term is significant at conventional levels.
coeftest(fit)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)
## ar1 -0.25797    0.50685 -0.5090   0.6108
## ma1  0.23066    0.50525  0.4565   0.6480
# Forecast one value per held-out observation with a 99% prediction interval.
# The horizon is taken from the test set itself so the forecast vector always
# has exactly as many points as index(FB_test), which xts() requires below.
arima.forecast <- forecast(fit, h = nrow(FB_test), level = 99)

# Put the point forecasts on the test-set dates.
FB_forecasts <- xts(arima.forecast[["mean"]], order.by = index(FB_test))
colnames(FB_forecasts) <- c("Forecasted")

# Merge actual and forecasted returns side by side, aligned by date.
FB_compare <- merge(FB_test, FB_forecasts)
colnames(FB_compare) <- c("Actual", "Forecasted")

# Plot actual return vs forecasted return.
plot(FB_compare, main = "Actual vs Forecasted Returns")

# Directional accuracy: flag each day on which the forecast has the same sign
# as the realised return. The logical result is stored in the numeric series
# as 1 (match) / 0 (mismatch).
FB_compare$Accuracy <-
  sign(FB_compare[, "Actual"]) == sign(FB_compare[, "Forecasted"])

# Uncomment to print the full comparison table.
# print(FB_compare)

# Show the first six rows.
head(FB_compare, n = 6)
##                  Actual    Forecasted Accuracy
## 2020-10-06 -0.022893702 -6.777774e-04        1
## 2020-10-07 -0.002089900  1.748456e-04        0
## 2020-10-08  0.021615063 -4.510474e-05        0
## 2020-10-09  0.002612606  1.163563e-05        1
## 2020-10-12  0.041842415 -3.001631e-06        0
## 2020-10-13  0.001413380  7.743279e-07        1

# Accuracy check: count the days on which the forecast sign missed (0) or
# matched (1) the sign of the actual return.

table(FB_compare$Accuracy) # 212 of 400 matches = 53% in the recorded run
## 
##   0   1 
## 188 212

Out of 400 days, the sign of the forecast matched the sign of the actual return on 212 days, i.e. 53% directional accuracy.