Overview of Bitcoin data analysis using GARCH

library(ggplot2)
library(caTools)
library(dygraphs)
library(xts)
library(forecast)
library(fGarch)
library(tseries)

Importing Data

getwd()
## [1] "C:/Users/abhil/Desktop/Rfiles"
setwd ("C:/Users/abhil/Desktop/Rfiles")
data=read.csv("BTC_USD Bitfinex Historical Data.csv", header = TRUE)

Dropping price and change for candlestick chart.

df=data
df =df[,-c(2,7)]

# Formating Dates.
df=xts(df[,-1],order.by=as.Date(df[,1],"%m/%d/%Y"))

# Creating candlestick chart.
m <- head(df, n=39)
dygraph(m) %>%
dyCandlestick()
da=data.frame(data$Date,data$Price)
class(da$data.Price)
## [1] "integer"
da$data.Date = as.Date(da$data.Date, '%m/%d/%Y')
ggplot(data=da, aes(data.Date,as.numeric(data.Price))) + geom_line()

## TimeSeries Plot

The data will be skewed data. There is so much variance/volatility coming towards the end.

da$logPrice= log(as.numeric(da$data.Price))
ggplot(data=da, aes(data.Date,as.numeric(logPrice))) + geom_line()

# log of price

(Trying to make teh data stationary using various methods)

da$sqrt= sqrt(da$logPrice)
ggplot(data=da, aes(data.Date,as.numeric(sqrt))) + geom_line()

#SQRT of LogPrice

since there is still a lot of raneg in y axis, let us use the square root function to the data

diffPrice=diff(da$logPrice)
newFrame=da[-c(1),]
acf(diffPrice)

ggplot(data=newFrame, aes(data.Date,diffPrice)) + geom_line()

# diff of sqrt

Using diff once makes the data more stationary.

adf.test(diffPrice)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diffPrice
## Dickey-Fuller = -11.304, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary

The data is Stationary

fit1 = auto.arima(da$sqrt, trace = TRUE, test = "kpss", ic = "bic")
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,2,2)                    : Inf
##  ARIMA(0,2,0)                    : -9484.479
##  ARIMA(1,2,0)                    : -9928.875
##  ARIMA(0,2,1)                    : -10592.62
##  ARIMA(1,2,1)                    : -10588.41
##  ARIMA(0,2,2)                    : -10590.17
##  ARIMA(1,2,2)                    : -10583.28
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(0,2,1)                    : Inf
##  ARIMA(0,2,2)                    : Inf
##  ARIMA(1,2,1)                    : Inf
##  ARIMA(1,2,2)                    : Inf
##  ARIMA(1,2,0)                    : -9947.43
## 
##  Best model: ARIMA(1,2,0)
Box.test(fit1$residuals, lag = 12, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  fit1$residuals
## X-squared = 286.35, df = 12, p-value < 2.2e-16
acf(fit1$residuals^2)

tsdisplay(fit1$residuals)

tsdiag(fit1)

#### Model arima(1,0) will be selecte dfor GARCH analysis and the diff data is used for GARCH analysis

# garch effect is there
model=garchFit(~arma(1,0)+garch(1,1), data=diff(da$sqrt), trace=F, cond.dist ='std')

summary (model)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~arma(1, 0) + garch(1, 1), data = diff(da$sqrt), 
##     cond.dist = "std", trace = F) 
## 
## Mean and Variance Equation:
##  data ~ arma(1, 0) + garch(1, 1)
## <environment: 0x0000000024554e58>
##  [data = diff(da$sqrt)]
## 
## Conditional Distribution:
##  std 
## 
## Coefficient(s):
##          mu          ar1        omega       alpha1        beta1  
##  3.0929e-04  -4.6090e-02   1.1145e-06   2.6999e-01   8.2504e-01  
##       shape  
##  2.7921e+00  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##          Estimate  Std. Error  t value Pr(>|t|)    
## mu      3.093e-04   1.043e-04    2.965  0.00303 ** 
## ar1    -4.609e-02   2.444e-02   -1.886  0.05928 .  
## omega   1.115e-06   4.805e-07    2.320  0.02037 *  
## alpha1  2.700e-01   6.277e-02    4.301  1.7e-05 ***
## beta1   8.250e-01   2.230e-02   36.999  < 2e-16 ***
## shape   2.792e+00   2.374e-01   11.761  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  5714.605    normalized:  3.65384 
## 
## Description:
##  Thu May 03 15:22:46 2018 by user: abhil 
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  79774.68  0           
##  Shapiro-Wilk Test  R    W      0.8556633 0           
##  Ljung-Box Test     R    Q(10)  34.47074  0.0001536681
##  Ljung-Box Test     R    Q(15)  40.46234  0.0003860302
##  Ljung-Box Test     R    Q(20)  49.06471  0.0003010221
##  Ljung-Box Test     R^2  Q(10)  5.440571  0.8598744   
##  Ljung-Box Test     R^2  Q(15)  5.83996   0.9823124   
##  Ljung-Box Test     R^2  Q(20)  6.275422  0.9984708   
##  LM Arch Test       R    TR^2   5.556593  0.9367664   
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -7.300007 -7.279464 -7.300036 -7.292370
resid=residuals(model, standardize=T)
Box.test(resid)
## 
##  Box-Pierce test
## 
## data:  resid
## X-squared = 7.7622, df = 1, p-value = 0.005335
plot(resid,type='l', main="Residual plot for model") 

Box.test(resid^2)
## 
##  Box-Pierce test
## 
## data:  resid^2
## X-squared = 7.3318e-07, df = 1, p-value = 0.9993
tsdisplay(resid^2)

#### The variables are significant with an AIC of -7.3

pred=predict(model,n.ahead=15,plot=TRUE,crit_val =1)

#plot(forecast(fit1),xlab="Year",ylab="Stock Value")

The plot above shows the upper limit, lower limit and the mean values for the differenced data

tail(diff(da$sqrt))
## [1] -0.0136927260  0.0073169573 -0.0065344133  0.0077066686  0.0009526415
## [6] -0.0028217341
lowerbound = pred$lowerInterval
lowpred = diffinv(lowerbound, xi= 3.021804)
lowpred
##  [1] 3.021804 3.013562 3.004697 2.995356 2.985512 2.975145 2.964232
##  [8] 2.952751 2.940679 2.927988 2.914654 2.900648 2.885941 2.870501
## [15] 2.854297 2.837294
actuallow = exp(lowpred^2)
actuallow
##  [1] 9240.021 8791.655 8334.901 7880.600 7430.009 6984.753 6546.400
##  [8] 6116.467 5696.404 5287.583 4891.284 4508.683 4140.841 3788.693
## [15] 3453.040 3134.540
midbound = pred$meanForecast
valpred = diffinv(midbound, xi= 3.021804)
valpred
##  [1] 3.021804 3.022243 3.022532 3.022828 3.023124 3.023420 3.023715
##  [8] 3.024011 3.024307 3.024602 3.024898 3.025194 3.025489 3.025785
## [15] 3.026081 3.026376
actualval = exp(valpred^2)
actualval
##  [1] 9240.021 9264.590 9280.791 9297.411 9314.045 9330.711 9347.409
##  [8] 9364.138 9380.898 9397.690 9414.514 9431.370 9448.257 9465.177
## [15] 9482.128 9499.112
upperbound = pred$upperInterval
uppred = diffinv(upperbound, xi= 3.021804)
uppred
##  [1] 3.021804 3.030924 3.040367 3.050300 3.060736 3.071695 3.083199
##  [8] 3.095271 3.107935 3.121216 3.135142 3.149739 3.165038 3.181069
## [15] 3.197864 3.215459
actualup = exp(uppred^2)
actualup
##  [1]  9240.021  9764.436 10340.601 10985.512 11708.900 12522.836 13441.669
##  [8] 14482.567 15666.163 17017.378 18566.477 20350.405 22414.531 24814.893
## [15] 27621.152 30920.465

The inverse differenced values are calculated as above

actPrice = data$Price[1300:length(data$Price)]

lPrice = c(actPrice,actuallow)
mPrice=c(actPrice,actualval)
hPrice=(c(actPrice,actualup))

plot.ts(lPrice, main="Forecast plot for Bitcoin for 20 days",col="red")
lines(1:length(hPrice),hPrice,col="Green")
lines(1:length(mPrice),mPrice,col="Blue")

## The forecast values are shown above with the original values. The lower bound and upper bound indicate the range in which the forecast value smight fall.