ANLY 699 - Project

R Code

# Packages used by this analysis. No library() calls are visible in this
# file, so they are loaded here to let the script run in a fresh R session.
library(DT)         # datatable()
library(ggplot2)    # ggplot(), geom_line(), labs(), theme_classic()
library(gridExtra)  # grid.arrange()
library(tseries)    # adf.test() -- presumably tseries, judging by the output
library(forecast)   # auto.arima(), forecast()

# Read the BTC-USD price history and turn the Date column (ISO
# "YYYY-MM-DD" strings) into Date objects so it can be compared and plotted.
BTC <- read.csv("~/Documents/HU/ANLY 699-90-O/699 R/BTC-USD.csv")
BTC$Date <- as.Date(BTC$Date, format = "%Y-%m-%d")

# Interactive table of the raw data with per-column filter boxes on top.
datatable(BTC, filter = "top")

# Data-quality checks on the raw table ----

# Number of missing cells across every column.
sum(is.na(BTC))

## [1] 0

# Close and Adj.Close are expected to be the same series. all() states that
# directly instead of comparing a count of matches with the vector length.
all(BTC$Close == BTC$Adj.Close)

## [1] TRUE

# Number of days whose Open differs from the previous day's Close. The last
# row is dropped via nrow(BTC) rather than a hard-coded row index, so the
# two vectors stay aligned whatever the length of the CSV.
sum(BTC$Open[-1] != BTC$Close[-nrow(BTC)])

## [1] 195

# One row per calendar day: the inclusive span between the first and last
# date must equal the number of rows.
(tail(BTC$Date, 1) - head(BTC$Date, 1) + 1) == nrow(BTC)

## [1] TRUE

# Row(s) on which the highest High of the whole history was recorded.
highest_high <- max(BTC$High)
BTC[BTC$High == highest_high, ]

##            Date    Open     High      Low    Close Adj.Close     Volume
## 2711 2017-12-17 19346.6 19870.62 18750.91 19065.71  19065.71 2264650369

# Row(s) on which the lowest Low of the whole history was recorded.
lowest_low <- min(BTC$Low)
BTC[BTC$Low == lowest_low, ]

##          Date  Open  High  Low   Close Adj.Close Volume
## 84 2010-10-07 0.067 0.088 0.01 0.08685   0.08685  10784

# Line chart of the closing price over the full history; the plot object is
# stored in fig1 and then printed.
fig1 <- ggplot(BTC, aes(Date, Close)) +
  geom_line() +
  labs(
    x = "Trading Date",
    y = "Closing Price (USD)",
    title = "Bitcoin Trading Price: 2010-2018"
  ) +
  theme_classic()
fig1

# Closing-price line chart for one phase of the history. The phase covers the
# rows from the one dated first_day through the one dated last_day (each
# located by an exact date match); phase_label is used as the subtitle.
phase_plot <- function(first_day, last_day, phase_label) {
  phase_rows <- which(BTC$Date == first_day):which(BTC$Date == last_day)
  ggplot(BTC[phase_rows, ], aes(Date, Close)) +
    geom_line() +
    labs(
      title = "Bitcoin Trading Price",
      subtitle = phase_label,
      y = "Closing Price (USD)",
      x = "Trading Date"
    ) +
    theme_classic()
}

# The four consecutive phases, arranged in a 2 x 2 grid.
p1 <- phase_plot("2010-07-16", "2013-03-16", "2010JUL16 - 2013MAR16")
p2 <- phase_plot("2013-03-17", "2017-01-11", "2013MAR17 - 2017JAN11")
p3 <- phase_plot("2017-01-12", "2017-12-15", "2017JAN12 - 2017DEC15")
p4 <- phase_plot("2017-12-16", "2018-10-6", "2017DEC16 - 2018OCT06")
grid.arrange(p1, p2, p3, p4, nrow = 2, bottom = "Four Phases")

# Augmented Dickey-Fuller test on the raw price level: column 5 of BTC
# (Close) for the rows from 2017-01-12 through 2018-10-06. The two character
# dates are matched against the Date column to find the first and last row.
adf.test(BTC[which(BTC$Date=="2017-1-12"):which(BTC$Date=="2018-10-6"),5])

## 
##  Augmented Dickey-Fuller Test
## 
## data:  BTC[which(BTC$Date == "2017-1-12"):which(BTC$Date == "2018-10-6"),     5]
## Dickey-Fuller = -1.5988, Lag order = 8, p-value = 0.7481
## alternative hypothesis: stationary

# Closing prices from 2017-01-12 through 2018-10-06 as a time series with 30
# observations per period. `frequency` is written out in full rather than
# relying on partial matching of `freq`, and the Close column is selected by
# name instead of by position.
price_rows <- which(BTC$Date == "2017-1-12"):which(BTC$Date == "2018-10-6")
price <- ts(BTC[price_rows, "Close"], frequency = 30)

# ADF test on the first difference of the series.
adf.test(diff(price, 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(price, 1)
## Dickey-Fuller = -7.6665, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary

# ACF and PACF of the first-differenced closing prices (2017-01-12 through
# 2018-10-06), drawn side by side for lags up to 30.
par(mfrow = c(1, 2))
close_rows <- which(BTC$Date == "2017-1-12"):which(BTC$Date == "2018-10-6")
close_diff <- diff(BTC[close_rows, 5], 1)
acf(close_diff, lag.max = 30, main = "Autocorrelation Plot, d=1")
pacf(close_diff, lag.max = 30, main = "Partial Autocorrelation Plot, d=1")

# Let auto.arima() choose an ARIMA specification for `price`, then print the
# selected model, its coefficients and the training-set error measures.
fit <-auto.arima(price)
summary(fit)

## Series: price 
## ARIMA(1,1,1) 
## 
## Coefficients:
##           ar1     ma1
##       -0.7287  0.8018
## s.e.   0.1148  0.0986
## 
## sigma^2 estimated as 163448:  log likelihood=-4696.55
## AIC=9399.1   AICc=9399.13   BIC=9412.45
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE    MAPE      MASE
## Training set 8.768421 403.3297 222.9493 0.2130409 3.36429 0.1436135
##                       ACF1
## Training set -0.0004032522

# Diagnostic plots for the fitted model.
tsdiag(fit)

# Forecast 30 steps ahead from the fitted model.
pred <- forecast(fit, h = 30)

# Draw the forecast on a single-panel layout and overlay the in-sample
# fitted values in red.
par(mfrow = c(1, 1))
plot(
  pred,
  xlab = "Observation",
  ylab = "Closing Price (USD)",
  main = "Bitcoin Trading Price: 2017-2018\n Forecasts from ARIMA(1,1,1)",
  lwd = 2
)
lines(pred$fitted, col = "red")

series_labels <- c("Fitted", "Predicted", "Original")
series_colours <- c("red", "blue", "black")
legend(
  "topleft",
  legend = series_labels,
  col = series_colours,
  lty = rep(1, 3),
  lwd = rep(2, 3),
  bty = "n"
)

# Phase-four closing prices (column 5) split into a training window
# (2017-12-16 through 2018-09-06) and a test window (2018-09-07 through
# 2018-10-06).
training_rows <- which(BTC$Date == "2017-12-16"):which(BTC$Date == "2018-9-6")
test_rows <- which(BTC$Date == "2018-9-7"):which(BTC$Date == "2018-10-6")
phase4.training <- BTC[training_rows, 5]
phase4.test <- BTC[test_rows, 5]

# ADF test on the training price level.
adf.test(phase4.training)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  phase4.training
## Dickey-Fuller = -2.4155, Lag order = 6, p-value = 0.4009
## alternative hypothesis: stationary

# ADF test on the first difference of the phase-four training prices.
adf.test(diff(phase4.training,1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(phase4.training, 1)
## Dickey-Fuller = -7.1118, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

# Decompose the training prices, treating 30 observations as one period.
# `frequency` is written out in full rather than relying on partial matching
# of `freq`.
training_ts <- ts(phase4.training, frequency = 30)
plot(decompose(training_ts), xlab = "Time (Frequency = 30 days)")

# ACF and PACF of the first-differenced training prices, side by side, for
# lags up to 30.
par(mfrow = c(1, 2))
training_diff <- diff(phase4.training, 1)
acf(training_diff, lag.max = 30, main = "Autocorrelation Plot, d=1")
pacf(training_diff, lag.max = 30, main = "Partial Autocorrelation Plot, d=1")

# Fit auto.arima() to the training prices wrapped in a ts with 30
# observations per period (`freq` is matched partially to ts()'s `frequency`
# argument), then print the selected model and its training-set error
# measures.
fit2 <- auto.arima(ts(phase4.training,freq=30))
summary(fit2)

## Series: ts(phase4.training, freq = 30) 
## ARIMA(1,1,2)(2,0,0)[30] 
## 
## Coefficients:
##          ar1      ma1     ma2     sar1    sar2
##       0.3112  -0.3539  0.1154  -0.0451  0.0123
## s.e.  0.0087   0.0580  0.0410   0.0018  0.0005
## 
## sigma^2 estimated as 266213:  log likelihood=-2028.47
## AIC=4068.94   AICc=4069.27   BIC=4090.42
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -45.49886 510.1065 339.7482 -0.4978456 3.575298 0.1837435
##                    ACF1
## Training set 0.01017444

#fit2 <- arima(ts(phase4.training),order=c(1,1,2),seasonal=list(order=c(2,0,0),period=30))
# Diagnostic plots for the phase-four model.
tsdiag(fit2)

# Forecast exactly as many steps as there are test observations, so the
# predicted and test series always have the same length, then keep the
# forecast table as a plain data frame.
pred2 <- forecast(fit2, h = length(phase4.test))
pred2 <- as.data.frame(pred2)

# Point forecasts (black triangles) against the test prices (red dots) on a
# single-panel layout.
# NOTE(review): the y-axis limits are fixed, so values outside
# 6170-6770 will not be visible -- confirm this range is still wanted.
par(mfrow = c(1, 1))
plot(
  ts(pred2$`Point Forecast`),
  ylim = c(6170, 6770),
  ylab = "Predicted Price (USD)",
  xlab = "2018SEPT7 - 2018OCT6",
  main = "Predicted Bitcoin Price",
  pch = 17,
  type = "b"
)
lines(ts(phase4.test), col = "red", pch = 19, type = "b")
legend(
  "topleft",
  legend = c("Predicted", "Test Set"),
  col = c("black", "red"),
  pch = c(17, 19),
  bty = "n"
)

ANLY 699 - Project

ARIMA Modeling on Bitcoin Price: 2010-2018

Zhengxiao Wei

2020-06-06

R Code