# ============================================================================================================
# Data source: https://www.kaggle.com/sherloconan/grad-695-anly-699
# Load the BTC-USD price history and turn the Date column into real Date
# values so it can be compared and plotted as a time axis.
BTC <- read.csv(file = "~/Documents/HU/ANLY 699-90-O/699 R/BTC-USD.csv")
BTC[["Date"]] <- as.Date(BTC[["Date"]], format = "%Y-%m-%d")

# Browsable table of the raw data, with the column filters placed on top.
datatable(data = BTC, filter = "top")
# ---- Sanity checks on the raw data ----

# Total number of missing cells across every column.
sum(is.na(BTC))
## [1] 0

# Close and Adj.Close are expected to be the very same series.
all(BTC$Close == BTC$Adj.Close)
## [1] TRUE

# Count the days whose opening price differs from the previous day's close.
# head(x, -1) drops the last element whatever the number of rows is, so the
# comparison stays aligned if the CSV is refreshed (no hard-coded row count).
sum(BTC$Open[-1] != head(BTC$Close, -1))
## [1] 195

# The inclusive span between the first and the last date equals the number
# of rows, which is consistent with one row per calendar day.
(tail(BTC$Date, 1) - head(BTC$Date, 1) + 1) == nrow(BTC)
## [1] TRUE

# Row(s) holding the all-time highest intraday price.
BTC[BTC$High == max(BTC$High), ]
## Date Open High Low Close Adj.Close Volume
## 2711 2017-12-17 19346.6 19870.62 18750.91 19065.71 19065.71 2264650369

# Row(s) holding the all-time lowest intraday price.
BTC[BTC$Low == min(BTC$Low), ]
## Date Open High Low Close Adj.Close Volume
## 84 2010-10-07 0.067 0.088 0.01 0.08685 0.08685 10784
# Figure 1: closing price over the whole sample, drawn as a single line.
fig1 <- ggplot(data = BTC, mapping = aes(x = Date, y = Close)) +
  geom_line() +
  labs(
    title = "Bitcoin Trading Price: 2010-2018",
    x = "Trading Date",
    y = "Closing Price (USD)"
  ) +
  theme_classic()
fig1
# Build the closing-price line chart for one phase of the series.
#
#   from, to  Inclusive date bounds; anything as.Date() accepts.
#   subtitle  Text shown under the shared plot title.
#
# Returns a ggplot object. Rows are selected by date range rather than by
# which(start):which(end), so the helper does not depend on the row order of
# BTC and stops with a clear error when the window matches no rows.
plot_phase <- function(from, to, subtitle) {
  from <- as.Date(from)
  to <- as.Date(to)
  stopifnot(from <= to)

  # which() also drops rows whose date is NA instead of selecting them.
  rows <- which(BTC$Date >= from & BTC$Date <= to)
  stopifnot(length(rows) > 0)

  ggplot(BTC[rows, ], aes(Date, Close)) +
    geom_line() +
    labs(
      title = "Bitcoin Trading Price",
      subtitle = subtitle,
      y = "Closing Price (USD)",
      x = "Trading Date"
    ) +
    theme_classic()
}

# The four consecutive phases of the price history, one panel each.
p1 <- plot_phase("2010-07-16", "2013-03-16", "2010JUL16 - 2013MAR16")
p2 <- plot_phase("2013-03-17", "2017-01-11", "2013MAR17 - 2017JAN11")
p3 <- plot_phase("2017-01-12", "2017-12-15", "2017JAN12 - 2017DEC15")
p4 <- plot_phase("2017-12-16", "2018-10-06", "2017DEC16 - 2018OCT06")

# 2 x 2 grid of the phase plots with a shared caption underneath.
grid.arrange(p1, p2, p3, p4, nrow = 2, bottom = "Four Phases")
# ---- Stationarity of the closing price, 2017-01-12 to 2018-10-06 ----

# Extract the window once instead of repeating the same row/column lookup in
# every call. The column is addressed by name ("Close" is column 5 of BTC).
close_2017_2018 <- BTC[
  which(BTC$Date == as.Date("2017-01-12")):
    which(BTC$Date == as.Date("2018-10-06")),
  "Close"
]

# ADF test on the price level: the unit-root null is not rejected.
adf.test(close_2017_2018)
##
## Augmented Dickey-Fuller Test
##
## data: close_2017_2018
## Dickey-Fuller = -1.5988, Lag order = 8, p-value = 0.7481
## alternative hypothesis: stationary

# Same prices as a time series with 30 observations per period; `frequency`
# is spelled out rather than relying on partial matching of `freq`.
price <- ts(close_2017_2018, frequency = 30)

# ADF test on the first difference: the unit-root null is rejected.
adf.test(diff(price, 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(price, 1)
## Dickey-Fuller = -7.6665, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary

# ACF and PACF of the differenced prices, side by side. The plain numeric
# vector is used here (not `price`), as in the original analysis.
# NOTE(review): presumably so that the lag axis counts observations rather
# than 30-day periods -- confirm.
par(mfrow = c(1, 2))
acf(
  diff(close_2017_2018, 1),
  lag.max = 30,
  main = "Autocorrelation Plot, d=1"
)
pacf(
  diff(close_2017_2018, 1),
  lag.max = 30,
  main = "Partial Autocorrelation Plot, d=1"
)
# Let auto.arima() choose the ARIMA orders for the 2017-2018 price series
# and print the selected model with its training-set error measures.
fit <- auto.arima(y = price)
summary(object = fit)
## Series: price
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## -0.7287 0.8018
## s.e. 0.1148 0.0986
##
## sigma^2 estimated as 163448: log likelihood=-4696.55
## AIC=9399.1 AICc=9399.13 BIC=9412.45
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 8.768421 403.3297 222.9493 0.2130409 3.36429 0.1436135
## ACF1
## Training set -0.0004032522

# Diagnostic plots for the fitted model.
tsdiag(object = fit)
# Forecast 30 steps beyond the end of the fitted series.
pred <- forecast(object = fit, h = 30)

# Back to a single-panel layout (the ACF/PACF plots used two panels).
par(mfrow = c(1, 1))

# Forecast plot, with the in-sample fitted values overlaid in red.
plot(
  pred,
  main = "Bitcoin Trading Price: 2017-2018\n Forecasts from ARIMA(1,1,1)",
  xlab = "Observation",
  ylab = "Closing Price (USD)",
  lwd = 2
)
lines(pred[["fitted"]], col = "red")
legend(
  "topleft",
  legend = c("Fitted", "Predicted", "Original"),
  col = c("red", "blue", "black"),
  lty = rep(1, 3),
  lwd = rep(2, 3),
  bty = "n"
)
# ---- Phase 4 (2017-12-16 onwards): train/test split and stationarity ----

# Closing prices for the training window and for the final 30 days held out
# as the test set. The column is addressed by name ("Close" is column 5 of
# BTC) and the dates are written zero-padded and parsed explicitly.
phase4.training <- BTC[
  which(BTC$Date == as.Date("2017-12-16")):
    which(BTC$Date == as.Date("2018-09-06")),
  "Close"
]
phase4.test <- BTC[
  which(BTC$Date == as.Date("2018-09-07")):
    which(BTC$Date == as.Date("2018-10-06")),
  "Close"
]

# ADF test on the training prices: the unit-root null is not rejected.
adf.test(phase4.training)
##
## Augmented Dickey-Fuller Test
##
## data: phase4.training
## Dickey-Fuller = -2.4155, Lag order = 6, p-value = 0.4009
## alternative hypothesis: stationary

# ADF test on the first difference: the unit-root null is rejected.
adf.test(diff(phase4.training, 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(phase4.training, 1)
## Dickey-Fuller = -7.1118, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary

# Classical decomposition with 30 observations per period. `frequency` is
# spelled out rather than relying on partial matching of `freq`.
plot(
  decompose(ts(phase4.training, frequency = 30)),
  xlab = "Time (Frequency = 30 days)"
)

# ACF and PACF of the differenced training prices, side by side.
par(mfrow = c(1, 2))
acf(
  diff(phase4.training, 1),
  lag.max = 30,
  main = "Autocorrelation Plot, d=1"
)
pacf(
  diff(phase4.training, 1),
  lag.max = 30,
  main = "Partial Autocorrelation Plot, d=1"
)
# Let auto.arima() choose a (possibly seasonal) ARIMA model for the phase-4
# training prices, treated as a series with 30 observations per period.
# `frequency` is spelled out rather than relying on partial matching of
# `freq`; the echoed "Series:" line below reflects the call as written.
fit2 <- auto.arima(ts(phase4.training, frequency = 30))
summary(fit2)
## Series: ts(phase4.training, frequency = 30)
## ARIMA(1,1,2)(2,0,0)[30]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sar2
## 0.3112 -0.3539 0.1154 -0.0451 0.0123
## s.e. 0.0087 0.0580 0.0410 0.0018 0.0005
##
## sigma^2 estimated as 266213: log likelihood=-2028.47
## AIC=4068.94 AICc=4069.27 BIC=4090.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -45.49886 510.1065 339.7482 -0.4978456 3.575298 0.1837435
## ACF1
## Training set 0.01017444

# Disabled alternative: the same orders specified by hand through arima().
#fit2 <- arima(ts(phase4.training),order=c(1,1,2),seasonal=list(order=c(2,0,0),period=30))

# Diagnostic plots for the fitted model.
tsdiag(fit2)
# 30-step forecast from the phase-4 model, kept as a data frame so the point
# forecasts can be pulled out by column name.
pred2 <- as.data.frame(forecast(fit2, h = 30))

# Single-panel plot of the predicted prices (black triangles) against the
# held-out test prices (red dots).
par(mfrow = c(1, 1))
plot(
  ts(pred2[["Point Forecast"]]),
  type = "b",
  pch = 17,
  ylim = c(6170, 6770),
  main = "Predicted Bitcoin Price",
  xlab = "2018SEPT7 - 2018OCT6",
  ylab = "Predicted Price (USD)"
)
lines(ts(phase4.test), type = "b", pch = 19, col = "red")
legend(
  "topleft",
  legend = c("Predicted", "Test Set"),
  col = c("black", "red"),
  pch = c(17, 19),
  bty = "n"
)