Background

Data

Library

library(quantmod) #Get Data
library(ggplot2) # Charting
library(dplyr)
library(tidyverse)
library(padr) # Padding
library(zoo) # na filling
library (lubridate) # date
library(magrittr) # another piping
library(tidymodels)
library(tseries) # Stationarity test
library(forecast) 

# Use ggplot2's minimal theme as the default for every chart below.
ggplot2::theme_set(ggplot2::theme_minimal())

Fetch Dataset

# Ticker to download from Yahoo Finance.
base <- c("BTC-USD")

# Request the price series as a return value (auto.assign = FALSE) rather
# than capturing the symbol-name string that an auto-assigning call returns.
btc_xts <- getSymbols(
  Symbols = base, src = "yahoo",
  from = "2019-04-02", to = "2021-04-02",
  auto.assign = FALSE
)

# Keep an object named after the ticker (`BTC-USD`) in the workspace, which
# is what the auto-assigning form of getSymbols() would have created.
assign(base, btc_xts)

# Closing prices only, converted to a plain data frame.
btc <- Cl(btc_xts)
crypto <- as.data.frame(btc)

# write.csv() does not create missing folders, so make sure "data/" exists
# before saving a local copy of the download.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
write.csv(crypto, file = "data/crypto.csv")

Pre-processing

NA Checking

# Build a date column from the row names, let pad() insert rows for any
# missing dates, and report whether the padded table contains NA values.
anyNA(
  pad(
    mutate(crypto, date = as.Date(rownames(crypto)))
  )
)
## [1] TRUE
# Rebuild the date column from the row names, truncate it to whole days,
# pad the missing dates, and fill the resulting gaps with the last
# observation carried forward.
# floor_date() must wrap as.Date(): passed as an extra argument to as.Date()
# it is silently ignored and never applied.
crypto <- crypto %>%
  mutate(date = floor_date(as.Date(rownames(crypto)), unit = "day")) %>%
  pad() %>%
  na.locf()

# Per-column count of the missing values that remain after filling.
colSums(is.na(crypto))
## BTC-USD.Close          date 
##             0             0
# Line chart of the closing price over time: keep the close and date
# columns, give the close a short name, and plot it against the date.
crypto %>%
  select(`BTC-USD.Close`, date) %>%
  rename(btc = `BTC-USD.Close`) %>%
  ggplot(mapping = aes(x = date, y = btc)) +
  geom_line()

# Replace the closing price with its percentage change from the previous row:
#   (current - previous) / previous * 100
# so a price increase is a positive change. dplyr::lag() is named explicitly
# because other attached packages also provide lag functions/methods.
# The first row has no previous value, so its NA change is dropped.
crypto <- crypto %>%
  rename(btc = "BTC-USD.Close") %>%
  mutate(btc.change = (btc - dplyr::lag(btc)) / dplyr::lag(btc) * 100) %>%
  select(-btc) %>%
  drop_na()

crypto
# Train / evaluation / test sizes, counted in days.
# The training size is whatever remains once the evaluation and test windows
# are set aside, derived from the data instead of a hard-coded row count.
# NOTE(review): assumes `crypto` holds one row per calendar day - confirm.
eval_size <- 30
test_size <- 30
train_size <- nrow(crypto) - eval_size - test_size

# Window boundaries, worked backwards from the most recent date:
# test window first, then evaluation, then training.
t_end <- max(crypto$date)
t_start <- t_end - days(test_size) + days(1)

ev_end <- t_start - days(1)
ev_start <- ev_end - days(eval_size) + days(1)

tr_end <- ev_start - days(1)
tr_start <- tr_end - days(train_size) + days(1)

# Date intervals used to assign each row to a sample.
intrain <- interval(tr_start, tr_end)
intest <- interval(t_start, t_end)
ineval <- interval(ev_start, ev_end)

# Colour the daily change by the sample (train / evaluation / test) each
# date belongs to; rows that fall in none of the intervals are dropped.
crypto %>%
  mutate(
    sample = factor(
      case_when(
        date %within% intrain ~ "train",
        date %within% intest ~ "test",
        date %within% ineval ~ "evaluation"
      ),
      levels = c("train", "evaluation", "test")
    )
  ) %>%
  drop_na() %>%
  ggplot(aes(x = date, y = btc.change, colour = sample)) +
  geom_line() +
  labs(x = "Date", y = "Change(%)")

Modelling

# Turn the percentage-change column into a ts object:
#   - the numeric vector of changes is the data,
#   - the series starts at period 1,
#   - 30 observations make up one period.
btc_ts <- ts(crypto[["btc.change"]], start = 1, frequency = 30)

# Augmented Dickey-Fuller test for stationarity of the series.
adf.test(btc_ts)
## Warning in adf.test(btc_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btc_ts
## Dickey-Fuller = -8.3462, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# Decompose the series and plot the resulting components.
decompose(btc_ts) %>%
  autoplot()

# Autocorrelation of the series.
acf(btc_ts)

# Partial autocorrelation of the series.
pacf(btc_ts)

Forecast

# Training series: the percentage changes whose dates fall inside the
# training interval, wrapped as a ts object with 30 observations per period.
train <- crypto %>%
  filter(date %within% intrain) %>%
  pull(btc.change) %>%
  ts(start = c(2019, 4), frequency = 30)

# Model 1: non-seasonal ARIMA with the order chosen by auto.arima(),
# forecast 30 steps ahead.
btc_model <- auto.arima(train, seasonal = FALSE)
btc_forecast <- forecast(btc_model, h = 30)

# Model 2: ARIMA with a manually specified order (1, 0, 18),
# forecast 30 steps ahead.
btc_model2 <- Arima(train, order = c(1, 0, 18))
btc_forecast2 <- forecast(btc_model2, h = 30)

# Overlay the second model's fitted values and its 30-step forecast on the
# training series.
train %>%
  autoplot(series = "Actual") +
  autolayer(btc_forecast2[["fitted"]], series = "Train") +
  autolayer(btc_forecast2[["mean"]], series = "Test")

Conclusion