Background
Data
Library
library(quantmod) #Get Data
library(ggplot2) # Charting
library(dplyr)
library(tidyverse)
library(padr) # Padding
library(zoo) # na filling
library (lubridate) # date
library(magrittr) # another piping
library(tidymodels)
library(tseries) # Stationarity test
library(forecast)
# Use ggplot2's minimal theme as the default for every chart drawn below.
ggplot2::theme_set(ggplot2::theme_minimal())
Fetch Dataset
# Download daily BTC-USD quotes from Yahoo Finance, keep the closing price,
# and store a CSV copy on disk.
base <- c("BTC-USD")

# With auto.assign = TRUE, getSymbols() creates the price object in the
# workspace and returns the name it was stored under. Keep that name in its
# own variable instead of temporarily storing it in `crypto`.
loaded_symbols <- getSymbols(
  Symbols = base,
  src = "yahoo",
  from = "2019-04-02",
  to = "2021-04-02",
  auto.assign = TRUE
)

# Look the object up by the returned name rather than a hard-coded backtick
# reference, so changing `base` does not silently break this step.
btc <- Cl(get(loaded_symbols[1]))
crypto <- as.data.frame(btc)

# write.csv() does not create missing directories; make sure the target
# folder exists before writing.
csv_path <- "data/crypto.csv"
dir.create(dirname(csv_path), showWarnings = FALSE, recursive = TRUE)
write.csv(crypto, file = csv_path)
Pre-processing
NA Checking
# Turn the row names into a proper Date column, let pad() complete the date
# sequence, and report whether the padded table contains any NA.
crypto %>%
  mutate(date = as.Date(rownames(.))) %>%
  pad() %>%
  anyNA()
## [1] TRUE
# Build the date column from the row names, pad the date sequence, and fill
# the resulting gaps with the last observed value (na.locf).
crypto <- crypto %>%
  mutate(
    date = as.Date(rownames(crypto)),
    # Previously this argument sat inside the as.Date() call because of a
    # misplaced parenthesis, so mutate() never applied the flooring step.
    date = floor_date(date, "days")
  ) %>%
  pad() %>%
  na.locf()

# Count of remaining NA values per column.
colSums(is.na(crypto))
## BTC-USD.Close date
## 0 0
# Line chart of the closing price over time; the price column is renamed to
# `btc` while it is being selected.
crypto %>%
  select(btc = "BTC-USD.Close", date) %>%
  ggplot(aes(x = date, y = btc)) +
  geom_line()

# Replace the closing price with its day-over-day percentage change.
crypto %<>%
  rename(btc = "BTC-USD.Close") %>%
  # Percentage change is (current - previous) / previous * 100, so a price
  # increase gives a positive value. The earlier (previous - current) form
  # inverted the sign. dplyr::lag is spelled out so that a masking `lag`
  # from another attached package cannot be picked up by accident.
  mutate(btc.change = (btc - dplyr::lag(btc)) / dplyr::lag(btc) * 100) %>%
  select(-btc) %>%
  # The first row has no previous value and is therefore NA.
  drop_na()
crypto
# train-val-test size
eval_size <- 30
test_size <- 30
# Derive the training size from the rows actually present instead of a
# hard-coded total, so the split stays correct if the download window changes
# or rows are dropped upstream.
train_size <- nrow(crypto) - eval_size - test_size
stopifnot(train_size > 0)

# Window boundaries, walking backwards from the most recent date:
# test (latest) <- evaluation <- train (earliest).
t_end <- max(crypto$date)
t_start <- t_end - days(test_size) + days(1)
ev_end <- t_start - days(1)
ev_start <- ev_end - days(eval_size) + days(1)
tr_end <- ev_start - days(1)
tr_start <- tr_end - days(train_size) + days(1)

# Intervals used with %within% to assign each date to a split
intrain <- interval(tr_start, tr_end)
intest <- interval(t_start, t_end)
ineval <- interval(ev_start, ev_end)
# Colour the percentage-change series by the split each date falls into.
split_levels <- c("train", "evaluation", "test")

crypto %>%
  mutate(
    sample = case_when(
      date %within% intrain ~ "train",
      date %within% intest ~ "test",
      date %within% ineval ~ "evaluation"
    )
  ) %>%
  # Dates outside all three intervals have an NA label and are removed here.
  drop_na() %>%
  mutate(sample = factor(sample, levels = split_levels)) %>%
  ggplot(aes(x = date, y = btc.change, colour = sample)) +
  geom_line() +
  xlab("Date") +
  ylab("Change(%)")

Modelling
# Wrap the percentage-change column in a ts object that starts at period 1
# and has 30 observations per cycle.
btc_ts <- ts(
  crypto[["btc.change"]], # numeric vector
  start = 1,              # starting period
  frequency = 30
)

# Augmented Dickey-Fuller test; the alternative hypothesis is stationarity.
adf.test(btc_ts)
## Warning in adf.test(btc_ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: btc_ts
## Dickey-Fuller = -8.3462, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# Decompose the series and plot its components.
decompose(btc_ts) %>%
  autoplot()

# Autocorrelation of the series.
acf(btc_ts)

# Partial autocorrelation of the series.
pacf(btc_ts)

Forecast
# Training series: the percentage change for dates inside the train interval.
train <- crypto %>%
  filter(date %within% intrain) %>%
  pull(btc.change) %>%
  ts(start = c(2019, 4), frequency = 30)

# Two candidate models: an automatically selected non-seasonal ARIMA and a
# manually specified ARIMA(1, 0, 18).
btc_model <- auto.arima(train, seasonal = FALSE)
btc_model2 <- Arima(train, order = c(1, 0, 18))

# 30-step-ahead forecasts from each model.
horizon <- 30
btc_forecast <- forecast(btc_model, h = horizon)
btc_forecast2 <- forecast(btc_model2, h = horizon)

# Overlay the manual model's fitted values and forecast mean on the training
# series.
train %>%
  autoplot(series = "Actual") +
  autolayer(btc_forecast2[["fitted"]], series = "Train") +
  autolayer(btc_forecast2[["mean"]], series = "Test")

Conclusion