library(tidyverse)
library(patchwork)
library(zoo)
library(caret)
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Attaching package: ‘caret’
The following object is masked from ‘package:purrr’:
lift
# List the files in the working directory (the CSV inputs are read from here).
list.files()
[1] "ADA-USD.csv" "BTC-USD.csv" "DOGE-USD.csv" "ETH-USD.csv"
[5] "XGBoost.nb.html" "XGBoost.Rmd"
1. Import các dataset
Có 4 loại tiền điện tử cần phân tích:
- Bitcoin
- Dogecoin
- Ethereum
- Cardano
Khái niệm:
Bitcoin is a digital currency which operates free of
any central control or the oversight of banks or governments. Instead it
relies on peer-to-peer software and cryptography. A public ledger
records all bitcoin transactions and copies are held on servers around
the world.
Dogecoin is primarily used for tipping users on
Reddit and Twitter, but it is also accepted as a method of payment by a
few dozen merchants. It can be used to buy food, household supplies and
even website domains.
Ethereum operates on a decentralized computer
network, or distributed ledger called a blockchain, which manages and
tracks the currency. It can be useful to think of a blockchain like a
running receipt of every transaction that’s ever taken place in the
cryptocurrency.
Cardano: The Cardano blockchain can be used to build
smart contracts and, in turn, to create decentralized applications and
protocols. Additionally, the ability to send and receive funds almost
instantly, for minimal fees, has many applications in the world of
business and finance.
1.1. Bitcoin
# Read the Bitcoin price history; `col.names` replaces the header names
# from the file with the lower-case names used throughout this notebook.
bitcoindf <- read.csv(
  "./BTC-USD.csv",
  col.names = c("date", "open", "high", "low", "close", "adj_close", "volume")
)
# Show the first rows as a quick sanity check.
head(bitcoindf)
1.2. Dogecoin
# Read the Dogecoin price history; `col.names` replaces the header names
# from the file with the lower-case names used throughout this notebook.
dogecoindf <- read.csv(
  "DOGE-USD.csv",
  col.names = c("date", "open", "high", "low", "close", "adj_close", "volume")
)
# Show the first rows as a quick sanity check.
head(dogecoindf)
1.3. Ethereum
# Read the Ethereum price history; `col.names` replaces the header names
# from the file with the lower-case names used throughout this notebook.
ethereumdf <- read.csv(
  "ETH-USD.csv",
  col.names = c("date", "open", "high", "low", "close", "adj_close", "volume")
)
# Show the first rows as a quick sanity check.
head(ethereumdf)
1.4. Cardano
# Read the Cardano price history; `col.names` replaces the header names
# from the file with the lower-case names used throughout this notebook.
cardanodf <- read.csv(
  "ADA-USD.csv",
  col.names = c("date", "open", "high", "low", "close", "adj_close", "volume")
)
# Show the first rows as a quick sanity check.
head(cardanodf)
Preprocessing
Bài viết gốc sử dụng forward fill của pandas; trong R có thể dùng
hàm fill() của package tidyr (thuộc tidyverse)
# Forward-fill gaps in the open/close prices (the R counterpart of pandas'
# forward fill).
#
# tidyr::fill() only replaces NA. At this point the price columns are still
# character vectors containing non-numeric placeholders (the later as.numeric()
# coercion reports "NAs introduced by coercion" for them), so filling them
# directly changes nothing. They are therefore coerced to numeric first; the
# coercion warning is expected here and silenced for these two columns only.
#   df: data frame with `open` and `close` columns
# Returns `df` with numeric, forward-filled `open` and `close`.
forward_fill_prices <- function(df) {
  df %>%
    mutate(across(c(open, close), ~ suppressWarnings(as.numeric(.x)))) %>%
    fill(open, close, .direction = "down")
}

bitcoindf <- forward_fill_prices(bitcoindf)
dogecoindf <- forward_fill_prices(dogecoindf)
ethereumdf <- forward_fill_prices(ethereumdf)
cardanodf <- forward_fill_prices(cardanodf)
Convert the other columns to numeric
# Parse `date` as a Date before the blanket coercion: read.csv() reads it as
# text, so converting every character column to numeric would turn all dates
# into NA and break the date filters and time-series plots further down.
# The remaining character columns (prices, volume) become numeric; entries that
# are not numbers become NA and raise the usual coercion warning.
# NOTE(review): assumes dates are ISO formatted (YYYY-MM-DD) -- confirm.
bitcoindf <- bitcoindf %>%
  mutate(date = as.Date(date)) %>%
  mutate(across(where(is.character), as.numeric))
Warning: There were 6 warnings in `mutate()`.
The first warning was:
ℹ In argument: `open = .Primitive("as.double")(open)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
# Parse `date` as a Date first so the blanket character-to-numeric coercion
# does not turn the dates into NA; then coerce the remaining character columns
# (non-numeric entries become NA with a coercion warning).
# NOTE(review): assumes dates are ISO formatted (YYYY-MM-DD) -- confirm.
dogecoindf <- dogecoindf %>%
  mutate(date = as.Date(date)) %>%
  mutate(across(where(is.character), as.numeric))
Warning: There were 6 warnings in `mutate()`.
The first warning was:
ℹ In argument: `open = .Primitive("as.double")(open)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
# Parse `date` as a Date first so the blanket character-to-numeric coercion
# does not turn the dates into NA; then coerce the remaining character columns
# (non-numeric entries become NA with a coercion warning).
# NOTE(review): assumes dates are ISO formatted (YYYY-MM-DD) -- confirm.
ethereumdf <- ethereumdf %>%
  mutate(date = as.Date(date)) %>%
  mutate(across(where(is.character), as.numeric))
Warning: There were 6 warnings in `mutate()`.
The first warning was:
ℹ In argument: `open = .Primitive("as.double")(open)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
# Parse `date` as a Date first so the blanket character-to-numeric coercion
# does not turn the dates into NA; then coerce the remaining character columns
# (non-numeric entries become NA with a coercion warning).
# NOTE(review): assumes dates are ISO formatted (YYYY-MM-DD) -- confirm.
cardanodf <- cardanodf %>%
  mutate(date = as.Date(date)) %>%
  mutate(across(where(is.character), as.numeric))
Warning: There were 6 warnings in `mutate()`.
The first warning was:
ℹ In argument: `open = .Primitive("as.double")(open)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
Visualization
Plotting close price of Bitcoin, Cardano, Dogecoin and Ethereum
# Line chart of the closing price for one coin.
#   df:     data frame with `date` and `close` columns
#   title:  plot title
#   colour: line colour (geom_line's default black unless given)
# Axis titles are removed (NULL) because the plot title already names the
# series.
plot_close_line <- function(df, title, colour = "black") {
  ggplot(df, aes(x = date, y = close)) +
    geom_line(col = colour) +
    labs(title = title, x = NULL, y = NULL) +
    theme_bw(base_size = 9)
}

line_bitcoindf <- plot_close_line(bitcoindf, "Bitcoin Close Price", "red")
line_cardanodf <- plot_close_line(cardanodf, "Cardano Close Price")
line_dogecoindf <- plot_close_line(dogecoindf, "Dogecoin Close Price", "orange")
# Fixed: this panel was titled "Dogecoin Close Price" although it shows
# Ethereum.
line_ethereumdf <- plot_close_line(
  ethereumdf, "Ethereum Close Price", "darkgreen"
)

# patchwork's `+` places the four panels in one figure.
line_bitcoindf + line_cardanodf + line_dogecoindf + line_ethereumdf

Plotting only 2020-2021 year close price of Bitcoin, Cardano,
Dogecoin and Ethereum
# Keep, for every coin, only the rows dated after 1 September 2020.
last1year_bitcoindf <- filter(
  bitcoindf,
  date > as.Date("01-09-2020", format = "%d-%m-%Y")
)
last1year_cardanodf <- filter(
  cardanodf,
  date > as.Date("01-09-2020", format = "%d-%m-%Y")
)
last1year_dogecoindf <- filter(
  dogecoindf,
  date > as.Date("01-09-2020", format = "%d-%m-%Y")
)
last1year_ethereumdf <- filter(
  ethereumdf,
  date > as.Date("01-09-2020", format = "%d-%m-%Y")
)
# Line chart of the closing price for one coin.
#   df:     data frame with `date` and `close` columns
#   title:  plot title
#   colour: line colour (geom_line's default black unless given)
plot_close_line <- function(df, title, colour = "black") {
  ggplot(df, aes(x = date, y = close)) +
    geom_line(col = colour) +
    labs(title = title, x = NULL, y = NULL) +
    theme_bw(base_size = 9)
}

# The last1year_* frames are already restricted to dates after 2020-09-01, so
# the Ethereum panel no longer re-applies that same filter.
line_bitcoindf <- plot_close_line(
  last1year_bitcoindf, "Bitcoin Close Price", "red"
)
line_cardanodf <- plot_close_line(last1year_cardanodf, "Cardano Close Price")
line_dogecoindf <- plot_close_line(
  last1year_dogecoindf, "Dogecoin Close Price", "orange"
)
line_ethereumdf <- plot_close_line(
  last1year_ethereumdf, "Ethereum Close Price", "darkgreen"
)

# Stack the four panels vertically.
line_bitcoindf +
  line_cardanodf +
  line_dogecoindf +
  line_ethereumdf +
  plot_layout(nrow = 4)

Plotting only 2020-2021 year volume of Bitcoin, Cardano, Dogecoin and
Ethereum
# Trading volume of the four coins over the last year, one coloured line each.
# Every layer receives its own data frame and maps bare column names instead of
# `df$col` inside aes(); the constant colour label keys the legend entry.
# Fixed: the Cardano legend key was misspelled "cardanod".
volume_colours <- c(
  bitcoin = "red",
  cardano = "black",
  dogecoin = "orange",
  ethereum = "darkgreen"
)

ggplot(mapping = aes(x = date, y = volume)) +
  geom_line(aes(col = "bitcoin"), data = last1year_bitcoindf) +
  geom_line(aes(col = "cardano"), data = last1year_cardanodf) +
  geom_line(aes(col = "dogecoin"), data = last1year_dogecoindf) +
  geom_line(aes(col = "ethereum"), data = last1year_ethereumdf) +
  theme_bw() +
  labs(
    title = "Volume of Bitcoin, Cardano, Dogecoin, Ethereum",
    x = NULL,
    y = NULL
  ) +
  scale_color_manual(
    name = NULL,
    breaks = names(volume_colours),
    values = volume_colours
  )

Plotting last month open and close price of Bitcoin, Cardano,
Dogecoin and Ethereum with comparison
# Keep, for every coin, only the rows dated after 1 August 2021.
last1month_bitcoindf <- filter(
  bitcoindf,
  date > as.Date("01-08-2021", format = "%d-%m-%Y")
)
last1month_cardanodf <- filter(
  cardanodf,
  date > as.Date("01-08-2021", format = "%d-%m-%Y")
)
last1month_dogecoindf <- filter(
  dogecoindf,
  date > as.Date("01-08-2021", format = "%d-%m-%Y")
)
last1month_ethereumdf <- filter(
  ethereumdf,
  date > as.Date("01-08-2021", format = "%d-%m-%Y")
)
# Open vs. close price of one coin as two coloured lines.
#   df:          data frame with `date`, `open` and `close` columns
#   title:       plot title
#   show_legend: whether this panel draws the Close/Open legend
# The colour labels sort the same way in every panel, so all panels share the
# same two default colours and a single legend is enough.
plot_open_close <- function(df, title, show_legend = FALSE) {
  ggplot(df, aes(date)) +
    geom_line(aes(y = close, col = "Close"), show.legend = show_legend) +
    geom_line(aes(y = open, col = "Open"), show.legend = show_legend) +
    labs(col = NULL, title = title, x = NULL, y = NULL) +
    theme_bw(base_size = 9)
}

# Only the first panel carries the legend; the panels are stacked in one column.
plot_open_close(last1month_bitcoindf, "bitcoin", show_legend = TRUE) +
  plot_open_close(last1month_cardanodf, "cardano") +
  plot_open_close(last1month_dogecoindf, "dogecoin") +
  plot_open_close(last1month_ethereumdf, "ethereum") +
  plot_layout(ncol = 1)

Moving Averages
As we know, prices are highly volatile and change quickly over time.
To observe any trend or pattern, we can use 50-day and 200-day
moving averages.
# Carry the last known open/close value forward over missing rows so the
# rolling means below are computed on a gap-free series.
bitcoindf <- fill(bitcoindf, open, close, .direction = "down")
dogecoindf <- fill(dogecoindf, open, close, .direction = "down")
ethereumdf <- fill(ethereumdf, open, close, .direction = "down")
cardanodf <- fill(cardanodf, open, close, .direction = "down")
# 50-day and 200-day trailing moving averages of the close price for one coin.
#   df:           data frame with `date` and `close` columns
#   title:        plot title
#   short_colour: colour of the 50-day line
#   long_colour:  colour of the 200-day line
# `align = "right"` makes each average cover the preceding window; the first
# window - 1 positions are padded with NA (`fill = NA`).
plot_moving_averages <- function(df, title, short_colour, long_colour) {
  ggplot(df, aes(x = date)) +
    geom_line(
      aes(y = rollmean(close, 50, fill = NA, align = "right")),
      col = short_colour
    ) +
    geom_line(
      aes(y = rollmean(close, 200, fill = NA, align = "right")),
      col = long_colour
    ) +
    labs(title = title, x = NULL, y = NULL) +
    theme_bw(base_size = 9)
}

# Fixed: every panel used to be titled "Bitcoin Close Price moving average".
plot_moving_averages(
  bitcoindf, "Bitcoin Close Price moving average", "blue", "orange"
) +
  plot_moving_averages(
    cardanodf, "Cardano Close Price moving average", "black", "red"
  ) +
  plot_moving_averages(
    dogecoindf, "Dogecoin Close Price moving average", "orange", "grey"
  ) +
  plot_moving_averages(
    ethereumdf, "Ethereum Close Price moving average", "green", "blue"
  )

Plotting histogram with mean indicator of all four
cryptocurrencies
# Histogram of one coin's close price with a dashed vertical line at the mean.
# The section promises a "mean indicator", which the panels previously lacked.
#   df:      data frame with a `close` column
#   label:   text drawn inside the panel (serves as its title)
#   label_x, label_y: position of that text in data coordinates
#   ...:     forwarded to geom_histogram() (fill, alpha, col, bins, ...)
plot_close_hist <- function(df, label, label_x, label_y, ...) {
  ggplot(df, aes(close)) +
    geom_histogram(...) +
    geom_vline(xintercept = mean(df$close, na.rm = TRUE), linetype = "dashed") +
    ylab("count") +
    theme_bw(base_size = 9) +
    annotate("text", label = label, x = label_x, y = label_y)
}

# One panel per coin, stacked in a single column.
plot_close_hist(
  bitcoindf, "Bitcoin Close Price", 50000, 400,
  fill = "darkred", alpha = 0.5, col = "black"
) +
  plot_close_hist(
    cardanodf, "Cardano Close Price", 2.3, 200,
    fill = "darkgreen", alpha = 0.5, col = "black"
  ) +
  plot_close_hist(
    dogecoindf, "Dogecoin Close Price", 0.52, 200,
    fill = "grey", bins = 400
  ) +
  plot_close_hist(
    ethereumdf, "Ethereum Close Price", 3100, 400,
    fill = "darkorange", alpha = 0.5, col = "black"
  ) +
  plot_layout(ncol = 1)

# Draw one default-binned histogram per Bitcoin column and combine them with
# patchwork's `+`, in the same left-to-right order as the column list.
Reduce(
  `+`,
  lapply(
    c("date", "open", "high", "low", "close", "adj_close", "volume"),
    function(column) {
      ggplot(bitcoindf, aes(.data[[column]])) +
        geom_histogram() +
        # Label the x axis with the plain column name.
        xlab(column) +
        ylab(element_blank()) +
        theme_bw()
    }
  )
)

# Drop every object the modelling section does not need; only the Bitcoin data
# frame is used from here on.
rm(
  cardanodf, dogecoindf, ethereumdf,
  last1month_bitcoindf, last1month_cardanodf,
  last1month_dogecoindf, last1month_ethereumdf,
  last1year_bitcoindf, last1year_cardanodf,
  last1year_dogecoindf, last1year_ethereumdf,
  line_bitcoindf, line_cardanodf, line_dogecoindf, line_ethereumdf
)
Bitcoin Close Price Prediction using XGBoost
Dataframe of bitcoin close price
# Keep only the date and the close price of Bitcoin.
closedf <- bitcoindf %>% select(date, close)
# Report rows and columns.
cat("Shape of close dataframe:", nrow(closedf), ncol(closedf))
Shape of close dataframe: 2193 2
# Restrict the modelling data to rows dated after 2020-09-13.
closedf <- filter(closedf, date > as.Date("2020-09-13"))
# Keep an unscaled copy (dates and original prices) for plotting later.
close_stock <- closedf
cat("Total data for prediction: ", nrow(close_stock))
Total data for prediction: 365
Normalizing close price value
# Drop the date, then rescale the close price with caret's "range" method
# (fit the transformation on the column and apply it to the same column).
closedf <- closedf %>% select(close)
closedf <- predict(preProcess(closedf, method = "range"), closedf)
dim(closedf)
[1] 365 1
Separate data for Train and Test
# Chronological split: the first 70% of the rows train the model and the
# remaining rows are held out for testing.
training_size <- round(nrow(closedf) * 0.70)
train_data <- data.frame(close = head(closedf$close, training_size))
test_data <- data.frame(close = tail(closedf$close, -training_size))
cat("train_data: ", nrow(train_data), ncol(train_data))
train_data: 255 1
# Report rows and columns of the held-out split.
cat("test_data: ", nrow(test_data), ncol(test_data))
test_data: 110 1
# Show which part of the (normalized) close-price series is used for training
# and which part is held out for testing.
# Fixed: the y axis was labelled "Weekly Sales", but the plotted values are the
# normalized close price.
ggplot(mapping = aes(x = date, y = close)) +
  geom_line(
    aes(col = "train"),
    data = data.frame(
      date = close_stock$date[1:training_size],
      close = train_data$close
    )
  ) +
  geom_line(
    aes(col = "test"),
    data = data.frame(
      date = close_stock$date[(training_size + 1):nrow(closedf)],
      close = test_data$close
    )
  ) +
  scale_color_manual(
    breaks = c("train", "test"),
    values = c("train" = "black", "test" = "red")
  ) +
  labs(
    title = "Train & Test data",
    x = "Date",
    y = "Close price (normalized)",
    col = NULL
  ) +
  theme_bw()

# Turn a univariate series into a supervised-learning set of sliding windows.
#
# Sample i uses rows i .. i + time_step - 1 of the first column as features and
# row i + time_step as the target. As in the original implementation, the
# number of samples is nrow(dataset) - time_step - 1, so the last row is never
# used as a target.
#
#   dataset:   data frame or matrix; only its first column is read
#   time_step: window length (number of lagged values per sample), >= 1
# Returns a list with
#   dataX: numeric matrix, one row per sample, columns V1 .. V<time_step>
#   dataY: numeric vector of targets, one per sample
# When the input is too short to form a single sample, an empty 0-row matrix
# and a length-0 vector are returned (the previous `1:n` loop counted backwards
# and failed in that case).
create_dataset <- function(dataset, time_step = 1) {
  stopifnot(time_step >= 1)
  values <- if (is.data.frame(dataset)) dataset[[1]] else dataset[, 1]

  n_samples <- max(nrow(dataset) - time_step - 1, 0)
  starts <- seq_len(n_samples)

  # Row i of `window_idx` holds the positions i, i + 1, ..., i + time_step - 1.
  window_idx <- outer(starts, seq_len(time_step) - 1L, `+`)
  dataX <- matrix(
    values[as.vector(window_idx)],
    nrow = n_samples,
    ncol = time_step
  )
  colnames(dataX) <- paste0("V", seq_len(time_step))

  dataY <- unname(values[starts + time_step])
  list(dataX = dataX, dataY = dataY)
}
# Build 15-step sliding-window features/targets for the training split ...
train_set <- create_dataset(train_data, time_step = 15)
X_train <- train_set$dataX
Y_train <- train_set$dataY
rm(train_set)

# ... and for the test split.
test_set <- create_dataset(test_data, time_step = 15)
X_test <- test_set$dataX
Y_test <- test_set$dataY
rm(test_set)
# Report rows and columns of the training feature matrix.
cat("X_train: ", nrow(X_train), ncol(X_train))
X_train: 239 15
# Y_train is a plain vector, so dim() is NULL and printed nothing; report its
# length instead.
cat("Y_train: ", length(Y_train))
Y_train:
# Report rows and columns of the test feature matrix.
cat("X_test: ", nrow(X_test), ncol(X_test))
X_test: 94 15
# Y_test is a plain vector, so dim() is NULL and printed nothing; report its
# length instead.
cat("Y_test", length(Y_test))
Y_test
# Resampling setup for caret: 5-fold cross-validation, no per-iteration
# logging, no copy of the training data stored in the fitted object, and
# parallel execution permitted.
xgb_trcontrol <- caret::trainControl(
  method = "cv",
  number = 5,
  returnData = FALSE,
  verboseIter = FALSE,
  allowParallel = TRUE
)
# Tuning grid with exactly one combination, i.e. fixed hyper-parameters.
# Settings that were listed but left disabled in the original code:
#   base_score = 0.5, colsample_bylevel = 1, colsample_bynode = 1,
#   tree_method = 'exact', interaction_constraints = '', max_delta_step = 0,
#   n_estimators = 1000, validate_parameters = 1, random_state = 0, n_jobs = 4
xgb_grid <- base::expand.grid(
  nrounds = 1000,        # number of boosting rounds
  colsample_bytree = 1,  # share of columns sampled when building each tree
  max_depth = 6,         # maximum depth of a tree
  eta = 0.3,             # learning rate
  gamma = 0,             # minimum loss reduction
  min_child_weight = 1,  # minimum sum of instance weight (hessian) in a child
  subsample = 1          # share of training rows sampled per tree
)
# Fit the boosted-tree model through caret on the windowed training data.
# `nthread = 1` is not a caret argument; it is passed on via `...`.
xgb_model <- train(
  x = X_train,
  y = Y_train,
  method = "xgbTree",
  tuneGrid = xgb_grid,
  trControl = xgb_trcontrol,
  nthread = 1
)
# Predict the held-out windows and report the mean absolute error on the
# normalized scale (the metric is symmetric in its two arguments).
predicted <- predict(xgb_model, newdata = X_test)
cat("Mean Absolute Error - MAE : ", MAE(pred = predicted, obs = Y_test))
Mean Absolute Error - MAE : 0.02658388
# Root mean squared error on the normalized scale (symmetric in its arguments).
cat("Root Mean squared Error - RMSE : ", RMSE(pred = predicted, obs = Y_test))
Root Mean squared Error - RMSE : 0.03641536
# Model output for the training and the test windows, used by the final plot.
train_predict <- predict(xgb_model, newdata = X_train)
test_predict <- predict(xgb_model, newdata = X_test)
# Undo a min-max ("range") scaling.
#   xv: scaled values
#   mx: maximum of the original data
#   nx: minimum of the original data
# Returns the values mapped back to [nx, mx], rounded to 5 decimal places.
descaler <- function(xv, mx, nx) {
  round(nx + (mx - nx) * xv, digits = 5)
}
# Compare the actual close price with the model's predictions on the training
# and test windows, all mapped back to the original price scale.
price_max <- max(close_stock$close)
price_min <- min(close_stock$close)

# Each prediction targets the row right after its input window, and the last
# target of a split is that split's second-to-last row. The date ranges are
# therefore derived from the number of predictions instead of hard-coded
# offsets (previously `17:training_size - 1`, which only worked because `:`
# binds tighter than `-`).
train_end <- training_size - 1
train_dates <-
  close_stock$date[(train_end - length(train_predict) + 1):train_end]
test_end <- nrow(close_stock) - 1
test_dates <- close_stock$date[(test_end - length(test_predict) + 1):test_end]

series_colours <- c(
  "Original close price" = "blue",
  "Train predicted close price" = "red",
  "Test predicted close price" = "green"
)

# Fixed: the test layer also set a constant `col = "green"`, which overrode the
# mapped colour and removed the series from the legend. Colours now come only
# from the manual scale. The title typo "Comparision" is corrected as well.
ggplot(mapping = aes(x = date, y = price)) +
  geom_line(
    aes(col = "Original close price"),
    data = data.frame(
      date = close_stock$date,
      price = descaler(closedf$close, price_max, price_min)
    )
  ) +
  geom_line(
    aes(col = "Train predicted close price"),
    data = data.frame(
      date = train_dates,
      price = descaler(train_predict, price_max, price_min)
    )
  ) +
  geom_line(
    aes(col = "Test predicted close price"),
    data = data.frame(
      date = test_dates,
      price = descaler(test_predict, price_max, price_min)
    )
  ) +
  scale_color_manual(
    breaks = names(series_colours),
    values = series_colours
  ) +
  theme_bw() +
  labs(
    x = "Date",
    y = "Close price",
    title = "Comparison between original close price vs predicted close price",
    col = NULL
  )

