Cryptocurrency is a new form of asset which is used for paying or investing digitally. The upsurge in various cryptocurrency prices over the past 10 years has increased the curiosity of researchers and investors to analyze and forecast future prices. The primary focus of this research is to develop an Autoregressive Integrated Moving Average (ARIMA) model, a time-series statistical model, for forecasting cryptocurrency prices. This research focuses on six different cryptocurrencies – Bitcoin, Dogecoin, Ethereum, Binance Coin, XRP and Cardano – chosen for their popularity, and discusses the price movements and the stability of these cryptocurrencies using various exploratory data analysis and visualization techniques. The data for these six coins were collected from Kaggle datasets and merged into a single dataset. In this research, the dataset records the prices of these six cryptocurrencies from 2018 to 2021.
The cryptocurrency dataset from Kaggle consists of 244,855 observations covering six popular cryptocurrencies, with 6 independent variables and 1 dependent variable. A sample of the dataset is shown below:
# Load the raw Ethereum/USDT records and preview the first ten rows.
eth_raw_data <- read.csv(file = "ethusdt.csv")
head(eth_raw_data, n = 10)
## Date Open High Low Close Volume
## 1 2018-07-22 12:00:00 460.81 460.81 460.81 460.81 0.050
## 2 2018-07-22 12:30:00 460.81 461.03 460.77 460.77 0.610
## 3 2018-07-22 13:00:00 460.77 460.77 460.77 460.77 0.000
## 4 2018-07-22 13:30:00 460.77 464.12 460.77 464.12 0.223
## 5 2018-07-22 14:00:00 464.12 466.55 463.20 466.55 1.437
## 6 2018-07-22 14:30:00 466.70 466.73 465.23 465.23 0.709
## 7 2018-07-22 15:00:00 465.23 467.33 465.23 467.18 0.281
## 8 2018-07-22 15:30:00 467.18 467.18 467.18 467.18 0.000
## 9 2018-07-22 16:00:00 467.18 467.53 467.18 467.53 0.015
## 10 2018-07-22 16:30:00 467.53 467.53 467.53 467.53 0.000
We converted the dataset to daily data, i.e. each day has only one record instead of multiple records. For each day we take the opening price of the first record and the closing price of the last record, together with the highest High, the lowest Low and the total Volume of that day.
#' Aggregate intraday OHLCV records into one row per calendar day.
#'
#' A day is reported only when it contains both a "00:00:00" record (which
#' supplies Open) and a "23:30:00" record (which supplies Close). High, Low and
#' Volume are the maximum, minimum and sum over the records of that day only,
#' so a day with a missing boundary record is dropped instead of leaking its
#' volume into the following day.
#'
#' @param raw_data Data frame with the columns Date ("<date> <time>" text),
#'   Open, High, Low, Close and Volume.
#' @param cryptoname Label written to the Cryptocurrency column of every row.
#' @return Data frame with the columns Date (class Date), Open, High, Low,
#'   Close, Volume and Cryptocurrency; rows containing NA are removed.
aggregating_data <- function(raw_data, cryptoname) {
  # Split every stamp into its date part and whatever follows the first space.
  # as.character() keeps this working when Date was read as a factor.
  stamp <- as.character(raw_data[["Date"]])
  day <- sub(" .*$", "", stamp)
  time_part <- substring(stamp, nchar(day) + 2L)

  # Boundary records of a day: the opening and the closing half-hour.
  is_open <- grepl("00:00:00", time_part, fixed = TRUE)
  is_close <- !is_open & grepl("23:30:00", time_part, fixed = TRUE)

  # Keep the days that have both boundaries, in order of first appearance.
  open_days <- unique(day[is_open])
  close_days <- unique(day[is_close])
  days <- open_days[open_days %in% close_days]

  in_day <- day %in% days
  by_day <- factor(day[in_day], levels = days)
  summarise_day <- function(column, fun) {
    values <- as.numeric(raw_data[[column]][in_day])
    unname(vapply(split(values, by_day), fun, numeric(1)))
  }

  open_rows <- which(is_open)
  # Reversed so that match() selects the last closing record of each day.
  close_rows <- rev(which(is_close))

  daily <- data.frame(
    Date = as.Date(days),
    Open = raw_data[["Open"]][open_rows[match(days, day[open_rows])]],
    High = summarise_day("High", max),
    Low = summarise_day("Low", min),
    Close = raw_data[["Close"]][close_rows[match(days, day[close_rows])]],
    Volume = summarise_day("Volume", sum),
    Cryptocurrency = rep(as.character(cryptoname), length(days)),
    stringsAsFactors = FALSE
  )

  # Number rows by their position among the days that have a closing record;
  # this is the numbering the previous row-by-row implementation produced, so
  # anything that exported the row names keeps the same values.
  row.names(daily) <- match(days, close_days)

  na.omit(daily)
}
# Collapse the intraday Ethereum records to daily rows and preview ten of them.
eth_imputed_data <- aggregating_data(
  raw_data = eth_raw_data,
  cryptoname = "Ethereum"
)
head(eth_imputed_data, n = 10)
## Date Open High Low Close Volume Cryptocurrency
## 2 2018-07-23 460.220 469.89 452.59 455.000 5.634 Ethereum
## 3 2018-07-24 455.000 481.00 440.01 481.000 7.235 Ethereum
## 4 2018-07-25 481.000 485.76 462.41 482.849 3.932 Ethereum
## 5 2018-07-26 482.849 485.00 463.49 463.730 12.349 Ethereum
## 6 2018-07-27 463.730 471.39 456.80 471.390 5.280 Ethereum
## 7 2018-07-28 471.390 482.42 466.02 469.030 5.446 Ethereum
## 8 2018-07-29 469.030 490.00 464.00 490.000 2.855 Ethereum
## 9 2018-07-30 490.000 490.00 443.68 443.680 8.787 Ethereum
## 10 2018-07-31 443.680 464.01 432.50 432.500 3.876 Ethereum
## 11 2018-08-01 432.500 459.99 415.65 422.980 36.051 Ethereum
We can see that every record above represents the aggregated Ethereum data for one particular day.
# Binance Coin: read the raw records, then aggregate them to daily rows.
binance_raw_data <- read.csv(file = "bnbusdt.csv")
binance_imputed_data <- aggregating_data(
  raw_data = binance_raw_data,
  cryptoname = "Binance"
)

# Bitcoin: read the raw records, then aggregate them to daily rows.
bitcoin_raw_data <- read.csv(file = "btcusdt.csv")
bitcoin_imputed_data <- aggregating_data(
  raw_data = bitcoin_raw_data,
  cryptoname = "Bitcoin"
)

# Dogecoin: read the raw records, then aggregate them to daily rows.
dodge_raw_data <- read.csv(file = "dogeusdt.csv")
dodge_imputed_data <- aggregating_data(
  raw_data = dodge_raw_data,
  cryptoname = "Doge"
)

# XRP: read the raw records, then aggregate them to daily rows.
xrp_raw_data <- read.csv(file = "xrpusdt.csv")
xrp_imputed_data <- aggregating_data(
  raw_data = xrp_raw_data,
  cryptoname = "XRP"
)

# Cardano: read the raw records, then aggregate them to daily rows.
cardano_raw_data <- read.csv(file = "adausdt.csv")
cardano_imputed_data <- aggregating_data(
  raw_data = cardano_raw_data,
  cryptoname = "Cardano"
)
All six cryptocurrency datasets are merged into one dataset, in which a column indicating the cryptocurrency name is added so that the trends can be clearly understood.
## X Date Open High Low Close Volume Cryptocurrency
## 1 2 2018-07-23 460.220 469.89 452.59 455.000 5.634 Ethereum
## 2 3 2018-07-24 455.000 481.00 440.01 481.000 7.235 Ethereum
## 3 4 2018-07-25 481.000 485.76 462.41 482.849 3.932 Ethereum
## 4 5 2018-07-26 482.849 485.00 463.49 463.730 12.349 Ethereum
## 5 6 2018-07-27 463.730 471.39 456.80 471.390 5.280 Ethereum
## 6 7 2018-07-28 471.390 482.42 466.02 469.030 5.446 Ethereum
## 7 8 2018-07-29 469.030 490.00 464.00 490.000 2.855 Ethereum
## 8 9 2018-07-30 490.000 490.00 443.68 443.680 8.787 Ethereum
## 9 10 2018-07-31 443.680 464.01 432.50 432.500 3.876 Ethereum
## 10 11 2018-08-01 432.500 459.99 415.65 422.980 36.051 Ethereum
## # A tibble: 6 × 4
## Cryptocurrency min_Open max_Close max_Volume
## <chr> <dbl> <dbl> <dbl>
## 1 Binance 9.27 680 15454.
## 2 Bitcoin 3205 63214 244.
## 3 Cardano 0.0237 2.28 3453160.
## 4 Doge 0.00249 0.671 219205228
## 5 Ethereum 84.0 4123. 4224.
## 6 XRP 0.140 1.82 8672204
# Share of each cryptocurrency in the total of Volume * Close, drawn as a 3D
# pie chart. `data` is defined outside this block.
# NOTE(review): `data` is presumably the merged daily dataset - confirm.
# The summary is stored in `traded_value` rather than `pi`, so that R's
# built-in constant `pi` is not masked for code that runs afterwards.
traded_value <- data %>%
  group_by(Cryptocurrency) %>%
  summarise(Frequency = sum(Volume * Close)) %>%
  as.data.frame()

# Whole-number percentage of the total for each coin, e.g. "Bitcoin 42%".
pct <- round(traded_value$Frequency / sum(traded_value$Frequency) * 100)
lbls <- paste0(traded_value$Cryptocurrency, " ", pct, "%")

pie3D(
  traded_value$Frequency,
  labels = lbls,
  col = c("skyblue", "skyblue3", "paleturquoise3", "paleturquoise4", "grey", "white"),
  main = "Volume of Cryptocurrencies traded"
)
#### Converting data into timeseries data
####### Creating Time-series object for each cryptocurrencies #######

# Date and Close columns of one coin's daily data, ordered by Date.
daily_close <- function(imputed_data) {
  imputed_data[, c("Date", "Close")] %>% arrange(Date)
}

# Daily time series (frequency = 365) of the closing prices, starting at the
# (year, day-of-year) of the first date.
# Only `start` is passed: ts() then derives the end from the number of
# observations, so every closing price is kept exactly once. When `end` is
# passed as well, ts() silently truncates or recycles the data whenever the
# number of rows differs from the span implied by the two dates (a leap day,
# or days missing from the data).
close_series <- function(close_data) {
  first_day <- close_data$Date[1]
  ts(
    close_data$Close,
    start = c(
      as.numeric(format(first_day, "%Y")),
      as.numeric(format(first_day, "%j"))
    ),
    frequency = 365
  )
}

eth_ts_data <- daily_close(eth_imputed_data)
close_ts_eth <- close_series(eth_ts_data)

btc_ts_data <- daily_close(bitcoin_imputed_data)
close_ts_btc <- close_series(btc_ts_data)

ada_ts_data <- daily_close(cardano_imputed_data)
close_ts_ada <- close_series(ada_ts_data)

bin_ts_data <- daily_close(binance_imputed_data)
close_ts_bin <- close_series(bin_ts_data)

doge_ts_data <- daily_close(dodge_imputed_data)
close_ts_doge <- close_series(doge_ts_data)

xrp_ts_data <- daily_close(xrp_imputed_data)
close_ts_xrp <- close_series(xrp_ts_data)

head(close_ts_btc, 5)
## Time Series:
## Start = c(2018, 192)
## End = c(2018, 196)
## Frequency = 365
## [1] 6332.090 6174.555 6450.000 6310.000 6415.000
# Additive decomposition of each closing-price series, plotted right after it
# is computed.
dects_eth <- decompose(close_ts_eth, type = "additive")
plot(dects_eth)

dects_btc <- decompose(close_ts_btc, type = "additive")
plot(dects_btc)

dects_ada <- decompose(close_ts_ada, type = "additive")
plot(dects_ada)

dects_xrp <- decompose(close_ts_xrp, type = "additive")
plot(dects_xrp)

# Binance Coin and Dogecoin decompositions are left disabled.
# NOTE(review): presumably these series are shorter than the two full periods
# decompose() needs - confirm.
# dects_bin <- decompose(close_ts_bin)
# plot(dects_bin)
# dects_doge <- decompose(close_ts_doge)
# plot(dects_doge)
# Fit one automatically selected ARIMA model per coin, each with the seasonal
# differencing order fixed at D = 1.
close_arima_eth <- auto.arima(close_ts_eth, D = 1)
close_arima_btc <- auto.arima(close_ts_btc, D = 1)
close_arima_ada <- auto.arima(close_ts_ada, D = 1)
close_arima_xrp <- auto.arima(close_ts_xrp, D = 1)
close_arima_bin <- auto.arima(close_ts_bin, D = 1)
close_arima_doge <- auto.arima(close_ts_doge, D = 1)
#### Forecasting model ####
# Draw the six forecasts on one 3 x 2 grid. par() returns the previous
# settings, which are restored afterwards so later plots are not forced into
# the same grid.
old_par <- par(mfrow = c(3, 2))
plot(forecast(close_arima_eth), main = "Ethereum Coin Forecasting")
plot(forecast(close_arima_btc), main = "Bitcoin Forecasting")
plot(forecast(close_arima_ada), main = "Cardano Forecasting")
plot(forecast(close_arima_xrp), main = "XRP coin Forecasting")
plot(forecast(close_arima_bin), main = "Binance coin Forecasting")
plot(forecast(close_arima_doge), main = "Dogecoin Forecasting")
par(old_par)