Cryptocurrency:

Cryptocurrency is a new form of asset that is used for digital payments and investment. The upsurge in cryptocurrency prices over the past 10 years has increased the interest of researchers and investors in analyzing and forecasting future prices. The primary focus of this research is to develop an Autoregressive Integrated Moving Average (ARIMA) model, a statistical time-series model, for forecasting cryptocurrency prices. This research focuses on six cryptocurrencies – Bitcoin, Dogecoin, Ethereum, Binance Coin, XRP and Cardano – chosen for their popularity, and discusses their price movements and stability using various exploratory data analysis and visualization techniques. The data for these six coins was collected from Kaggle datasets and merged into a single dataset. In this research, the dataset records the prices of these six cryptocurrencies from 2018 to 2021.

About dataset:

The cryptocurrency dataset from Kaggle consists of 244,855 observations covering six popular cryptocurrencies, with 6 independent variables and 1 dependent variable. A sample of the raw Ethereum data is shown below:

# Load the raw Ethereum/USDT candles and preview the first ten rows.
eth_raw_data <- read.csv("ethusdt.csv")
head(eth_raw_data, n = 10)
##                   Date   Open   High    Low  Close Volume
## 1  2018-07-22 12:00:00 460.81 460.81 460.81 460.81  0.050
## 2  2018-07-22 12:30:00 460.81 461.03 460.77 460.77  0.610
## 3  2018-07-22 13:00:00 460.77 460.77 460.77 460.77  0.000
## 4  2018-07-22 13:30:00 460.77 464.12 460.77 464.12  0.223
## 5  2018-07-22 14:00:00 464.12 466.55 463.20 466.55  1.437
## 6  2018-07-22 14:30:00 466.70 466.73 465.23 465.23  0.709
## 7  2018-07-22 15:00:00 465.23 467.33 465.23 467.18  0.281
## 8  2018-07-22 15:30:00 467.18 467.18 467.18 467.18  0.000
## 9  2018-07-22 16:00:00 467.18 467.53 467.18 467.53  0.015
## 10 2018-07-22 16:30:00 467.53 467.53 467.53 467.53  0.000

Data Pre-processing:

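We converted the raw data into daily data, i.e. each day is represented by a single record instead of multiple intraday records. For each day, the opening price is taken from the first record (00:00:00) and the closing price from the last record (23:30:00); the daily high, low and volume are the highest high, the lowest low and the total volume across that day's records.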

#' Collapse intraday OHLCV candles into one record per calendar day.
#'
#' A day starts at the row whose time contains "00:00:00" and is completed by
#' the row whose time contains "23:30:00". Open comes from the first of these,
#' Close from the second; High, Low and Volume are the maximum, minimum and
#' sum over every row from the opening row to the closing row inclusive.
#' Days that lack either boundary row, or that contain a missing value, end up
#' with an NA field and are removed by `na.omit()`.
#'
#' @param raw_data Data frame with columns `Date` ("YYYY-MM-DD HH:MM:SS"
#'   strings), `Open`, `High`, `Low`, `Close` and `Volume`, in chronological
#'   order.
#' @param cryptoname Label written to the `Cryptocurrency` column.
#' @return Data frame with columns `Date` (class Date), `Open`, `High`, `Low`,
#'   `Close`, `Volume`, `Cryptocurrency`; one row per complete day. Row names
#'   are the positions of the days before incomplete ones were dropped.
aggregating_data <- function(raw_data, cryptoname) {
  required <- c("Date", "Open", "High", "Low", "Close", "Volume")
  absent <- setdiff(required, names(raw_data))
  if (length(absent) > 0) {
    stop("raw_data is missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Split every timestamp once, up front. as.character() also copes with a
  # Date column that was read in as a factor.
  stamps <- strsplit(as.character(raw_data[["Date"]]), " ", fixed = TRUE)
  day_of <- vapply(stamps, function(p) p[1], character(1))
  time_of <- vapply(stamps, function(p) p[2], character(1))
  opens_day <- grepl("00:00:00", time_of, fixed = TRUE)
  closes_day <- !opens_day & grepl("23:30:00", time_of, fixed = TRUE)

  # Pull the columns out once; indexing a data frame per row is slow.
  open_px <- raw_data[["Open"]]
  high_px <- raw_data[["High"]]
  low_px <- raw_data[["Low"]]
  close_px <- raw_data[["Close"]]
  traded <- raw_data[["Volume"]]

  # Every closing row finishes one output record, and at most one further
  # record can be left open at the end of the data.
  capacity <- sum(closes_day) + 1L
  day_date <- rep(NA_character_, capacity)
  day_open <- rep(NA_real_, capacity)
  day_high <- rep(NA_real_, capacity)
  day_low <- rep(NA_real_, capacity)
  day_close <- rep(NA_real_, capacity)
  day_volume <- rep(NA_real_, capacity)
  day_name <- rep(NA_character_, capacity)

  idx <- 1L      # output record currently being filled
  filled <- 0L   # highest output record touched so far
  volume <- 0
  low <- 0
  high <- 0

  # seq_len() keeps the loop from running on an empty input.
  for (row in seq_len(nrow(raw_data))) {
    if (opens_day[row]) {
      day_date[idx] <- day_of[row]
      day_open[idx] <- open_px[row]
      low <- low_px[row]
      high <- high_px[row]
      # Restart the running volume at the start of each day so that a
      # previous day without a closing row cannot leak into this one.
      volume <- traded[row]
      filled <- idx
      next
    }

    # min()/max() propagate NA, so a day with a missing price is dropped by
    # na.omit() below instead of aborting the whole run.
    low <- min(low, low_px[row])
    high <- max(high, high_px[row])
    volume <- volume + traded[row]

    if (closes_day[row]) {
      day_close[idx] <- close_px[row]
      day_volume[idx] <- volume
      day_low[idx] <- low
      day_high[idx] <- high
      day_name[idx] <- cryptoname
      filled <- idx
      idx <- idx + 1L
      volume <- 0
    }
  }

  keep <- seq_len(filled)
  daily <- data.frame(
    Date = as.Date(day_date[keep], format = "%Y-%m-%d"),
    Open = day_open[keep],
    High = day_high[keep],
    Low = day_low[keep],
    Close = day_close[keep],
    Volume = day_volume[keep],
    Cryptocurrency = day_name[keep],
    stringsAsFactors = FALSE
  )

  # Drop days that never received both an opening and a closing row.
  na.omit(daily)
}

# Aggregate the raw Ethereum candles to one row per day and preview the result.
eth_imputed_data <- aggregating_data(
  raw_data = eth_raw_data,
  cryptoname = "Ethereum"
)
head(eth_imputed_data, n = 10)
##          Date    Open   High    Low   Close Volume Cryptocurrency
## 2  2018-07-23 460.220 469.89 452.59 455.000  5.634       Ethereum
## 3  2018-07-24 455.000 481.00 440.01 481.000  7.235       Ethereum
## 4  2018-07-25 481.000 485.76 462.41 482.849  3.932       Ethereum
## 5  2018-07-26 482.849 485.00 463.49 463.730 12.349       Ethereum
## 6  2018-07-27 463.730 471.39 456.80 471.390  5.280       Ethereum
## 7  2018-07-28 471.390 482.42 466.02 469.030  5.446       Ethereum
## 8  2018-07-29 469.030 490.00 464.00 490.000  2.855       Ethereum
## 9  2018-07-30 490.000 490.00 443.68 443.680  8.787       Ethereum
## 10 2018-07-31 443.680 464.01 432.50 432.500  3.876       Ethereum
## 11 2018-08-01 432.500 459.99 415.65 422.980 36.051       Ethereum

Each record above is the aggregated Ethereum data for one particular day.

# Read the raw candle files for the remaining five coins.
binance_raw_data <- read.csv("bnbusdt.csv")
bitcoin_raw_data <- read.csv("btcusdt.csv")
dodge_raw_data <- read.csv("dogeusdt.csv")
xrp_raw_data <- read.csv("xrpusdt.csv")
cardano_raw_data <- read.csv("adausdt.csv")

# Aggregate each coin to one record per day, tagged with its name.
binance_imputed_data <- aggregating_data(binance_raw_data, cryptoname = "Binance")
bitcoin_imputed_data <- aggregating_data(bitcoin_raw_data, cryptoname = "Bitcoin")
dodge_imputed_data <- aggregating_data(dodge_raw_data, cryptoname = "Doge")
xrp_imputed_data <- aggregating_data(xrp_raw_data, cryptoname = "XRP")
cardano_imputed_data <- aggregating_data(cardano_raw_data, cryptoname = "Cardano")

Merging all the data

All six cryptocurrency datasets are merged into a single dataset, with an added column giving the cryptocurrency name so that the trends of each coin can be clearly distinguished.

##     X       Date    Open   High    Low   Close Volume Cryptocurrency
## 1   2 2018-07-23 460.220 469.89 452.59 455.000  5.634       Ethereum
## 2   3 2018-07-24 455.000 481.00 440.01 481.000  7.235       Ethereum
## 3   4 2018-07-25 481.000 485.76 462.41 482.849  3.932       Ethereum
## 4   5 2018-07-26 482.849 485.00 463.49 463.730 12.349       Ethereum
## 5   6 2018-07-27 463.730 471.39 456.80 471.390  5.280       Ethereum
## 6   7 2018-07-28 471.390 482.42 466.02 469.030  5.446       Ethereum
## 7   8 2018-07-29 469.030 490.00 464.00 490.000  2.855       Ethereum
## 8   9 2018-07-30 490.000 490.00 443.68 443.680  8.787       Ethereum
## 9  10 2018-07-31 443.680 464.01 432.50 432.500  3.876       Ethereum
## 10 11 2018-08-01 432.500 459.99 415.65 422.980 36.051       Ethereum
## # A tibble: 6 × 4
##   Cryptocurrency   min_Open max_Close max_Volume
##   <chr>               <dbl>     <dbl>      <dbl>
## 1 Binance           9.27      680         15454.
## 2 Bitcoin        3205       63214           244.
## 3 Cardano           0.0237      2.28    3453160.
## 4 Doge              0.00249     0.671 219205228 
## 5 Ethereum         84.0      4123.         4224.
## 6 XRP               0.140       1.82    8672204

Visualization:

Observing trend in Closing price of various cryptocurrencies

Pie chart for highly traded crypto

# Total traded value (volume x closing price) per cryptocurrency.
# NOTE(review): `data` is presumably the merged six-coin dataset; confirm.
# NOTE(review): the name `pi` masks base R's constant `pi` from here on; it is
# kept unchanged in case later code refers to this data frame.
pi <- data %>%
  group_by(Cryptocurrency) %>%
  summarise(Frequency = sum(Volume * Close)) %>%
  as.data.frame()

# Whole-number percentage share of each coin, used as the slice label.
pct <- round(pi$Frequency / sum(pi$Frequency) * 100)
lbls <- paste0(pi$Cryptocurrency, " ", pct, "%")

pie3D(
  pi$Frequency,
  labels = lbls,
  col = c("skyblue", "skyblue3", "paleturquoise3", "paleturquoise4", "grey", "white"),
  main = "Volume of Cryptocurrencies traded"
)

#### Converting data into timeseries data

####### Creating Time-series object for each cryptocurrencies #######

# Build a daily closing-price time series from a data frame that has `Date`
# and `Close` columns and is already sorted by date.
#
# Only `start` is passed to ts(); the end is derived from the number of
# observations. Supplying both `start` and `end` makes ts() recycle or
# truncate the data whenever the implied span differs from the observation
# count, which happens as soon as a day was dropped during aggregation or the
# range crosses a leap year (day-of-year 366 with frequency 365).
# Note that the index is positional: a gap in the dates is not represented as
# a gap in the series.
build_close_ts <- function(daily) {
  first_day <- daily$Date[1]
  ts(
    daily$Close,
    start = c(
      as.numeric(format(first_day, "%Y")),
      as.numeric(format(first_day, "%j"))
    ),
    frequency = 365
  )
}

# Date-ordered (Date, Close) tables, one per cryptocurrency.
eth_ts_data <- eth_imputed_data[, c("Date", "Close")] %>% arrange(Date)
btc_ts_data <- bitcoin_imputed_data[, c("Date", "Close")] %>% arrange(Date)
ada_ts_data <- cardano_imputed_data[, c("Date", "Close")] %>% arrange(Date)
bin_ts_data <- binance_imputed_data[, c("Date", "Close")] %>% arrange(Date)
doge_ts_data <- dodge_imputed_data[, c("Date", "Close")] %>% arrange(Date)
xrp_ts_data <- xrp_imputed_data[, c("Date", "Close")] %>% arrange(Date)

# Time-series objects of the daily closing price, one per cryptocurrency.
close_ts_eth <- build_close_ts(eth_ts_data)
close_ts_btc <- build_close_ts(btc_ts_data)
close_ts_ada <- build_close_ts(ada_ts_data)
close_ts_bin <- build_close_ts(bin_ts_data)
close_ts_doge <- build_close_ts(doge_ts_data)
close_ts_xrp <- build_close_ts(xrp_ts_data)

head(close_ts_btc, 5)
## Time Series:
## Start = c(2018, 192) 
## End = c(2018, 196) 
## Frequency = 365 
## [1] 6332.090 6174.555 6450.000 6310.000 6415.000

Model building for each cryptocurrency

# Fit one automatically selected ARIMA model per closing-price series,
# with the order of seasonal differencing set to 1 in every fit.
close_arima_eth <- auto.arima(close_ts_eth, D = 1)
close_arima_btc <- auto.arima(close_ts_btc, D = 1)
close_arima_bin <- auto.arima(close_ts_bin, D = 1)
close_arima_ada <- auto.arima(close_ts_ada, D = 1)
close_arima_doge <- auto.arima(close_ts_doge, D = 1)
close_arima_xrp <- auto.arima(close_ts_xrp, D = 1)

Forecasting model

# Lay the six forecast plots out on a grid of 3 rows by 2 columns.
par(mfrow = c(3, 2))

# One forecast plot per fitted model; panels are filled row by row in the
# order the plots are drawn.
plot(forecast(close_arima_eth), main = "Ethereum Coin Forecasting")
plot(forecast(close_arima_btc), main = "Bitcoin Forecasting")
plot(forecast(close_arima_ada), main = "Cardano Forecasting")
plot(forecast(close_arima_xrp), main = "XRP coin Forecasting")
plot(forecast(close_arima_bin), main = "Binance coin Forecasting")
plot(forecast(close_arima_doge), main = "Dogecoin Forecasting")