First Steps

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(readr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v stringr 1.4.0
## v tidyr   1.2.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Starting the Problem- A six-day rolling average on a fixed file

First things first: let’s work on the single goal of computing a six-day rolling average.

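For this we will use zoo’s rollmean function, which lets us choose both how positions without a complete window are filled and how the window is aligned. We use align = "right" so that each average covers the current day and the five days before it. Note that rollmean does not itself handle missing values in the input; zoo’s rollapply is the tool for that case.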

# Path of the single CSV used for this first pass.
file <- "coin_Dogecoin.csv"
df <- read_delim(file, delim = ",", col_names = TRUE, show_col_types = FALSE)
# Trailing six-row mean of Close: align = "right" ends each window on the
# current row. The first five rows have no complete window and are filled
# with 0, so those leading zeros are placeholders, not real averages.
df$Six_Day_RollingAvg <- rollmean(df$Close, k = 6, fill = 0, align = "right")
head(df, 10)
## # A tibble: 10 x 11
##      SNo Name  Symbol Date                   High     Low    Open   Close Volume
##    <dbl> <chr> <chr>  <dttm>                <dbl>   <dbl>   <dbl>   <dbl>  <dbl>
##  1     1 Doge~ DOGE   2013-12-16 23:59:59 8.66e-4 1.50e-4 2.99e-4 2.05e-4      0
##  2     2 Doge~ DOGE   2013-12-17 23:59:59 2.89e-4 1.16e-4 2.07e-4 2.69e-4      0
##  3     3 Doge~ DOGE   2013-12-18 23:59:59 3.62e-4 2.05e-4 2.67e-4 3.62e-4      0
##  4     4 Doge~ DOGE   2013-12-19 23:59:59 1.52e-3 3.28e-4 3.95e-4 1.16e-3      0
##  5     5 Doge~ DOGE   2013-12-20 23:59:59 1.14e-3 6.62e-4 1.14e-3 7.04e-4      0
##  6     6 Doge~ DOGE   2013-12-21 23:59:59 7.00e-4 3.54e-4 7.00e-4 3.94e-4      0
##  7     7 Doge~ DOGE   2013-12-22 23:59:59 4.98e-4 2.80e-4 3.91e-4 3.15e-4      0
##  8     8 Doge~ DOGE   2013-12-23 23:59:59 4.60e-4 2.94e-4 3.13e-4 4.51e-4      0
##  9     9 Doge~ DOGE   2013-12-24 23:59:59 9.13e-4 4.50e-4 4.50e-4 6.86e-4      0
## 10    10 Doge~ DOGE   2013-12-25 23:59:59 6.94e-4 5.58e-4 6.86e-4 5.87e-4      0
## # ... with 2 more variables: Marketcap <dbl>, Six_Day_RollingAvg <dbl>

Completing the Problem- A year to date average on a fixed file

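Now that we have a six-day rolling average, let’s address the year-to-date (YTD) mean, computed here as the running mean of Close from the first row of the file up to the current row.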

file <- "coin_Dogecoin.csv"
df <- read_delim(file, delim = ",", col_names = TRUE, show_col_types = FALSE)
print(df$Close[1:10])
##  [1] 0.000204731 0.000268540 0.000361631 0.001162490 0.000703866 0.000393679
##  [7] 0.000314762 0.000451185 0.000685547 0.000586701
# Running mean of Close from the first row through each row, computed in one
# vectorised pass (cumulative sum divided by the number of rows seen so far).
# Creating the column in a single assignment avoids the "uninitialised column"
# warning and the quadratic cost of re-averaging a growing slice per row.
df$RollingAvg <- cumsum(df$Close) / seq_along(df$Close)
df
## # A tibble: 2,760 x 11
##      SNo Name  Symbol Date                   High     Low    Open   Close Volume
##    <dbl> <chr> <chr>  <dttm>                <dbl>   <dbl>   <dbl>   <dbl>  <dbl>
##  1     1 Doge~ DOGE   2013-12-16 23:59:59 8.66e-4 1.50e-4 2.99e-4 2.05e-4      0
##  2     2 Doge~ DOGE   2013-12-17 23:59:59 2.89e-4 1.16e-4 2.07e-4 2.69e-4      0
##  3     3 Doge~ DOGE   2013-12-18 23:59:59 3.62e-4 2.05e-4 2.67e-4 3.62e-4      0
##  4     4 Doge~ DOGE   2013-12-19 23:59:59 1.52e-3 3.28e-4 3.95e-4 1.16e-3      0
##  5     5 Doge~ DOGE   2013-12-20 23:59:59 1.14e-3 6.62e-4 1.14e-3 7.04e-4      0
##  6     6 Doge~ DOGE   2013-12-21 23:59:59 7.00e-4 3.54e-4 7.00e-4 3.94e-4      0
##  7     7 Doge~ DOGE   2013-12-22 23:59:59 4.98e-4 2.80e-4 3.91e-4 3.15e-4      0
##  8     8 Doge~ DOGE   2013-12-23 23:59:59 4.60e-4 2.94e-4 3.13e-4 4.51e-4      0
##  9     9 Doge~ DOGE   2013-12-24 23:59:59 9.13e-4 4.50e-4 4.50e-4 6.86e-4      0
## 10    10 Doge~ DOGE   2013-12-25 23:59:59 6.94e-4 5.58e-4 6.86e-4 5.87e-4      0
## # ... with 2,750 more rows, and 2 more variables: Marketcap <dbl>,
## #   RollingAvg <dbl>

Expanding the problem-Getting a list of available coins

From there, let’s have some fun and get a list of all the available coins in the dataset from Kaggle. The goal is to be able to easily select a coin from the list and then run the analysis on it.

# List the per-coin CSV files; the pattern keeps only names of the form
# coin_<name>.csv so unrelated files in the directory are ignored.
file_list <- list.files(path = "coin_list", pattern = "^coin_.*\\.csv$")
print(file_list)
##  [1] "coin_Aave.csv"           "coin_BinanceCoin.csv"   
##  [3] "coin_Bitcoin.csv"        "coin_Cardano.csv"       
##  [5] "coin_ChainLink.csv"      "coin_Cosmos.csv"        
##  [7] "coin_CryptocomCoin.csv"  "coin_Dogecoin.csv"      
##  [9] "coin_EOS.csv"            "coin_Ethereum.csv"      
## [11] "coin_Iota.csv"           "coin_Litecoin.csv"      
## [13] "coin_Monero.csv"         "coin_NEM.csv"           
## [15] "coin_Polkadot.csv"       "coin_Solana.csv"        
## [17] "coin_Stellar.csv"        "coin_Tether.csv"        
## [19] "coin_Tron.csv"           "coin_Uniswap.csv"       
## [21] "coin_USDCoin.csv"        "coin_WrappedBitcoin.csv"
## [23] "coin_XRP.csv"
# Strip the "coin_" prefix and ".csv" extension from every file name at once.
# str_extract is vectorised, so no loop or "" seed element is needed; the seed
# previously showed up in Coin_df as a bogus empty coin name. The dot in
# "\\.csv" is escaped so it matches a literal "." only.
coin_list <- str_extract(file_list, "(?<=coin_)(.*)(?=\\.csv)")
Coin_df <- data.frame(coin_list = coin_list)

Coin Selection

From this list, feel free to pick any two coins.

# The two coins to analyse; each should be a name found in Coin_df$coin_list.
Coin1 <- "Bitcoin"
Coin2 <- "Ethereum"

# Look each selection up once, then report on the results.
coin_1_listed <- any(Coin_df$coin_list == Coin1)
coin_2_listed <- any(Coin_df$coin_list == Coin2)

if (!coin_1_listed) {
  print("Please select a different coin from the list above for Coin 1")
}
if (!coin_2_listed) {
  print("Please select a different coin from the list above for Coin 2")
}
if (coin_1_listed && coin_2_listed) {
  print("Please continue to the next steps")
}
## [1] "Please continue to the next steps"

Function Creation

Now we’re going to take the code for the six-day moving average and the year-to-date average and turn each into a function. I like separating the functions so that they can be easily reused in the future.

# Trailing rolling mean of a coin's closing price.
#
# Reads the comma-delimited file at `file_name` (header row expected, with at
# least Name, Date and Close columns) and computes a right-aligned rolling
# mean of Close, i.e. each window ends on the current row.
#
# Arguments:
#   file_name  Path to the coin CSV.
#   window     Number of rows per window. Defaults to 6, the original
#              hard-coded value.
#   fill       Value placed in the leading rows that have no complete window.
#              Defaults to 0, the original behaviour; pass NA to mark those
#              rows as missing instead.
#
# Returns the file's rows restricted to the columns Name, Date and
# Six_Day_RollingAvg.
Six_Day_RollingAvg <- function(file_name, window = 6, fill = 0) {
  six_day_ravg <- read_delim(file_name, delim = ",", col_names = TRUE,
                             show_col_types = FALSE)
  # The deprecated na.pad argument and the empty trailing argument of the
  # earlier call are dropped; `fill` alone controls the padding.
  six_day_ravg$Six_Day_RollingAvg <- rollmean(six_day_ravg$Close, k = window,
                                              fill = fill, align = "right")
  six_day_ravg[c("Name", "Date", "Six_Day_RollingAvg")]
}
# Running mean of a coin's closing price.
#
# Reads the comma-delimited file at `file_name` (header row expected, with at
# least Name, Date and Close columns) and, for every row, averages Close over
# all rows from the first one up to and including that row. Despite the
# "ytd" name, the mean is never reset at a calendar-year boundary.
#
# Arguments:
#   file_name  Path to the coin CSV.
#
# Returns the file's rows restricted to the columns Name, Date and RollingAvg,
# where RollingAvg is numeric.
ytd_avg <- function(file_name) {
  ytd_avg_df <- read_delim(file_name, delim = ",", col_names = TRUE,
                           show_col_types = FALSE)
  close_price <- ytd_avg_df$Close
  # Cumulative sum divided by the count of rows seen so far gives the running
  # mean in one vectorised pass. The earlier version seeded the column with
  # the string "NA", which turned every stored mean into text, and re-averaged
  # a growing slice for each row.
  ytd_avg_df$RollingAvg <- cumsum(close_price) / seq_along(close_price)
  ytd_avg_df[c("Name", "Date", "RollingAvg")]
}
# Build the relative path of a coin's CSV inside the coin_list directory,
# e.g. "Bitcoin" -> "coin_list/coin_Bitcoin.csv".
Make_File_from_Coin <- function(coin_name) {
  csv_name <- paste0("coin_", coin_name, ".csv")
  file.path("coin_list", csv_name)
}

Now let’s test these functions. First things first, let’s test Make_File_from_Coin to ensure that we are generating the right relative path. Next, let’s create the four dataframes for Coin1 and Coin2 (a six-day rolling average and a YTD average for each) and merge them by Name and Date, so that we end up with two dataframes, one each for Coin1 and Coin2, containing the coin name, date, YTD average and rolling average.

Finally, let’s combine the two dataframes for each coin, merging on the date, so we can easily play with the data in the future.

# Confirm the relative path generated for the first coin.
coin_1_path <- Make_File_from_Coin(Coin1)
print(coin_1_path)
## [1] "coin_list/coin_Bitcoin.csv"
coin_2_path <- Make_File_from_Coin(Coin2)

# Per coin: join the six-day rolling mean and the running mean on Name and Date.
df_coin_1 <- merge(
  Six_Day_RollingAvg(coin_1_path),
  ytd_avg(coin_1_path),
  by = c("Name", "Date")
)
df_coin_2 <- merge(
  Six_Day_RollingAvg(coin_2_path),
  ytd_avg(coin_2_path),
  by = c("Name", "Date")
)

# Pair the two coins on Date. merge() keeps only dates present for both coins
# and disambiguates the shared column names with .x / .y suffixes.
output <- merge(df_coin_1, df_coin_2, by = "Date")
knitr::kable(head(output), "simple")
Date Name.x Six_Day_RollingAvg.x RollingAvg.x Name.y Six_Day_RollingAvg.y RollingAvg.y
2015-08-08 23:59:59 Bitcoin 277.9143 374.064804113828 Ethereum 0.000000 0.75332498550415
2015-08-09 23:59:59 Bitcoin 275.2237 373.933973626072 Ethereum 0.000000 0.727611005306244
2015-08-10 23:59:59 Bitcoin 271.7657 373.802721860597 Ethereum 0.000000 0.721223334471385
2015-08-11 23:59:59 Bitcoin 269.8497 373.678869482714 Ethereum 0.000000 0.807882502675056
2015-08-12 23:59:59 Bitcoin 267.8162 373.550516776491 Ethereum 0.000000 0.88979400396347
2015-08-13 23:59:59 Bitcoin 265.2320 373.419727612567 Ethereum 1.046107 1.04610666632652

Modular Chart Building

Using the paste0 function and the coin names from earlier, we are going to test building charts modularly here as well.

The End

Finally, we have the output of the dataset: a view of the six-day average and YTD value of the two coins per day (in this case, BTC and ETH). It’s really interesting to see that a lot of the local maximums occur at very similar dates.

References

Kaggle’s Historical Crypto Set-https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory/download