First Steps

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(readr)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v stringr 1.4.0
## v tidyr   1.2.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Starting the Problem- A six-day rolling average on a fixed file

First things first: let’s work toward a single goal, computing a six-day rolling average.

For this we will use zoo’s rollmean function, which lets us choose a fill value for the leading rows that do not yet have a full window, as well as how the window is aligned. We use align = "right" so that each average covers the current day and the five days before it.

# Load the Dogecoin price history and add a six-day trailing average of Close.
file <- "coin_Dogecoin.csv"
# Read through `file` so the path is spelled out in exactly one place.
df <- read_delim(file, delim = ",", col_names = TRUE, show_col_types = FALSE)
# align = "right" ends each window on the current row, so a value averages the
# current day and the five days before it. The first five rows have no full
# window and are filled with 0. `fill` alone controls that padding (`na.pad`
# is deprecated in zoo), and no empty trailing argument is passed.
df$Six_Day_RollingAvg <- rollmean(df$Close, 6, fill = 0, align = "right")
head(df, 10)

## # A tibble: 10 x 11
##      SNo Name  Symbol Date                   High     Low    Open   Close Volume
##    <dbl> <chr> <chr>  <dttm>                <dbl>   <dbl>   <dbl>   <dbl>  <dbl>
##  1     1 Doge~ DOGE   2013-12-16 23:59:59 8.66e-4 1.50e-4 2.99e-4 2.05e-4      0
##  2     2 Doge~ DOGE   2013-12-17 23:59:59 2.89e-4 1.16e-4 2.07e-4 2.69e-4      0
##  3     3 Doge~ DOGE   2013-12-18 23:59:59 3.62e-4 2.05e-4 2.67e-4 3.62e-4      0
##  4     4 Doge~ DOGE   2013-12-19 23:59:59 1.52e-3 3.28e-4 3.95e-4 1.16e-3      0
##  5     5 Doge~ DOGE   2013-12-20 23:59:59 1.14e-3 6.62e-4 1.14e-3 7.04e-4      0
##  6     6 Doge~ DOGE   2013-12-21 23:59:59 7.00e-4 3.54e-4 7.00e-4 3.94e-4      0
##  7     7 Doge~ DOGE   2013-12-22 23:59:59 4.98e-4 2.80e-4 3.91e-4 3.15e-4      0
##  8     8 Doge~ DOGE   2013-12-23 23:59:59 4.60e-4 2.94e-4 3.13e-4 4.51e-4      0
##  9     9 Doge~ DOGE   2013-12-24 23:59:59 9.13e-4 4.50e-4 4.50e-4 6.86e-4      0
## 10    10 Doge~ DOGE   2013-12-25 23:59:59 6.94e-4 5.58e-4 6.86e-4 5.87e-4      0
## # ... with 2 more variables: Marketcap <dbl>, Six_Day_RollingAvg <dbl>

Completing the Problem- A year to date average on a fixed file

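Now that we have a six-day rolling average, let’s address the year-to-date (YTD) mean. Here it is computed as the running mean of Close from the first row of the file up to each day.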

# Re-read the Dogecoin file so this section starts from the raw columns.
file <- "coin_Dogecoin.csv"
df <- read_delim(
  file,
  delim = ",",
  col_names = TRUE,
  show_col_types = FALSE
)
# Peek at the first ten closing prices before averaging them.
print(df$Close[seq_len(10)])

##  [1] 0.000204731 0.000268540 0.000361631 0.001162490 0.000703866 0.000393679
##  [7] 0.000314762 0.000451185 0.000685547 0.000586701

# Running mean of Close: row i holds the mean of Close[1:i], i.e. the average
# of every close from the first row of the file through that row. It does not
# restart at calendar-year boundaries.
# cumsum() divided by the row position gives the same values as calling mean()
# on a growing prefix, but in one vectorised pass. The column is created by a
# single assignment (no "uninitialised column" warning) and a zero-row table
# is handled, where 1:NROW(df) would iterate over c(1, 0).
df$RollingAvg <- cumsum(df$Close) / seq_along(df$Close)

df

## # A tibble: 2,760 x 11
##      SNo Name  Symbol Date                   High     Low    Open   Close Volume
##    <dbl> <chr> <chr>  <dttm>                <dbl>   <dbl>   <dbl>   <dbl>  <dbl>
##  1     1 Doge~ DOGE   2013-12-16 23:59:59 8.66e-4 1.50e-4 2.99e-4 2.05e-4      0
##  2     2 Doge~ DOGE   2013-12-17 23:59:59 2.89e-4 1.16e-4 2.07e-4 2.69e-4      0
##  3     3 Doge~ DOGE   2013-12-18 23:59:59 3.62e-4 2.05e-4 2.67e-4 3.62e-4      0
##  4     4 Doge~ DOGE   2013-12-19 23:59:59 1.52e-3 3.28e-4 3.95e-4 1.16e-3      0
##  5     5 Doge~ DOGE   2013-12-20 23:59:59 1.14e-3 6.62e-4 1.14e-3 7.04e-4      0
##  6     6 Doge~ DOGE   2013-12-21 23:59:59 7.00e-4 3.54e-4 7.00e-4 3.94e-4      0
##  7     7 Doge~ DOGE   2013-12-22 23:59:59 4.98e-4 2.80e-4 3.91e-4 3.15e-4      0
##  8     8 Doge~ DOGE   2013-12-23 23:59:59 4.60e-4 2.94e-4 3.13e-4 4.51e-4      0
##  9     9 Doge~ DOGE   2013-12-24 23:59:59 9.13e-4 4.50e-4 4.50e-4 6.86e-4      0
## 10    10 Doge~ DOGE   2013-12-25 23:59:59 6.94e-4 5.58e-4 6.86e-4 5.87e-4      0
## # ... with 2,750 more rows, and 2 more variables: Marketcap <dbl>,
## #   RollingAvg <dbl>

Expanding the problem-Getting a list of available coins

From there, let’s have some fun and get a list of all the available coins in the dataset from Kaggle. The goal is to be able to easily select a coin from the list and then run the analysis on it.

# File names of the per-coin CSVs found in the coin_list/ directory.
file_list <- list.files(path = "coin_list")
# Start from an empty character vector: seeding with "" would leave a blank
# entry at the head of the coin list once the names are appended to it.
coin_list <- character(0)
print(file_list)

##  [1] "coin_Aave.csv"           "coin_BinanceCoin.csv"   
##  [3] "coin_Bitcoin.csv"        "coin_Cardano.csv"       
##  [5] "coin_ChainLink.csv"      "coin_Cosmos.csv"        
##  [7] "coin_CryptocomCoin.csv"  "coin_Dogecoin.csv"      
##  [9] "coin_EOS.csv"            "coin_Ethereum.csv"      
## [11] "coin_Iota.csv"           "coin_Litecoin.csv"      
## [13] "coin_Monero.csv"         "coin_NEM.csv"           
## [15] "coin_Polkadot.csv"       "coin_Solana.csv"        
## [17] "coin_Stellar.csv"        "coin_Tether.csv"        
## [19] "coin_Tron.csv"           "coin_Uniswap.csv"       
## [21] "coin_USDCoin.csv"        "coin_WrappedBitcoin.csv"
## [23] "coin_XRP.csv"

# Pull the coin name out of every "coin_<name>.csv" file name in one
# vectorised call instead of growing a list inside a 1:length() loop, which
# would iterate over c(1, 0) when the directory is empty.
# The dot is escaped and the pattern anchored so only a literal ".csv" suffix
# matches.
coin_list <- str_extract(file_list, "(?<=coin_)(.*)(?=\\.csv$)")
# Files that do not follow the naming scheme yield NA; leave them out.
coin_list <- coin_list[!is.na(coin_list)]
# One row per available coin, with no blank placeholder row.
Coin_df <- data.frame(coin_list = coin_list)

Coin Selection

From this list, feel free to pick any two coins.

# The two coins to analyse; each must be one of the names in Coin_df$coin_list.
Coin1 <- "Bitcoin"
Coin2 <- "Ethereum"

# %in% always answers TRUE or FALSE, whereas any(x == y) can return NA (and
# make `if` fail) when either side contains a missing value.
coin_1_known <- Coin1 %in% Coin_df$coin_list
coin_2_known <- Coin2 %in% Coin_df$coin_list

if (!coin_1_known) {
  print("Please select a different coin from the list above for Coin 1")
}
if (!coin_2_known) {
  print("Please select a different coin from the list above for Coin 2")
}
if (coin_1_known && coin_2_known) {
  print("Please continue to the next steps")
}

## [1] "Please continue to the next steps"

Function Creation

Now we’re going to take the code for the six-day moving average and the year-to-date average and turn each into a function. I like separating the functions so that they can be easily reused in the future.

#' Trailing rolling average of a coin's closing price
#'
#' Reads a comma-delimited price file and adds `Six_Day_RollingAvg`, the mean
#' of `Close` over a right-aligned window (the current row plus the rows
#' immediately before it).
#'
#' @param file_name Path to a CSV file with `Name`, `Date` and `Close` columns.
#' @param window Number of rows in each window; defaults to 6.
#' @param fill Value stored in the leading rows that do not yet have a full
#'   window; defaults to 0. Pass `NA` to mark them as missing instead.
#' @return The `Name`, `Date` and `Six_Day_RollingAvg` columns of the file.
Six_Day_RollingAvg <- function(file_name, window = 6, fill = 0) {
  six_day_ravg <- read_delim(
    file_name,
    delim = ",",
    col_names = TRUE,
    show_col_types = FALSE
  )
  # `fill` alone controls the padding (`na.pad` is deprecated in zoo).
  six_day_ravg$Six_Day_RollingAvg <- rollmean(
    six_day_ravg$Close,
    window,
    fill = fill,
    align = "right"
  )
  six_day_ravg[c("Name", "Date", "Six_Day_RollingAvg")]
}
#' Running average of a coin's closing price
#'
#' Reads a comma-delimited price file and adds `RollingAvg`, where row i holds
#' the mean of `Close` over rows 1 to i of the file. The average does not
#' restart at calendar-year boundaries.
#'
#' @param file_name Path to a CSV file with `Name`, `Date` and `Close` columns.
#' @return The `Name`, `Date` and `RollingAvg` columns of the file, with
#'   `RollingAvg` as a numeric column.
ytd_avg <- function(file_name) {
  ytd_avg_df <- read_delim(
    file_name,
    delim = ",",
    col_names = TRUE,
    show_col_types = FALSE
  )
  closes <- ytd_avg_df$Close
  # Cumulative sum over row position equals the mean of each growing prefix.
  # Assigning the numeric result directly keeps the column numeric; filling a
  # column pre-seeded with the string "NA" would coerce every mean to text.
  ytd_avg_df$RollingAvg <- cumsum(closes) / seq_along(closes)
  ytd_avg_df[c("Name", "Date", "RollingAvg")]
}
#' Relative path of a coin's CSV file
#'
#' @param coin_name Coin name as it appears in the file name, e.g. "Bitcoin".
#' @return The path "coin_list/coin_<coin_name>.csv".
Make_File_from_Coin <- function(coin_name) {
  csv_name <- paste0("coin_", coin_name, ".csv")
  paste0("coin_list/", csv_name)
}

Now let’s test the functions we just created. First, let’s test Make_File_from_Coin to ensure that it generates the right relative path. Next, let’s create the four data frames for Coin1 and Coin2 and merge them by Name and Date, giving two data frames, one each for Coin1 and Coin2, with the coin name, date, YTD average, and rolling average.

Finally, let’s combine the two dataframes for each coin, merging on the date, so we can easily play with the data in the future.

# Confirm the helper builds the expected relative path for the first coin.
print(Make_File_from_Coin(coin_name = Coin1))

## [1] "coin_list/coin_Bitcoin.csv"

# One table per coin: the six-day average joined to the running average on
# the coin name and date.
df_coin_1 <- merge(
  Six_Day_RollingAvg(Make_File_from_Coin(Coin1)),
  ytd_avg(Make_File_from_Coin(Coin1)),
  by = c("Name", "Date")
)
df_coin_2 <- merge(
  Six_Day_RollingAvg(Make_File_from_Coin(Coin2)),
  ytd_avg(Make_File_from_Coin(Coin2)),
  by = c("Name", "Date")
)
# Line the two coins up side by side on the dates they share; merge() marks
# the first coin's columns with .x and the second coin's with .y.
output <- merge(df_coin_1, df_coin_2, by = "Date")
knitr::kable(head(output), format = "simple")

Date	Name.x	Six_Day_RollingAvg.x	RollingAvg.x	Name.y	Six_Day_RollingAvg.y	RollingAvg.y
2015-08-08 23:59:59	Bitcoin	277.9143	374.064804113828	Ethereum	0.000000	0.75332498550415
2015-08-09 23:59:59	Bitcoin	275.2237	373.933973626072	Ethereum	0.000000	0.727611005306244
2015-08-10 23:59:59	Bitcoin	271.7657	373.802721860597	Ethereum	0.000000	0.721223334471385
2015-08-11 23:59:59	Bitcoin	269.8497	373.678869482714	Ethereum	0.000000	0.807882502675056
2015-08-12 23:59:59	Bitcoin	267.8162	373.550516776491	Ethereum	0.000000	0.88979400396347
2015-08-13 23:59:59	Bitcoin	265.2320	373.419727612567	Ethereum	1.046107	1.04610666632652

Modular Chart Building

Using the paste0 function and the coin names from earlier, we are going to test building charts modularly here as well.

The End

Finally, we have the output of the dataset: a view of the 6-day average and YTD value of the two coins per day (in this case, BTC and ETH). It’s really interesting to see that many of the local maxima occur on very similar dates.

References

Kaggle’s Historical Crypto Set: https://www.kaggle.com/sudalairajkumar/cryptocurrencypricehistory/download

Rolling Window Assignment

Alex Moyse