Motivación

Ahora viene la duda: ¿debería mantener los btc o liquidarlos?

Paquetes

library(tidyverse)
library(visdat)
library(xray) # devtools::install_github("sicarul/xray") 
# Download the Bitstamp BTC/USD daily dataset from Quandl as a tibble.
# The API key is read from the QUANDL_API_KEY environment variable rather than
# being hard-coded in the notebook; when the variable is unset the request is
# sent without an api_key parameter.
quandl_key <- Sys.getenv("QUANDL_API_KEY")
quandl_url <- "https://www.quandl.com/api/v3/datasets/BCHARTS/BITSTAMPUSD.csv"
if (nzchar(quandl_key)) {
  quandl_url <- paste0(quandl_url, "?api_key=", quandl_key)
}
df <- read_csv(quandl_url)
Parsed with column specification:
cols(
  Date = col_date(format = ""),
  Open = col_double(),
  High = col_double(),
  Low = col_double(),
  Close = col_double(),
  `Volume (BTC)` = col_double(),
  `Volume (Currency)` = col_double(),
  `Weighted Price` = col_double()
)

Veo los datos

# Preview the first rows of the downloaded table.
head(df)

Visualizo en busca de NAs

# visdat overview of df, used here to look for NA values.
vis_dat(df)

o valores perdidos

# visdat missing-values view of df.
vis_miss(df)

Datos OK; sin embargo, no son rectangulares: hay 2254 precios distintos para 2275 fechas.

# Number of distinct dates in the table.
df$Date %>% unique() %>% length()
[1] 2275
# Number of distinct weighted-price values in the table.
df$`Weighted Price` %>% unique() %>% length()
[1] 2254

Voy a eliminar todas las filas sin weighted price

# Keep only the rows whose weighted price is strictly positive
# (rows with a missing price are dropped as well).
df <- df %>%
  filter(`Weighted Price` > 0)

Ahora grafico

# Line chart of the weighted price over time.
ggplot(data = df, mapping = aes(x = Date, y = `Weighted Price`)) +
  geom_line()

So far, so good

Repaso, ahora con xray, para ver cómo están los datos

# xray check of df for problematic variables.
anomalies(df)
$variables

$problem_variables
NA
# xray distribution summary of df; the recorded output shows Date being skipped.
distributions(df)
================================================================================================================
Ignoring variable Date: Unsupported type for visualization.
================
[1] "Ignoring variable Date: Unsupported type for visualization."

Detección de anomalías

library(AnomalyDetection)

Mantengo solo las dos variables de interés: fecha y precio promedio

# Keep just the date and the weighted price, preview the result, and redraw
# the price line from the reduced table.
df2 <- select(df, Date, `Weighted Price`)
head(df2)
ggplot(data = df2, mapping = aes(x = Date, y = `Weighted Price`)) +
  geom_line()

Borro el df original

# Remove the full table from the workspace; df2 holds the columns still needed.
rm(df)
# Run the time-series detector on the date/price table, looking for positive
# anomalies only, with plotting and expected values requested.
# NOTE(review): the recorded output for this call is an error raised inside
# data.frame() ("differing number of rows") — cause not confirmed from here.
AnomalyDetectionTs(
  df2,
  max_anoms = 0.01,
  direction = "pos",
  plot = TRUE,
  e_value = TRUE # spelled out: `T` is an ordinary variable and can be reassigned
)
Error in data.frame(timestamp = all_anoms[[1]], anoms = all_anoms[[2]],  : 
  arguments imply differing number of rows: 22, 0

Malanga, ¿de dónde salen esos valores extra?

Voy a seleccionar solo aquellas fechas desde el 2015 a la fecha

# Check whether df2 has as many distinct dates as distinct weighted prices.
identical(
  df2$Date %>% unique() %>% length(),
  df2$`Weighted Price` %>% unique() %>% length()
)
[1] TRUE

Ahora sí… :P

Detección de anomalías

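Hay un paquete en R para detectar anomalías en series temporales: AnomalyDetection (de Twitter), cuyo detalle está en https://github.com/twitter/AnomalyDetection. Ojo: el paquete anomalyDetection de CRAN (https://cran.r-project.org/web/packages/anomalyDetection/index.html) es otro distinto.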

# Run the vector-based detector on the weighted price column with a period of
# 365 observations, flagging anomalies in both directions and plotting them.
AnomalyDetectionVec(
  df2[["Weighted Price"]],
  period = 365,
  max_anoms = 0.01,
  direction = "both",
  only_last = FALSE,
  plot = TRUE
)
$anoms

$plot

LS0tCnRpdGxlOiAiQml0Y29pbnMiCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazogCiAgICB0b2M6IHllcwotLS0KCiMgTW90aXZhY2nDs24KCkFob3JhIHZpZW5lIGxhIGR1ZGE6IMK/ZGViZXLDrWEgbWFudGVuZXIgbG9zIGJ0YyBvIGxpcXVpZGFybG9zPwoKIyBQYXF1ZXRlcwpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodmlzZGF0KQpsaWJyYXJ5KHhyYXkpICMgZGV2dG9vbHM6Omluc3RhbGxfZ2l0aHViKCJzaWNhcnVsL3hyYXkiKSAKYGBgCmBgYHtyfQpkZiA8LSByZWFkX2NzdigiaHR0cHM6Ly93d3cucXVhbmRsLmNvbS9hcGkvdjMvZGF0YXNldHMvQkNIQVJUUy9CSVRTVEFNUFVTRC5jc3Y/YXBpX2tleT16UkNiMW1OOUV4TjRDWkZLc3VSRiIpCmBgYApWZW8gbG9zIGRhdG9zCmBgYHtyfQpoZWFkKGRmKQpgYGAKClZpc3VhbGl6byBlbiBidXNjYSBkZSBOQXMKYGBge3J9CnZpc19kYXQoZGYpCmBgYApvIHZhbG9yZXMgcGVyZGlkb3MKYGBge3J9CnZpc19taXNzKGRmKQpgYGAKRGF0b3MgT0ssIHNpbiBlbWJhcmdvIG5vIHNvbiByZWN0YW5ndWxhcmVzLCB5IGhheSAyMjU0IHByaWNlcyBwYXJhIDIyNzUgZmVjaGFzLiAKCmBgYHtyfQpsZW5ndGgodW5pcXVlKGRmJERhdGUpKQpgYGAKYGBge3J9Cmxlbmd0aCh1bmlxdWUoZGYkYFdlaWdodGVkIFByaWNlYCkpCmBgYAoKVm95IGEgZWxpbWluYXIgdG9kYXMgbGFzIGNvbHVtbmFzIHNpbiB3ZWlnaHRlZCBwcmljZQoKYGBge3J9CmRmIDwtIGRmWyB3aGljaChkZiRgV2VpZ2h0ZWQgUHJpY2VgID4gMCksXSAKYGBgCgpBaG9yYSBncmFmaWNvCmBgYHtyfQpkZiAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gRGF0ZSwgeSA9IGBXZWlnaHRlZCBQcmljZWApKSArCiAgZ2VvbV9saW5lKCkKYGBgClNvIGZhciwgc28gZ29vZAoKUmVwYXNvLCBhaG9yYSBjb24geHJheSBwYXJhIHZlciBjb21vIGVzdMOhbiBsb3MgZGF0b3MKCmBgYHtyfQphbm9tYWxpZXMoZGYpCmBgYAoKYGBge3J9CmRpc3RyaWJ1dGlvbnMoZGYpCmBgYAoKCiMgRGV0ZWNjacOzbiBkZSBhbm9tYWxpYXMKCmBgYHtyfQpsaWJyYXJ5KEFub21hbHlEZXRlY3Rpb24pCmBgYAoKTWFudGVuZ28gc29sbyBsYXMgZG9zIHZhcmlhYmxlcyBkZSBpbnRlcsOpczogZmVjaGEgeSBwcmVjaW8gcHJvbWVkaW8KCmBgYHtyfQpkZjIgPC0gZGYgJT4lIAogIHNlbGVjdChEYXRlLCBgV2VpZ2h0ZWQgUHJpY2VgKQpoZWFkKGRmMikKZGYyICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBEYXRlLCB5ID0gYFdlaWdodGVkIFByaWNlYCkpICsKICBnZW9tX2xpbmUoKQpgYGAKQm9ycm8gZWwgZGYgb3JpZ2luYWwKYGBge3J9CnJtKGRmKQpgYGAKCmBgYHtyfQpBbm9tYWx5RGV0ZWN0aW9uVHMoZGYyLCBtYXhfYW5vbXM9MC4wMSwgZGlyZWN0aW9uPSJwb3MiLCBwbG90PVRSVUUsIGVfdmFsdWUgPSBUKQpgYGAKYGBge3J9CmlkZW50aWNhbChsZW5ndGgodW5pcXVlKGRmMiREYXRlKSksIGxlbmd0aCh1bmlxdWUoZGYyJGBXZWlnaHRlZCBQcmljZWApKSkKYGBg
Ck1hbGFuZ2EsIMK/IGRlIGRvbmRlIHNhbGVuIGVzb3MgdmFsb3JlcyBleHRyYXM/CgpgYGB7cn0KbGVuZ3RoKHVuaXF1ZShkZjIkRGF0ZSkpCmxlbmd0aCh1bmlxdWUoZGYyJGBXZWlnaHRlZCBQcmljZWApKQpgYGAKClZveSBhIHNlbGVjY2lvbmFyIHNvbG8gYXF1ZWxsYXMgZmVjaGFzIGRlc2RlIGVsIDIwMTUgYSBsYSBmZWNoYQoKYGBge3J9CmRmMiA8LSBkZjIgJT4lIAogIGZpbHRlcihEYXRlID4gIjIwMTQtMTItMzEiKQpgYGAKCmBgYHtyfQppZGVudGljYWwobGVuZ3RoKHVuaXF1ZShkZjIkRGF0ZSkpLCBsZW5ndGgodW5pcXVlKGRmMiRgV2VpZ2h0ZWQgUHJpY2VgKSkpCmBgYAoKQWhvcmEgc2kuLi4gOlAKCmBgYHtyfQpkZjIgJT4lIAogIGdncGxvdChhZXMoeCA9IERhdGUsIHkgPSBgV2VpZ2h0ZWQgUHJpY2VgKSkgKyAKICBnZW9tX2xpbmUoKQpgYGAKCiMgRGV0ZWNjacOzbiBkZSBhbm9tYWxpYXMKSGF5IHVuIHBhcXVldGUgZW4gUiBwYXJhIGRldGVjdGFyIGFub21hbMOtYXMgZW4gc2VyaWVzIHRlbXBvcmFsZXMsIGVsIGRldGFsbGUgZXN0w6EgZW4gCmh0dHBzOi8vY3Jhbi5yLXByb2plY3Qub3JnL3dlYi9wYWNrYWdlcy9hbm9tYWx5RGV0ZWN0aW9uL2luZGV4Lmh0bWwKCmBgYHtyfQpBbm9tYWx5RGV0ZWN0aW9uVmVjKGRmMiRgV2VpZ2h0ZWQgUHJpY2VgLCAKICAgICAgICAgICAgICAgICAgICBtYXhfYW5vbXMgPSAwLjAxLCAKICAgICAgICAgICAgICAgICAgICBwZXJpb2QgPSAzNjUsCiAgICAgICAgICAgICAgICAgICAgZGlyZWN0aW9uID0gJ2JvdGgnLAogICAgICAgICAgICAgICAgICAgIG9ubHlfbGFzdCA9IEZBTFNFLAogICAgICAgICAgICAgICAgICAgIHBsb3QgPSBUUlVFKQoKYGBgCgoK