Bitcoin прогноз

1. Доступ к данным Bitcoin из сайта https://coinmarketcap.com/

# load libraries
library(rvest)
library(tidyverse)

url <-read_html("https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20130428&end=20190328")
data <- url %>% html_table()
df <- data[[1]]
df <- as_tibble(df)
colnames(df) <- c("date", "open", "high", "low", "close", "volume", "market_cap")
df$date <- as.Date(df$date, "%b %d,%Y")
knitr::kable(head(df))

date	open	high	low	close	volume	market_cap
2019-03-28	4087.58	4094.90	4040.27	4069.11	9,353,915,899	71,678,998,915
2019-03-27	3984.24	4087.07	3977.81	4087.07	10,897,131,934	71,987,847,571
2019-03-26	3969.23	3985.08	3944.75	3985.08	10,707,678,815	70,184,147,203
2019-03-25	4024.11	4038.84	3934.03	3963.07	10,359,818,883	69,789,872,373
2019-03-24	4035.16	4040.70	4006.19	4022.17	9,144,851,065	70,823,042,992
2019-03-23	4022.71	4049.88	4015.96	4035.83	9,578,850,549	71,056,017,910

2. Визуализация данных

# Plot using ggplot2
library(ggplot2)
ggplot(data = df, aes(x = date, y = close)) + geom_line(colour = "blue", size = 1) +
  labs(title = "Historical data for Bitcoin",
     subtitle = "Accessing Bitcoin Data for the Forvision project",
     caption = "sources: Bitcoin (BTC); https://coinmarketcap.com",
     x = "Date",
     y = "Close")

# Plot using dygraphs
# Covert data to xts object
df1 <- xts::xts(df$close, order.by=df$date)
names(df1) <- "Close"
library(dygraphs)
dygraph(df1,main = "Historical data for Bitcoin") %>%
        dyRangeSelector(height = 20)

# Last 365 days
df2 <- df[1:365, ]
library(ggplot2)
ggplot(data = df2, aes(x = date, y = close)) + geom_line(colour = "blue", size = 1) +
  labs(title = "Historical data of Bitcoin for last 365 days",
       subtitle = "Accessing Bitcoin Data for the Forvision project",
       caption = "sources: Bitcoin (BTC); https://coinmarketcap.com",
       x = "Date",
       y = "Close")

3. Подготовка данных для прогнозирования

Точка origin = “2019-01-01”
C точки origin сделать прогноз на 60 дней

library(dplyr)
train_df <- filter(df, date <= "2019-01-01")
test_df <- filter(df, date > "2019-01-01")
knitr::kable(head(train_df))

date	open	high	low	close	volume	market_cap
2019-01-01	3746.71	3850.91	3707.23	3843.52	4,324,200,990	67,098,634,181
2018-12-31	3866.84	3868.74	3725.87	3742.70	4,661,840,806	65,331,499,158
2018-12-30	3822.38	3901.91	3797.22	3865.95	4,770,578,575	67,475,512,827
2018-12-29	3932.49	3963.76	3820.41	3820.41	4,991,655,917	66,672,244,158
2018-12-28	3653.13	3956.14	3642.63	3923.92	5,631,554,348	68,471,837,969
2018-12-27	3854.69	3874.42	3645.45	3654.83	5,130,222,366	63,768,757,101

test_df <- test_df[(nrow(test_df)-60 + 1):nrow(test_df), ]
knitr::kable(tail(test_df))

date	open	high	low	close	volume	market_cap
2019-01-07	4078.59	4092.61	4020.89	4025.25	5,228,625,637	70,316,305,580
2019-01-06	3836.52	4093.30	3826.51	4076.63	5,597,027,440	71,206,795,853
2019-01-05	3851.97	3904.90	3836.90	3845.19	5,137,609,824	67,157,570,935
2019-01-04	3832.04	3865.93	3783.85	3857.72	4,847,965,467	67,368,333,500
2019-01-03	3931.05	3935.69	3826.22	3836.74	4,530,215,219	66,994,920,903
2019-01-02	3849.22	3947.98	3817.41	3943.41	5,244,856,836	68,849,856,732

# covert data to time series 
train_ts <- ts(data = rev(train_df$close), end=c(2019, 1), frequency = 365)
test_ts <- ts(data = rev(test_df$close), start=c(2019, 2), frequency = 365)
plot(train_ts)

# Seasonal decomposition
fit <- stl(train_ts, s.window="period")
plot(fit)

# Test stationary
library(tseries)
adf.test(train_df$close, alternative="stationary", k=0)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_df$close
## Dickey-Fuller = -2.2373, Lag order = 0, p-value = 0.4778
## alternative hypothesis: stationary

big p-value -> data is not stationary

adf.test(diff(log(train_df$close)), alternative="stationary", k=0)

## Warning in adf.test(diff(log(train_df$close)), alternative =
## "stationary", : p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(log(train_df$close))
## Dickey-Fuller = -45.264, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary

plot(diff(log(train_ts)))

Small p-values -> diff(log(data)) is stationary

4 Построение auto.arima модели и прогнозирование

library(forecast)
fit <- auto.arima(log(train_ts), d = 1, ic = "aic", trace = TRUE)

## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,1,2)(1,0,1)[365] with drift         : Inf
##  ARIMA(0,1,0)             with drift         : -7073.575
##  ARIMA(1,1,0)(1,0,0)[365] with drift         : Inf
##  ARIMA(0,1,1)(0,0,1)[365] with drift         : Inf
##  ARIMA(0,1,0)                                : -7072.756
##  ARIMA(0,1,0)(1,0,0)[365] with drift         : Inf
##  ARIMA(0,1,0)(0,0,1)[365] with drift         : Inf
##  ARIMA(0,1,0)(1,0,1)[365] with drift         : Inf
##  ARIMA(1,1,0)             with drift         : -7073.355
##  ARIMA(0,1,1)             with drift         : -7071.62
##  ARIMA(1,1,1)             with drift         : -7071.534
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(0,1,0)             with drift         : -7079.827
## 
##  Best model: ARIMA(0,1,0)             with drift

# forecast, horizon = 60
forecast_fit <- forecast(fit, h = 60)
forecast <- exp(forecast_fit$mean)

forecast_data <- data.frame(date = rev(test_df$date), value = rev(test_df$close), forecast)
knitr::kable(forecast_data)

date	value	forecast
2019-01-02	3943.41	3849.742
2019-01-03	3836.74	3855.974
2019-01-04	3857.72	3862.216
2019-01-05	3845.19	3868.469
2019-01-06	4076.63	3874.731
2019-01-07	4025.25	3881.003
2019-01-08	4030.85	3887.286
2019-01-09	4035.30	3893.579
2019-01-10	3678.92	3899.882
2019-01-11	3687.37	3906.195
2019-01-12	3661.30	3912.519
2019-01-13	3552.95	3918.852
2019-01-14	3706.05	3925.196
2019-01-15	3630.68	3931.551
2019-01-16	3655.01	3937.915
2019-01-17	3678.56	3944.290
2019-01-18	3657.84	3950.675
2019-01-19	3728.57	3957.071
2019-01-20	3601.01	3963.476
2019-01-21	3576.03	3969.893
2019-01-22	3604.58	3976.319
2019-01-23	3585.12	3982.756
2019-01-24	3600.87	3989.204
2019-01-25	3599.77	3995.661
2019-01-26	3602.46	4002.130
2019-01-27	3583.97	4008.608
2019-01-28	3470.45	4015.098
2019-01-29	3448.12	4021.597
2019-01-30	3486.18	4028.108
2019-01-31	3457.79	4034.629
2019-02-01	3487.95	4041.160
2019-02-02	3521.06	4047.702
2019-02-03	3464.01	4054.254
2019-02-04	3459.15	4060.818
2019-02-05	3466.36	4067.391
2019-02-06	3413.77	4073.976
2019-02-07	3399.47	4080.571
2019-02-08	3666.78	4087.177
2019-02-09	3671.20	4093.793
2019-02-10	3690.19	4100.420
2019-02-11	3648.43	4107.058
2019-02-12	3653.53	4113.707
2019-02-13	3632.07	4120.366
2019-02-14	3616.88	4127.036
2019-02-15	3620.81	4133.717
2019-02-16	3629.79	4140.409
2019-02-17	3673.84	4147.112
2019-02-18	3915.71	4153.825
2019-02-19	3947.09	4160.549
2019-02-20	3999.82	4167.285
2019-02-21	3954.12	4174.031
2019-02-22	4005.53	4180.788
2019-02-23	4142.53	4187.556
2019-02-24	3810.43	4194.335
2019-02-25	3882.70	4201.125
2019-02-26	3854.36	4207.926
2019-02-27	3851.05	4214.737
2019-02-28	3854.79	4221.560
2019-03-01	3859.58	4228.394
2019-03-02	3864.42	4235.239

accuracy(forecast_data$value, forecast_data$forecast)

##                ME     RMSE      MAE      MPE     MAPE     ACF1 Theil's U
## Test set 330.1088 391.3159 354.2788 8.108618 8.731735 0.858627  60.05321

ggplot() + geom_line(data = forecast_data, aes(x = date, y = value, colour = "value"), size = 1) +
  geom_line(data = forecast_data, aes(date, forecast, colour = "forecast"), size = 1) +
  geom_line(data = train_df[1:100,], aes(date, close, colour = "value"), size = 1) + 
  ggtitle("60-days ahead bitcoin forecast from origin '01/01/2019'")

Bitcoin прогноз

Cuong Sai

2019-03-29

1. Доступ к данным Bitcoin из сайта https://coinmarketcap.com/

2. Визуализация данных

3. Подготовка данных для прогнозирования

4 Построение auto.arima модели и прогнозирование