Analysis for this Metaculus question: https://www.metaculus.com/questions/5446/total-market-cap-of-cryptocurrencies-2025/ The data are from https://www.kaggle.com/sviatm/coinmarket-cap-and-volume-20130428-20200521 (this dataset is out of date; use the API to get newer data).
#print numbers with 2 significant digits
options(digits = 2)
#load the packages via pacman
#NOTE(review): read_csv(), ggplot() and %>% are used below without being loaded here - presumably they come in through kirkegaard; confirm
library(pacman)
p_load(kirkegaard, rms, lubridate)
#use the black-and-white ggplot2 theme for every plot
theme_set(theme_bw())
#read the daily data
tmc = read_csv("data/coinmarket-cap-and-volume-2013-04-28-2020-05-21.csv")
#derive the market cap in billions, and its log10
tmc = mutate(
  tmc,
  market_cap_B = market_cap / 1e9,
  market_cap_B_log10 = log10(market_cap_B)
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## datetime = col_datetime(format = ""),
## market_cap = col_double(),
## market_volume = col_double()
## )
#fill in more dates
#append one row per day, from the day after the last observation up to 10 years after it
#only datetime is supplied for these rows, so the remaining columns are NA there
tmc = tmc %>%
  bind_rows(
    tibble(
      datetime = seq(
        from = last(tmc$datetime) + days(1),
        to = last(tmc$datetime) + years(10),
        by = "day"
      )
    )
  )
#axis titles reused by the plots
x_lab = "Year"
y_lab = "Total crypto market cap in billions of USD"
#linear
#only rows with an observed market cap are plotted
ggplot(
  data = filter(tmc, !is.na(market_cap_B)),
  mapping = aes(x = datetime, y = market_cap_B)
) +
  geom_line() +
  scale_x_datetime(name = x_lab) +
  scale_y_continuous(name = y_lab)
#log10
#plots the log10-transformed market cap for rows with an observed value
#the axis title is marked as log10 because the plotted values are log10(billions of USD), not billions of USD
tmc %>%
  filter(!is.na(market_cap_B)) %>%
  ggplot(aes(datetime, market_cap_B_log10)) +
  geom_line() +
  scale_x_datetime(x_lab) +
  scale_y_continuous(paste0(y_lab, " (log10)"))
#log-linear
#regress log10 market cap on time; as.numeric() turns the datetime into a plain number
#rows without a market cap (the appended future dates) are dropped from the fit, as the missing-value table in the output shows
loglin = ols(
  formula = market_cap_B_log10 ~ as.numeric(datetime),
  data = tmc
)
print(loglin)
## Frequencies of Missing Values Due to Each Variable
## market_cap_B_log10 datetime
## 3652 0
##
## Linear Regression Model
##
## ols(formula = market_cap_B_log10 ~ as.numeric(datetime), data = tmc)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2581 LR chi2 4422.90 R2 0.820
## sigma0.3427 d.f. 1 R2 adj 0.820
## d.f. 2579 Pr(> chi2) 0.0000 g 0.844
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.5123 -0.2935 -0.1159 0.2658 1.0374
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -15.3220 0.1551 -98.80 <0.0001
## datetime 0.0000 0.0000 108.32 <0.0001
##
#add fitted and predicted values
#predictions on the log10 scale for every row, including the appended future dates
tmc[["market_cap_B_log10_pred_loglin"]] = predict(object = loglin, newdata = tmc)
#back-transform to billions of USD
tmc[["market_cap_B_pred_loglin"]] = 10^tmc[["market_cap_B_log10_pred_loglin"]]
#replot
#observed log10 market cap (default colour) together with the log-linear fit (blue)
#the observed series is NA for the appended future dates, so ggplot drops those rows with a warning
#the axis title is marked as log10 because both series are on the log10 scale
tmc %>%
  ggplot(aes(datetime)) +
  geom_line(mapping = aes(y = market_cap_B_log10)) +
  geom_line(mapping = aes(y = market_cap_B_log10_pred_loglin), color = "blue") +
  scale_x_datetime(x_lab) +
  scale_y_continuous(paste0(y_lab, " (log10)"))
## Warning: Removed 3652 row(s) containing missing values (geom_path).
#numerical values for 2025-01-01
#select the target row once so the printed values and the SE below refer to the same data
tmc_2025 = tmc %>%
  filter(year(datetime) == 2025, month(datetime) == 1, day(datetime) == 1)
tmc_2025
#get the SE
#the prediction and its SE are on the log10 scale (log10 of billions of USD)
#NOTE(review): se.fit looks like the SE of the fitted line, not of a single future observation (the model's residual sigma is about 0.34) - confirm before using it as forecast uncertainty
predict(object = loglin, newdata = tmc_2025, se.fit = TRUE)
## $linear.predictors
## 1
## 4.4
##
## $se.fit
## 1
## 0.028