About

This notebook supports a forecast for the Metaculus question https://www.metaculus.com/questions/5446/total-market-cap-of-cryptocurrencies-2025/. The data come from https://www.kaggle.com/sviatm/coinmarket-cap-and-volume-20130428-20200521; that dataset is out of date (it ends on 2020-05-21), so use an API to get newer data.

Init

#print numbers with 2 significant digits by default
options(digits = 2)

library(pacman)
p_load(
  kirkegaard,
  rms,
  lubridate
)

#use the black-and-white ggplot2 theme as the default for all plots below
theme_set(theme_bw())

Data

#read the daily data
tmc = read_csv("data/coinmarket-cap-and-volume-2013-04-28-2020-05-21.csv")

#derive the market cap in billions of USD and its log10
# (a plain date column, as.Date(datetime), was considered but is not used)
tmc = mutate(
  tmc,
  market_cap_B = market_cap / 1e9,
  market_cap_B_log10 = log10(market_cap_B)
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   datetime = col_datetime(format = ""),
##   market_cap = col_double(),
##   market_volume = col_double()
## )
#fill in more dates
#append one row per day for the 10 years after the last observation; only
#datetime is set, so all other columns are NA and serve as forecast targets
#max() is used instead of last() so the result does not depend on row order,
#and %m+% rolls back to the last valid day if the end date would not exist
#(e.g. Feb 29 + 10 years), where a plain + years(10) would give NA
last_obs = max(tmc$datetime, na.rm = TRUE)
tmc = bind_rows(
  tmc,
  tibble(
    datetime = seq(from = last_obs + days(1),
               to = last_obs %m+% years(10),
               by = "day")
  )
)

Plot

#axis labels shared by the plots
x_lab = "Year"
y_lab = "Total crypto market cap in billions of USD"

#linear: observed market cap over time (rows without a value are left out)
ggplot(
  data = filter(tmc, !is.na(market_cap_B)),
  mapping = aes(x = datetime, y = market_cap_B)
) +
  geom_line() +
  scale_x_datetime(name = x_lab) +
  scale_y_continuous(name = y_lab)

#log10
#plot the raw values on a log10-scaled axis so the tick labels are in billions
#of USD and agree with y_lab; plotting market_cap_B_log10 under that label
#would show log10 units mislabelled as billions
tmc %>% 
  filter(!is.na(market_cap_B)) %>% 
  ggplot(aes(datetime, market_cap_B)) +
  geom_line() +
  scale_x_datetime(x_lab) +
  scale_y_log10(name = y_lab)

Model

#log-linear
#regress log10 market cap (in billions of USD) on time
#as.numeric() turns datetime into a plain number, so the slope is per unit of
#that number (presumably seconds since 1970-01-01 for a POSIXct column, which
#would explain why the coefficient prints as 0.0000 at 4 decimals)
#the appended future rows have no market_cap_B_log10 and are left out of the fit
loglin = ols(market_cap_B_log10 ~ as.numeric(datetime), data = tmc)
loglin
## Frequencies of Missing Values Due to Each Variable
## market_cap_B_log10           datetime 
##               3652                  0 
## 
## Linear Regression Model
##  
##  ols(formula = market_cap_B_log10 ~ as.numeric(datetime), data = tmc)
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs    2581    LR chi2    4422.90    R2       0.820    
##  sigma0.3427    d.f.             1    R2 adj   0.820    
##  d.f.   2579    Pr(> chi2)  0.0000    g        0.844    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -0.5123 -0.2935 -0.1159  0.2658  1.0374 
##  
##  
##            Coef     S.E.   t      Pr(>|t|)
##  Intercept -15.3220 0.1551 -98.80 <0.0001 
##  datetime    0.0000 0.0000 108.32 <0.0001 
## 
#add fitted and predicted values
#the model predicts on the log10 scale; 10^ converts back to billions of USD
tmc = tmc %>% 
  mutate(
    market_cap_B_log10_pred_loglin = predict(loglin, newdata = tmc),
    market_cap_B_pred_loglin = 10^market_cap_B_log10_pred_loglin
  )

#replot
#observed values (black) and the log-linear fit and forecast (blue), drawn on a
#log10-scaled axis using the back-transformed values so the tick labels are in
#billions of USD and agree with y_lab
#the future rows have no observed value, so the black line drops them (with a
#warning) while the blue line continues into the forecast period
tmc %>% 
  ggplot(aes(datetime)) +
  geom_line(mapping = aes(y = market_cap_B)) +
  geom_line(mapping = aes(y = market_cap_B_pred_loglin), color = "blue") +
  scale_x_datetime(x_lab) +
  scale_y_log10(name = y_lab)
## Warning: Removed 3652 row(s) containing missing values (geom_path).

#numerical values for 2025-01-01
#select the row(s) for that date once and reuse them for the prediction
jan1_2025 = tmc %>% filter(as_date(datetime) == as_date("2025-01-01"))
jan1_2025
#get the SE
#the prediction and its SE are on the model's log10(billions of USD) scale
#TRUE is spelled out because T is an ordinary variable that can be reassigned
predict(object = loglin,
        newdata = jan1_2025,
        se.fit = TRUE)
## $linear.predictors
##   1 
## 4.4 
## 
## $se.fit
##     1 
## 0.028