# Load the built-in `cars` dataset and report the median of its speed column.
data("cars")
median(cars[["speed"]])
## [1] 15
# Load package
library(jsonlite)

# Create URL: daily BTC/USD history from CryptoCompare. `limit=99` yielded
# 100 daily rows in the recorded run below.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=99"

# Read data (network call; parses the JSON response into nested R lists)
btc_data <- fromJSON(url)

# Extract the data frame containing price data
daily_data <- btc_data$Data$Data

# Fail fast if the response did not contain a price table. Without this check
# a failed request leaves `daily_data` as NULL and later aggregates such as
# max(..., na.rm = TRUE) return -Inf with only a warning.
if (!is.data.frame(daily_data) || nrow(daily_data) == 0) {
  stop(
    "Unexpected API response: no price rows found at Data$Data",
    call. = FALSE
  )
}

# View structure (optional)
str(daily_data)
## 'data.frame': 100 obs. of 9 variables:
## $ time : int 1764028800 1764115200 1764201600 1764288000 1764374400 1764460800 1764547200 1764633600 1764720000 1764806400 ...
## $ high : num 88495 90634 91935 93117 91202 ...
## $ low : num 86090 86298 90083 90242 90211 ...
## $ open : num 88288 87341 90487 91327 90918 ...
## $ volumefrom : num 32066 30484 21382 28365 9606 ...
## $ volumeto : num 2.80e+09 2.69e+09 1.95e+09 2.60e+09 8.72e+08 ...
## $ close : num 87341 90487 91327 90918 90843 ...
## $ conversionType : chr "direct" "direct" "direct" "direct" ...
## $ conversionSymbol: chr "" "" "" "" ...
# Highest daily closing price in the downloaded window, ignoring missing values
max_close <- max(daily_data[["close"]], na.rm = TRUE)
print(max_close)
## [1] 96945.09
# Mini Project: Bitcoin Daily Return and Volatility
# Data extraction
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Daily BTC/USD history; `limit=730` yielded 731 rows in the recorded run below.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=730"
raw <- fromJSON(url)

# Fail fast if the response did not contain a price table. as_tibble(NULL)
# would otherwise produce an empty tibble and the failure would only surface
# later as a confusing "object not found" error inside mutate().
if (!is.data.frame(raw$Data$Data) || nrow(raw$Data$Data) == 0) {
  stop(
    "Unexpected API response: no price rows found at Data$Data",
    call. = FALSE
  )
}

btc <- raw$Data$Data %>% as_tibble()

# Quick check
glimpse(btc)
## Rows: 731
## Columns: 9
## $ time <int> 1709510400, 1709596800, 1709683200, 1709769600, 17098…
## $ high <dbl> 68562.03, 69244.55, 67645.97, 68065.89, 70151.23, 686…
## $ low <dbl> 62363.97, 59268.68, 62851.27, 65610.94, 66173.73, 680…
## $ open <dbl> 63149.43, 68338.64, 63804.11, 66115.57, 66928.15, 682…
## $ volumefrom <dbl> 79177.16, 126873.13, 68031.77, 41800.64, 59361.92, 16…
## $ volumeto <dbl> 5220774006, 8263897913, 4498440969, 2804368120, 40462…
## $ close <dbl> 68338.64, 63804.11, 66115.57, 66928.15, 68269.39, 684…
## $ conversionType <chr> "direct", "direct", "direct", "direct", "direct", "di…
## $ conversionSymbol <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "…
# Preview the first six rows of the price table
btc %>% head(n = 6)
## # A tibble: 6 × 9
## time high low open volumefrom volumeto close conversionType
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1709510400 68562. 62364. 63149. 79177. 5220774006. 68339. direct
## 2 1709596800 69245. 59269. 68339. 126873. 8263897913. 63804. direct
## 3 1709683200 67646. 62851. 63804. 68032. 4498440969. 66116. direct
## 4 1709769600 68066. 65611. 66116. 41801. 2804368120. 66928. direct
## 5 1709856000 70151. 66174. 66928. 59362. 4046244831. 68269. direct
## 6 1709942400 68664. 68027. 68269. 16947. 1158711168. 68446. direct
## # ℹ 1 more variable: conversionSymbol <chr>
# Clean data ----
# Builds `btc_clean` from `btc`: adds a calendar `date` (UTC) and the simple
# daily return `ret`, then drops rows where the return is undefined (the first
# row has no previous close).
btc_clean <- btc %>%
  # Sort chronologically BEFORE computing returns: lag() works on row order,
  # so sorting afterwards would silently pair each close with the wrong
  # "previous" day if the API ever delivered rows out of order.
  arrange(time) %>%
  mutate(
    # `time` is a Unix timestamp in seconds
    date = as.Date(as.POSIXct(time, origin = "1970-01-01", tz = "UTC")),
    # Daily return. Namespace-qualified because stats::lag() on a plain vector
    # does not shift values and would yield all-zero returns.
    ret = (close / dplyr::lag(close)) - 1
  ) %>%
  filter(!is.na(ret))
# Add Rolling Volatility
library(dplyr)
library(zoo)
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lubridate)
# Append trailing-window volatility (SD of daily returns over the last 7 and
# 30 rows, NA until a full window is available) and the weekday label.
btc_clean <- mutate(
  btc_clean,
  vol_7 = rollapplyr(ret, width = 7, FUN = sd, fill = NA),
  vol_30 = rollapplyr(ret, width = 30, FUN = sd, fill = NA),
  dow = wday(date, label = TRUE)
)
# Five-number summary (plus mean) of the daily returns
btc_clean %>%
  pull(ret) %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.398e-01 -1.279e-02 -1.098e-05 4.094e-04 1.263e-02 1.230e-01
# Distribution of daily returns: sample size, centre, spread and extremes
summarise(
  btc_clean,
  n = n(),
  mean_ret = mean(ret),
  median_ret = median(ret),
  sd_ret = sd(ret),
  min_ret = min(ret),
  max_ret = max(ret)
)
## # A tibble: 1 × 6
## n mean_ret median_ret sd_ret min_ret max_ret
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 730 0.000409 -0.0000110 0.0256 -0.140 0.123
# Largest positive and negative return days
# First: the five most negative daily returns (ascending sort, keep the top 5)
btc_clean %>%
  select(date, close, ret) %>%
  arrange(ret) %>%
  slice_head(n = 5)
## # A tibble: 5 × 3
## date close ret
## <date> <dbl> <dbl>
## 1 2026-02-05 62812. -0.140
## 2 2025-03-03 86159. -0.0861
## 3 2024-03-19 61924. -0.0841
## 4 2025-10-10 112999. -0.0716
## 5 2024-08-05 54032. -0.0706
# The five most positive daily returns (descending sort, keep the top 5)
btc_clean %>%
  select(date, close, ret) %>%
  arrange(desc(ret)) %>%
  slice_head(n = 5)
## # A tibble: 5 × 3
## date close ret
## <date> <dbl> <dbl>
## 1 2026-02-06 70536. 0.123
## 2 2024-08-08 61709. 0.119
## 3 2024-11-11 88758. 0.104
## 4 2024-03-20 67857. 0.0958
## 5 2025-03-02 94276. 0.0956
# Rolling volatility over time
# The first 29 rows have no complete 30-row window, so `vol_30` is NA there
# and geom_line() warns that it dropped them; that is expected.
ggplot(btc_clean, aes(x = date, y = vol_30)) +
  geom_line() +
  labs(
    # Closing parenthesis was missing from the original title
    title = "BTC 30-Day Rolling Volatility (SD of Daily Returns)",
    x = "Date",
    y = "30-day Volatility"
  )
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Day-of-week effects: centre, spread and count of returns per weekday,
# listed in weekday (factor level) order.
btc_clean %>%
  summarise(
    mean_ret = mean(ret),
    median_ret = median(ret),
    sd_ret = sd(ret),
    n = n(),
    .by = dow
  ) %>%
  arrange(dow)
## # A tibble: 7 × 5
## dow mean_ret median_ret sd_ret n
## <ord> <dbl> <dbl> <dbl> <int>
## 1 Sun 0.00121 0.00133 0.0224 104
## 2 Mon 0.00244 0.00195 0.0286 104
## 3 Tue -0.00365 -0.00482 0.0268 105
## 4 Wed 0.00518 0.00205 0.0284 105
## 5 Thu -0.00333 -0.00289 0.0282 104
## 6 Fri 0.00140 -0.000413 0.0275 104
## 7 Sat -0.000381 0.000267 0.0133 104
# Box plot of daily returns, one box per weekday
btc_clean %>%
  ggplot(aes(x = dow, y = ret)) +
  geom_boxplot() +
  ggtitle("BTC Daily Returns by Day of Week") +
  xlab("Day of Week") +
  ylab("Daily Return")
# Frequency of daily drops worse than -5%
# Note: `pct_days_below_neg5` is a fraction between 0 and 1, not a percentage.
btc_clean %>%
  mutate(big_drop = ret < -0.05) %>%
  summarise(
    pct_days_below_neg5 = mean(big_drop),
    count_days_below_neg5 = sum(big_drop)
  )
## # A tibble: 1 × 2
## pct_days_below_neg5 count_days_below_neg5
## <dbl> <int>
## 1 0.0301 22