library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(dplyr)
library(lubridate)
library(readr)
library(tibble)
library(purrr)
library(broom)
library(tidyverse)
library(lubridate)
# Read and clean Fama-French factor data
ff_factors <- read_csv("F-F_Research_Data_Factors-2.csv", skip = 3, show_col_types = FALSE) |>
rename(Date = `...1`) |>
filter(!is.na(RF), !str_detect(Date, "^[A-Za-z]")) |> # Remove footnotes
mutate(
Date = ymd(paste0(Date, "01")),
`Mkt-RF` = as.numeric(`Mkt-RF`),
SMB = as.numeric(SMB),
HML = as.numeric(HML),
RF = as.numeric(RF)
)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = ymd(paste0(Date, "01"))`.
## Caused by warning:
## ! 86 failed to parse.
# Optional: view first few rows
head(ff_factors)
## # A tibble: 6 × 5
## Date `Mkt-RF` SMB HML RF
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 1926-07-01 2.89 -2.55 -2.39 0.22
## 2 1926-08-01 2.64 -1.14 3.81 0.25
## 3 1926-09-01 0.38 -1.36 0.05 0.23
## 4 1926-10-01 -3.27 -0.14 0.82 0.32
## 5 1926-11-01 2.54 -0.11 -0.61 0.31
## 6 1926-12-01 2.62 -0.07 0.06 0.28
# Simulate fake monthly asset returns for 5 assets
set.seed(123)
# Use dates from Fama-French factors
dates <- ff_factors$Date
# Generate returns for 5 assets
sim_returns <- tibble(
Date = dates,
Asset1 = rnorm(length(dates), 0.5, 5),
Asset2 = rnorm(length(dates), 0.3, 4),
Asset3 = rnorm(length(dates), 0.4, 6),
Asset4 = rnorm(length(dates), 0.6, 3),
Asset5 = rnorm(length(dates), 0.2, 4.5)
)
# Optional: write to CSV if needed for future use
# write_csv(sim_returns, "asset_returns.csv")
# Continue with the pipeline
asset_returns <- sim_returns
# Check for duplicates in Fama-French factors
ff_factors %>%
count(Date) %>%
filter(n > 1)
## # A tibble: 13 × 2
## Date n
## <date> <int>
## 1 2020-01-01 2
## 2 2020-02-01 2
## 3 2020-03-01 2
## 4 2020-04-01 2
## 5 2020-05-01 2
## 6 2020-06-01 2
## 7 2020-07-01 2
## 8 2020-08-01 2
## 9 2020-09-01 2
## 10 2020-10-01 2
## 11 2020-11-01 2
## 12 2020-12-01 2
## 13 NA 86
# Check for duplicates in asset returns
asset_returns %>%
count(Date) %>%
filter(n > 1)
## # A tibble: 13 × 2
## Date n
## <date> <int>
## 1 2020-01-01 2
## 2 2020-02-01 2
## 3 2020-03-01 2
## 4 2020-04-01 2
## 5 2020-05-01 2
## 6 2020-06-01 2
## 7 2020-07-01 2
## 8 2020-08-01 2
## 9 2020-09-01 2
## 10 2020-10-01 2
## 11 2020-11-01 2
## 12 2020-12-01 2
## 13 NA 86
asset_returns %>% count(Date) %>% filter(n > 1)
## # A tibble: 13 × 2
## Date n
## <date> <int>
## 1 2020-01-01 2
## 2 2020-02-01 2
## 3 2020-03-01 2
## 4 2020-04-01 2
## 5 2020-05-01 2
## 6 2020-06-01 2
## 7 2020-07-01 2
## 8 2020-08-01 2
## 9 2020-09-01 2
## 10 2020-10-01 2
## 11 2020-11-01 2
## 12 2020-12-01 2
## 13 NA 86
ff_factors %>% count(Date) %>% filter(n > 1)
## # A tibble: 13 × 2
## Date n
## <date> <int>
## 1 2020-01-01 2
## 2 2020-02-01 2
## 3 2020-03-01 2
## 4 2020-04-01 2
## 5 2020-05-01 2
## 6 2020-06-01 2
## 7 2020-07-01 2
## 8 2020-08-01 2
## 9 2020-09-01 2
## 10 2020-10-01 2
## 11 2020-11-01 2
## 12 2020-12-01 2
## 13 NA 86
asset_returns <- asset_returns %>%
distinct(Date, .keep_all = TRUE)
ff_factors <- ff_factors %>%
distinct(Date, .keep_all = TRUE)
combined_data <- left_join(asset_returns, ff_factors, by = "Date", relationship = "many-to-many")
# Combine asset returns with Fama-French factors
combined_data <- left_join(asset_returns, ff_factors, by = "Date")
# Calculate excess returns for all assets
excess_returns <- combined_data |>
mutate(across(starts_with("Asset"), ~ .x - RF, .names = "excess_{col}"))
sample_cov <- excess_returns |>
select(starts_with("excess_")) |>
cov()
capm_models <- excess_returns |>
pivot_longer(starts_with("excess_"), names_to = "Asset", values_to = "Return") |>
group_by(Asset) |>
nest() |>
mutate(model = map(data, ~ lm(Return ~ `Mkt-RF`, data = .x)),
resid = map(model, residuals))
# Extract residuals and build residual matrix for CAPM
capm_resid_matrix <- capm_models |>
mutate(resid = map(resid, as.numeric)) |>
select(Asset, resid) |>
unnest(resid) |>
group_by(Asset) |>
mutate(t = row_number()) |>
ungroup() |>
pivot_wider(names_from = Asset, values_from = resid) |>
select(where(is.numeric)) |>
cov()
ff3_models <- excess_returns |>
pivot_longer(starts_with("excess_"), names_to = "Asset", values_to = "Return") |>
group_by(Asset) |>
nest() |>
mutate(model = map(data, ~ lm(Return ~ `Mkt-RF` + SMB + HML, data = .x)),
resid = map(model, residuals))
ff3_resid_matrix <- ff3_models |>
mutate(resid = map(resid, as.numeric)) |>
select(Asset, resid) |>
unnest(resid) |>
group_by(Asset) |>
mutate(t = row_number()) |>
ungroup() |>
pivot_wider(names_from = Asset, values_from = resid) |>
select(where(is.numeric)) |>
cov()
library(corpcor)
cat("Sample Covariance Matrix:\n")
## Sample Covariance Matrix:
print(sample_cov)
## excess_Asset1 excess_Asset2 excess_Asset3 excess_Asset4
## excess_Asset1 24.7665220 -0.6425495 0.1685365 -0.581488637
## excess_Asset2 -0.6425495 15.6536062 -2.9669809 -0.228907033
## excess_Asset3 0.1685365 -2.9669809 36.0496860 0.129645133
## excess_Asset4 -0.5814886 -0.2289070 0.1296451 9.097989548
## excess_Asset5 0.3505406 -0.1206993 -1.2196179 -0.003103986
## excess_Asset5
## excess_Asset1 0.350540572
## excess_Asset2 -0.120699327
## excess_Asset3 -1.219617934
## excess_Asset4 -0.003103986
## excess_Asset5 21.195195819
cat("\nCovariance from CAPM Residuals:\n")
##
## Covariance from CAPM Residuals:
print(capm_resid_matrix)
## t excess_Asset1 excess_Asset2 excess_Asset3
## t 117315.166667 -9.7781845 -62.7225997 -19.4212526
## excess_Asset1 -9.778185 24.7338163 -0.6411154 0.1920918
## excess_Asset2 -62.722600 -0.6411154 15.6535433 -2.9680137
## excess_Asset3 -19.421253 0.1920918 -2.9680137 36.0327210
## excess_Asset4 -45.308041 -0.5783607 -0.2290442 0.1273923
## excess_Asset5 -36.024824 0.3284782 -0.1197319 -1.2037281
## excess_Asset4 excess_Asset5
## t -4.530804e+01 -3.602482e+01
## excess_Asset1 -5.783607e-01 3.284782e-01
## excess_Asset2 -2.290442e-01 -1.197319e-01
## excess_Asset3 1.273923e-01 -1.203728e+00
## excess_Asset4 9.097690e+00 -9.939362e-04
## excess_Asset5 -9.939362e-04 2.118031e+01
cat("\nCovariance from Fama-French Residuals:\n")
##
## Covariance from Fama-French Residuals:
print(ff3_resid_matrix)
## t excess_Asset1 excess_Asset2 excess_Asset3
## t 117315.16667 -10.7358038 -60.2480158 -15.6937508
## excess_Asset1 -10.73580 24.6777943 -0.6213580 0.1868881
## excess_Asset2 -60.24802 -0.6213580 15.6344528 -2.9878295
## excess_Asset3 -15.69375 0.1868881 -2.9878295 35.9935687
## excess_Asset4 -43.80716 -0.5860694 -0.2355877 0.1101340
## excess_Asset5 -36.18090 0.3444943 -0.1229418 -1.1978851
## excess_Asset4 excess_Asset5
## t -43.807164704 -36.180904091
## excess_Asset1 -0.586069367 0.344494251
## excess_Asset2 -0.235587669 -0.122941786
## excess_Asset3 0.110133996 -1.197885088
## excess_Asset4 9.089553020 0.003073084
## excess_Asset5 0.003073084 21.175243701