library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(dplyr)
library(lubridate)
library(readr)
library(tibble)
library(purrr)
library(broom)
library(tidyverse)
library(lubridate)

# Read and clean Fama-French factor data
ff_factors <- read_csv("F-F_Research_Data_Factors-2.csv", skip = 3, show_col_types = FALSE) |>
  rename(Date = `...1`) |>
  filter(!is.na(RF), !str_detect(Date, "^[A-Za-z]")) |>  # Remove footnotes
  mutate(
    Date = ymd(paste0(Date, "01")),
    `Mkt-RF` = as.numeric(`Mkt-RF`),
    SMB = as.numeric(SMB),
    HML = as.numeric(HML),
    RF = as.numeric(RF)
  )
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = ymd(paste0(Date, "01"))`.
## Caused by warning:
## !  86 failed to parse.
# Optional: view first few rows
head(ff_factors)
## # A tibble: 6 × 5
##   Date       `Mkt-RF`   SMB   HML    RF
##   <date>        <dbl> <dbl> <dbl> <dbl>
## 1 1926-07-01     2.89 -2.55 -2.39  0.22
## 2 1926-08-01     2.64 -1.14  3.81  0.25
## 3 1926-09-01     0.38 -1.36  0.05  0.23
## 4 1926-10-01    -3.27 -0.14  0.82  0.32
## 5 1926-11-01     2.54 -0.11 -0.61  0.31
## 6 1926-12-01     2.62 -0.07  0.06  0.28
# Simulate fake monthly asset returns for 5 assets
set.seed(123)

# Use dates from Fama-French factors
dates <- ff_factors$Date

# Generate returns for 5 assets
sim_returns <- tibble(
  Date = dates,
  Asset1 = rnorm(length(dates), 0.5, 5),
  Asset2 = rnorm(length(dates), 0.3, 4),
  Asset3 = rnorm(length(dates), 0.4, 6),
  Asset4 = rnorm(length(dates), 0.6, 3),
  Asset5 = rnorm(length(dates), 0.2, 4.5)
)

# Optional: write to CSV if needed for future use
# write_csv(sim_returns, "asset_returns.csv")

# Continue with the pipeline
asset_returns <- sim_returns
# Check for duplicates in Fama-French factors
ff_factors %>%
  count(Date) %>%
  filter(n > 1)
## # A tibble: 13 × 2
##    Date           n
##    <date>     <int>
##  1 2020-01-01     2
##  2 2020-02-01     2
##  3 2020-03-01     2
##  4 2020-04-01     2
##  5 2020-05-01     2
##  6 2020-06-01     2
##  7 2020-07-01     2
##  8 2020-08-01     2
##  9 2020-09-01     2
## 10 2020-10-01     2
## 11 2020-11-01     2
## 12 2020-12-01     2
## 13 NA            86
# Check for duplicates in asset returns
asset_returns %>%
  count(Date) %>%
  filter(n > 1)
## # A tibble: 13 × 2
##    Date           n
##    <date>     <int>
##  1 2020-01-01     2
##  2 2020-02-01     2
##  3 2020-03-01     2
##  4 2020-04-01     2
##  5 2020-05-01     2
##  6 2020-06-01     2
##  7 2020-07-01     2
##  8 2020-08-01     2
##  9 2020-09-01     2
## 10 2020-10-01     2
## 11 2020-11-01     2
## 12 2020-12-01     2
## 13 NA            86
asset_returns %>% count(Date) %>% filter(n > 1)
## # A tibble: 13 × 2
##    Date           n
##    <date>     <int>
##  1 2020-01-01     2
##  2 2020-02-01     2
##  3 2020-03-01     2
##  4 2020-04-01     2
##  5 2020-05-01     2
##  6 2020-06-01     2
##  7 2020-07-01     2
##  8 2020-08-01     2
##  9 2020-09-01     2
## 10 2020-10-01     2
## 11 2020-11-01     2
## 12 2020-12-01     2
## 13 NA            86
ff_factors %>% count(Date) %>% filter(n > 1)
## # A tibble: 13 × 2
##    Date           n
##    <date>     <int>
##  1 2020-01-01     2
##  2 2020-02-01     2
##  3 2020-03-01     2
##  4 2020-04-01     2
##  5 2020-05-01     2
##  6 2020-06-01     2
##  7 2020-07-01     2
##  8 2020-08-01     2
##  9 2020-09-01     2
## 10 2020-10-01     2
## 11 2020-11-01     2
## 12 2020-12-01     2
## 13 NA            86
asset_returns <- asset_returns %>%
  distinct(Date, .keep_all = TRUE)

ff_factors <- ff_factors %>%
  distinct(Date, .keep_all = TRUE)
combined_data <- left_join(asset_returns, ff_factors, by = "Date", relationship = "many-to-many")
# Combine asset returns with Fama-French factors
combined_data <- left_join(asset_returns, ff_factors, by = "Date")

# Calculate excess returns for all assets
excess_returns <- combined_data |>
  mutate(across(starts_with("Asset"), ~ .x - RF, .names = "excess_{col}"))
sample_cov <- excess_returns |>
  select(starts_with("excess_")) |>
  cov()
capm_models <- excess_returns |>
  pivot_longer(starts_with("excess_"), names_to = "Asset", values_to = "Return") |>
  group_by(Asset) |>
  nest() |>
  mutate(model = map(data, ~ lm(Return ~ `Mkt-RF`, data = .x)),
         resid = map(model, residuals))
# Extract residuals and build residual matrix for CAPM
capm_resid_matrix <- capm_models |>
  mutate(resid = map(resid, as.numeric)) |>
  select(Asset, resid) |>
  unnest(resid) |>
  group_by(Asset) |>
  mutate(t = row_number()) |>
  ungroup() |>
  pivot_wider(names_from = Asset, values_from = resid) |>
  select(where(is.numeric)) |>
  cov()
ff3_models <- excess_returns |>
  pivot_longer(starts_with("excess_"), names_to = "Asset", values_to = "Return") |>
  group_by(Asset) |>
  nest() |>
  mutate(model = map(data, ~ lm(Return ~ `Mkt-RF` + SMB + HML, data = .x)),
         resid = map(model, residuals))
ff3_resid_matrix <- ff3_models |>
  mutate(resid = map(resid, as.numeric)) |>
  select(Asset, resid) |>
  unnest(resid) |>
  group_by(Asset) |>
  mutate(t = row_number()) |>
  ungroup() |>
  pivot_wider(names_from = Asset, values_from = resid) |>
  select(where(is.numeric)) |>
  cov()
library(corpcor)

cat("Sample Covariance Matrix:\n")
## Sample Covariance Matrix:
print(sample_cov)
##               excess_Asset1 excess_Asset2 excess_Asset3 excess_Asset4
## excess_Asset1    24.7665220    -0.6425495     0.1685365  -0.581488637
## excess_Asset2    -0.6425495    15.6536062    -2.9669809  -0.228907033
## excess_Asset3     0.1685365    -2.9669809    36.0496860   0.129645133
## excess_Asset4    -0.5814886    -0.2289070     0.1296451   9.097989548
## excess_Asset5     0.3505406    -0.1206993    -1.2196179  -0.003103986
##               excess_Asset5
## excess_Asset1   0.350540572
## excess_Asset2  -0.120699327
## excess_Asset3  -1.219617934
## excess_Asset4  -0.003103986
## excess_Asset5  21.195195819
cat("\nCovariance from CAPM Residuals:\n")
## 
## Covariance from CAPM Residuals:
print(capm_resid_matrix)
##                           t excess_Asset1 excess_Asset2 excess_Asset3
## t             117315.166667    -9.7781845   -62.7225997   -19.4212526
## excess_Asset1     -9.778185    24.7338163    -0.6411154     0.1920918
## excess_Asset2    -62.722600    -0.6411154    15.6535433    -2.9680137
## excess_Asset3    -19.421253     0.1920918    -2.9680137    36.0327210
## excess_Asset4    -45.308041    -0.5783607    -0.2290442     0.1273923
## excess_Asset5    -36.024824     0.3284782    -0.1197319    -1.2037281
##               excess_Asset4 excess_Asset5
## t             -4.530804e+01 -3.602482e+01
## excess_Asset1 -5.783607e-01  3.284782e-01
## excess_Asset2 -2.290442e-01 -1.197319e-01
## excess_Asset3  1.273923e-01 -1.203728e+00
## excess_Asset4  9.097690e+00 -9.939362e-04
## excess_Asset5 -9.939362e-04  2.118031e+01
cat("\nCovariance from Fama-French Residuals:\n")
## 
## Covariance from Fama-French Residuals:
print(ff3_resid_matrix)
##                          t excess_Asset1 excess_Asset2 excess_Asset3
## t             117315.16667   -10.7358038   -60.2480158   -15.6937508
## excess_Asset1    -10.73580    24.6777943    -0.6213580     0.1868881
## excess_Asset2    -60.24802    -0.6213580    15.6344528    -2.9878295
## excess_Asset3    -15.69375     0.1868881    -2.9878295    35.9935687
## excess_Asset4    -43.80716    -0.5860694    -0.2355877     0.1101340
## excess_Asset5    -36.18090     0.3444943    -0.1229418    -1.1978851
##               excess_Asset4 excess_Asset5
## t             -43.807164704 -36.180904091
## excess_Asset1  -0.586069367   0.344494251
## excess_Asset2  -0.235587669  -0.122941786
## excess_Asset3   0.110133996  -1.197885088
## excess_Asset4   9.089553020   0.003073084
## excess_Asset5   0.003073084  21.175243701