# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)

# Load the Fama-French 5 factors data
ff5 <- read_csv("F-F_Research_Data_5_Factors_2x3 copy 2.csv", skip = 3, show_col_types = FALSE)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
colnames(ff5) <- c("date", "Mkt_RF", "SMB", "HML", "RMW", "CMA", "RF")

# Convert date to appropriate format and filter out rows with NA in date
ff5 <- ff5 %>%
  filter(!is.na(Mkt_RF)) %>%
  mutate(date = ymd(paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-01"))) %>%
  filter(!is.na(date))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = ymd(paste0(substr(date, 1, 4), "-", substr(date, 5, 6),
##   "-01"))`.
## Caused by warning:
## !  49 failed to parse.
# Load the 10 Industry Portfolios data
industries <- read_csv("10_Industry_Portfolios.CSV", skip = 11, show_col_types = FALSE)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
colnames(industries)[1] <- "date"

# Convert date to appropriate format and filter out rows with NA in date
industries <- industries %>%
  filter(!is.na(NoDur)) %>%
  mutate(date = ymd(paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-01"))) %>%
  filter(!is.na(date))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `date = ymd(paste0(substr(date, 1, 4), "-", substr(date, 5, 6),
##   "-01"))`.
## Caused by warning:
## !  342 failed to parse.
# Merge the datasets on date
merged_data <- left_join(ff5, industries, by = "date")
## Warning in left_join(ff5, industries, by = "date"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1123 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Filter data for the last 5 years
recent_data <- merged_data %>%
  filter(date >= (max(date) - years(5)))

# Prepare the returns data
returns_data <- recent_data %>%
  select(date, starts_with("NoDur"):starts_with("Other"))

# Prepare the factors data
factors_data <- recent_data %>%
  select(date, Mkt_RF, SMB, HML, RMW, CMA)

# Ensure no column is constant in returns_data
returns_data <- returns_data %>%
  select(where(~ n_distinct(.) > 1))

# Ensure no column is constant in factors_data
factors_data <- factors_data %>%
  select(where(~ n_distinct(.) > 1))

# Fit the Fama-French 5-factor model for each industry portfolio
fit <- lm(as.matrix(returns_data %>% select(-date)) ~ Mkt_RF + SMB + HML + RMW + CMA, data = factors_data)

# Extract the coefficients and residuals
betas <- coef(fit)[-1,]
residuals <- residuals(fit)
cov_matrix <- var(residuals)

# Calculate the inverse of the covariance matrix
inv_cov_matrix <- solve(cov_matrix)

# Compute the weights of the Minimum Variance Portfolio (MVP)
ones <- rep(1, ncol(inv_cov_matrix))
mvp_weights <- inv_cov_matrix %*% ones / sum(inv_cov_matrix %*% ones)

# Calculate the monthly returns of the MVP
mvp_returns <- rowSums(returns_data %>% select(-date) * mvp_weights)

# Convert to a data frame
mvp_returns_df <- data.frame(date = recent_data$date, mvp_return = mvp_returns)

# Calculate cumulative returns
mvp_returns_df <- mvp_returns_df %>%
  mutate(cumulative_return = cumprod(1 + mvp_return / 100) - 1)

# Filter for data starting from 1969
mvp_returns_df <- mvp_returns_df %>%
  filter(year(date) >= 1969)

# Plot the cumulative returns
ggplot(mvp_returns_df, aes(x = date, y = cumulative_return)) +
  geom_line(color = "blue") +
  labs(title = "Cumulative Returns of the Minimum Variance Portfolio (MVP)",
       x = "Date", y = "Cumulative Return") +
  theme_minimal()