# Load necessary libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the Fama-French 5-factor file for a first look. The file opens with two
# descriptive text lines ahead of the ",Mkt-RF,SMB,HML,RMW,CMA,RF" header row;
# skip them so that row supplies the column names and the table is split into
# its seven comma-separated columns instead of a single text column.
# NOTE(review): relies on read_csv() ignoring any blank line that follows the
# skipped lines (its default skip_empty_rows = TRUE) -- confirm.
retdata <- read_csv("F-F_Research_Data_5_Factors_2x3.csv", skip = 2)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 793 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): This file was created by CMPT_ME_BEME_OP_INV_RETS using the 202403 ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the first rows of retdata to check how the file was parsed
head(retdata)
## # A tibble: 6 × 1
##   This file was created by CMPT_ME_BEME_OP_INV_RETS using the 202403 CRSP data…¹
##   <chr>                                                                         
## 1 The 1-month TBill return is from Ibbotson and Associates Inc.                 
## 2 ,Mkt-RF,SMB,HML,RMW,CMA,RF                                                    
## 3 196307,   -0.39,   -0.41,   -0.97,    0.68,   -1.18,    0.27                  
## 4 196308,    5.07,   -0.80,    1.80,    0.36,   -0.35,    0.25                  
## 5 196309,   -1.57,   -0.52,    0.13,   -0.71,    0.29,    0.27                  
## 6 196310,    2.53,   -1.39,   -0.10,    2.80,   -2.01,    0.29                  
## # ℹ abbreviated name:
## #   ¹​`This file was created by CMPT_ME_BEME_OP_INV_RETS using the 202403 CRSP database.`
# Show the row count, column count and column types of retdata
glimpse(retdata)
## Rows: 793
## Columns: 1
## $ `This file was created by CMPT_ME_BEME_OP_INV_RETS using the 202403 CRSP database.` <chr> …
# Give the market excess-return column a syntactic name: 'Mkt-RF' -> 'Mkt_RF'.
# The column is matched by name rather than by position 2, so a table that has
# no such column (for example one parsed into a single text column) is left
# unchanged instead of having a non-existent second column addressed.
colnames(retdata)[colnames(retdata) == "Mkt-RF"] <- "Mkt_RF"
library(tidyr)
library(tibble)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(ggplot2)

# Read the Fama-French 5-factor file without taking a header row from it:
# skip the first three lines, keep the first column as text and parse the
# remaining six columns as numbers
ff_factors <- read_csv(
  file = "F-F_Research_Data_5_Factors_2x3.csv",
  col_names = FALSE,
  col_types = cols(
    X1 = "c",
    X2 = "d",
    X3 = "d",
    X4 = "d",
    X5 = "d",
    X6 = "d",
    X7 = "d"
  ),
  skip = 3
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Name the seven columns: the period key followed by the factor series
names(ff_factors) <- c("Date", "Mkt_RF", "SMB", "HML", "RMW", "CMA", "RF")

# Turn the YYYYMM keys into first-of-month dates, then keep January 1969
# onwards; keys that do not parse become NA and are dropped by filter()
ff_factors <- ff_factors %>%
  mutate(Date = as.Date(paste0(Date, "01"), format = "%Y%m%d")) %>%
  filter(Date >= as.Date("1969-01-01"))

# Load 10 Industry Portfolios data. This has to come from the industry file,
# not the 5-factor file: the 5-factor file has only seven columns, so the
# eleven column types declared here (and the eleven names assigned afterwards)
# cannot apply to it.
# NOTE(review): the file name is assumed to be the usual name of the Kenneth
# French download, and skip = 11 is carried over unchanged -- confirm both
# against the local copy of the data.
# NOTE(review): if that file holds more than one monthly table, the later
# tables are read as well and are not separated by date filtering -- verify.
industry_returns <- read_csv(
  "10_Industry_Portfolios.csv",
  skip = 11,
  col_names = FALSE,
  col_types = cols(
    X1 = col_character(),
    X2 = col_double(),
    X3 = col_double(),
    X4 = col_double(),
    X5 = col_double(),
    X6 = col_double(),
    X7 = col_double(),
    X8 = col_double(),
    X9 = col_double(),
    X10 = col_double(),
    X11 = col_double()
  )
)
## Warning: The following named parsers don't match the column names: X8, X9, X10, X11
## One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Name the eleven columns: the period key followed by Ind1 ... Ind10
names(industry_returns) <- c("Date", paste0("Ind", seq_len(10)))

# Turn the YYYYMM keys into first-of-month dates, then keep January 1969
# onwards; keys that do not parse become NA and are dropped by filter()
industry_returns <- industry_returns %>%
  mutate(Date = as.Date(paste0(Date, "01"), format = "%Y%m%d")) %>%
  filter(Date >= as.Date("1969-01-01"))

# Compound each factor's percentage returns into a running product of
# (1 + return / 100), stored in a new "<factor>_Cumulative" column
ff_factors_cumulative <- ff_factors %>%
  mutate(
    across(
      c(Mkt_RF, SMB, HML, RMW, CMA),
      function(pct_return) cumprod(1 + pct_return / 100),
      .names = "{.col}_Cumulative"
    )
  )

# Reshape the cumulative series to long form for plotting: one row per date
# and factor, with the series name in Factor and its value in Cumulative_Return
ff_factors_long <- pivot_longer(
  select(ff_factors_cumulative, Date, ends_with("_Cumulative")),
  cols = !Date,
  names_to = "Factor",
  values_to = "Cumulative_Return"
)

# Line chart of the cumulative series over time, one coloured line per factor
ggplot(
  data = ff_factors_long,
  mapping = aes(x = Date, y = Cumulative_Return, colour = Factor)
) +
  geom_line() +
  ggtitle("Cumulative Returns of Fama-French 5 Factors") +
  xlab("Date") +
  ylab("Cumulative Return") +
  theme_minimal()