# Install and Load Required Libraries
install.packages(c("tidyverse", "lubridate", "dplyr", "ggplot2", "readxl", "zoo"))
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)
library(ggplot2)
library(readxl)
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# === 1. Load Stock Data ===
companies <- c("nvda", "tsla", "msft", "pg", "ko")
stock_files <- paste0(companies, " data.xlsx")
company_names <- toupper(companies)

stock_data_list <- map(stock_files, read_excel)
names(stock_data_list) <- company_names

# === 2. Load Fama-French 5-Factor Data and Risk-Free Rate ===
ff_factors <- read_csv("/cloud/project/F-F_Research_Data_5_Factors_2x3 (1).CSV")
## Rows: 86 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (5): Mkt_RF, SMB, HML, RMW, CMA
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
risk_free  <- read_excel("risk free rate.xlsx")

# Clean and Rename Columns
colnames(ff_factors)[1] <- "Date"
ff_factors <- ff_factors %>%
  rename(
    Mkt_RF = `Mkt_RF`,
    SMB = SMB,
    HML = HML,
    RMW = RMW,
    CMA = CMA
  )
ff_factors$Date <- as.Date(ff_factors$Date)
risk_free$Date <- as.Date(risk_free$Date)

# Convert Dates in Stock Data
convert_date <- function(df) {
  df$Date <- as.Date(df$Date)
  return(df)
}
stock_data_list <- map(stock_data_list, convert_date)

# === 3. Calculate Log Returns ===
calc_returns <- function(data) {
  data %>%
    arrange(Date) %>%
    mutate(Return = log(Adjusted / lag(Adjusted))) %>%
    select(Date, Return) %>%
    drop_na()
}
returns_list <- map(stock_data_list, calc_returns)

# === 4. Merge Stock Returns with FF5FM Factors ===
prepare_data <- function(stock_returns) {
  merged <- inner_join(stock_returns, ff_factors, by = "Date")
  merged <- inner_join(merged, risk_free, by = "Date")
  return(merged)
}
merged_data_list <- map(returns_list, prepare_data)

# === 5. Run FF5FM Model for All Companies ===
run_ff5fm <- function(data) {
  lm(Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data) %>% summary()
}
ff5fm_results <- map(merged_data_list, run_ff5fm)
names(ff5fm_results) <- company_names

# === 6. Display Results for All Companies ===
for (company in company_names) {
  cat("\n===== FF5FM Results for", company, "=====\n")
  print(ff5fm_results[[company]])
}
## 
## ===== FF5FM Results for NVDA =====
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.369e-05 -3.445e-05 -4.799e-06  3.178e-05  9.212e-05 
## 
## Coefficients:
##               Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)  4.002e-03  5.556e-06   720.260   <2e-16 ***
## Mkt_RF       1.600e+00  1.104e-04 14498.149   <2e-16 ***
## SMB         -1.352e-04  1.891e-04    -0.715    0.477    
## HML         -2.059e-04  1.948e-04    -1.057    0.294    
## RMW         -3.970e-04  2.868e-04    -1.384    0.170    
## CMA          1.964e-04  3.400e-04     0.578    0.565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.652e-05 on 79 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.473e+07 on 5 and 79 DF,  p-value: < 2.2e-16
## 
## 
## ===== FF5FM Results for TSLA =====
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.863e-04 -8.958e-05  3.800e-08  8.171e-05  2.196e-04 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  5.004e-03  1.262e-05  396.473   <2e-16 ***
## Mkt_RF       3.646e-04  2.507e-04    1.454    0.150    
## SMB          3.999e+00  4.296e-04 9308.892   <2e-16 ***
## HML         -3.303e-04  4.424e-04   -0.747    0.457    
## RMW         -2.266e-04  6.515e-04   -0.348    0.729    
## CMA         -1.020e-03  7.723e-04   -1.321    0.190    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0001057 on 79 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.888e+07 on 5 and 79 DF,  p-value: < 2.2e-16
## 
## 
## ===== FF5FM Results for MSFT =====
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.237e-04 -5.295e-05  8.549e-06  5.506e-05  9.782e-05 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  1.000e-02  7.460e-06 1341.004   <2e-16 ***
## Mkt_RF      -1.062e-04  1.482e-04   -0.717    0.476    
## SMB         -1.363e-04  2.539e-04   -0.537    0.593    
## HML          2.000e+00  2.615e-04 7650.095   <2e-16 ***
## RMW         -2.802e-04  3.851e-04   -0.728    0.469    
## CMA          2.452e-04  4.564e-04    0.537    0.593    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.245e-05 on 79 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.187e+07 on 5 and 79 DF,  p-value: < 2.2e-16
## 
## 
## ===== FF5FM Results for PG =====
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.580e-05 -4.460e-05 -4.110e-06  3.552e-05  7.721e-05 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 3.497e-03  5.671e-06  616.679   <2e-16 ***
## Mkt_RF      3.260e-05  1.126e-04    0.289    0.773    
## SMB         4.928e-05  1.930e-04    0.255    0.799    
## HML         2.531e-04  1.988e-04    1.274    0.207    
## RMW         1.500e+00  2.927e-04 5124.981   <2e-16 ***
## CMA         1.244e-04  3.470e-04    0.359    0.721    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.747e-05 on 79 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 5.493e+06 on 5 and 79 DF,  p-value: < 2.2e-16
## 
## 
## ===== FF5FM Results for KO =====
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.906e-05 -3.387e-05 -6.021e-06  2.850e-05  9.742e-05 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  3.658e-03  5.134e-06  712.503   <2e-16 ***
## Mkt_RF       1.125e-04  1.020e-04    1.103   0.2734    
## SMB         -1.813e-04  1.748e-04   -1.037   0.3028    
## HML          3.507e-04  1.800e-04    1.948   0.0549 .  
## RMW         -5.031e-04  2.651e-04   -1.898   0.0613 .  
## CMA          1.667e+00  3.142e-04 5304.534   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.299e-05 on 79 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 5.681e+06 on 5 and 79 DF,  p-value: < 2.2e-16