# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)
library(ggplot2)
library(readxl)
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# === 1. Load Stock Data ===
companies <- c("nvda", "tsla", "msft", "pg", "ko")
stock_files <- paste0(companies, " data.xlsx")
company_names <- toupper(companies)

stock_data_list <- map(stock_files, read_excel)
names(stock_data_list) <- company_names

# === 2. Load Fama-French Factors and Risk-Free Rate ===
ff_factors <- read_csv("/cloud/project/F-F_Research_Data_Factors.CSV", skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 1283 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
risk_free  <- read_excel("risk free rate.xlsx")

# Clean Fama-French Data
colnames(ff_factors)[1] <- "Date"
ff_factors <- ff_factors %>% rename(Mkt_RF = `Mkt-RF`)
ff_factors$Date <- as.Date(as.yearmon(as.character(ff_factors$Date), format = "%Y%m"), frac = 1)
risk_free$Date <- as.Date(risk_free$Date)

# === 3. Prepare Stock Data Dates ===
convert_date <- function(df) {
  df$Date <- as.Date(df$Date)
  return(df)
}

stock_data_list <- map(stock_data_list, convert_date)

# === 4. Calculate Log Returns ===
calc_returns <- function(data) {
  data %>%
    arrange(Date) %>%
    mutate(Return = log(Adjusted / lag(Adjusted))) %>%
    select(Date, Return) %>%
    drop_na()
}

returns_list <- map(stock_data_list, calc_returns)

# === 5. Merge with Fama-French Factors and Risk-Free Rate ===
prepare_data <- function(stock_returns) {
  merged <- inner_join(stock_returns, ff_factors, by = "Date")
  merged <- inner_join(merged, risk_free, by = "Date")
  return(merged)
}

merged_data_list <- map(returns_list, prepare_data)

# === 6. Run CAPM, FF3FM, FF5FM Models ===
run_models <- function(data) {
  capm_model <- lm(Return ~ Mkt_RF, data = data)
  ff3fm_model <- lm(Return ~ Mkt_RF + SMB + HML, data = data)
  
  ff5fm_model <- if (all(c("RMW", "CMA") %in% colnames(data))) {
    lm(Return ~ Mkt_RF + SMB + HML + RMW + CMA, data = data)
  } else {
    NULL
  }
  
  return(list(
    CAPM = summary(capm_model),
    FF3FM = summary(ff3fm_model),
    FF5FM = if (!is.null(ff5fm_model)) summary(ff5fm_model) else "FF5FM factors missing."
  ))
}

# Run models for all companies and store results
model_results <- map(merged_data_list, run_models)
names(model_results) <- company_names

# === 7. View Results ===

# Example: Access Results for Each Company and Model
model_results$NVDA$CAPM    # NVDA CAPM Results
## 
## Call:
## lm(formula = Return ~ Mkt_RF, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.205402 -0.043744  0.003038  0.054084  0.140215 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.008662   0.008463   1.024    0.309
## Mkt_RF      0.002530   0.001624   1.558    0.123
## 
## Residual standard error: 0.07585 on 81 degrees of freedom
## Multiple R-squared:  0.02908,    Adjusted R-squared:  0.01709 
## F-statistic: 2.426 on 1 and 81 DF,  p-value: 0.1232
model_results$NVDA$FF3FM   # NVDA FF3FM Results
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.193930 -0.037964 -0.004766  0.053625  0.146293 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0099722  0.0085271   1.169    0.246
## Mkt_RF      0.0017210  0.0017241   0.998    0.321
## SMB         0.0042023  0.0029986   1.401    0.165
## HML         0.0000131  0.0019917   0.007    0.995
## 
## Residual standard error: 0.07586 on 79 degrees of freedom
## Multiple R-squared:  0.05273,    Adjusted R-squared:  0.01676 
## F-statistic: 1.466 on 3 and 79 DF,  p-value: 0.2302
model_results$NVDA$FF5FM   # NVDA FF5FM Results
## [1] "FF5FM factors missing."
model_results$TSLA$CAPM
## 
## Call:
## lm(formula = Return ~ Mkt_RF, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.228013 -0.079684  0.004754  0.060796  0.242906 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.023245   0.012338   1.884   0.0631 .
## Mkt_RF      0.004402   0.002368   1.859   0.0667 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1106 on 81 degrees of freedom
## Multiple R-squared:  0.04092,    Adjusted R-squared:  0.02908 
## F-statistic: 3.456 on 1 and 81 DF,  p-value: 0.06667
model_results$TSLA$FF3FM
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.204391 -0.079660 -0.004113  0.069731  0.261093 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.025072   0.012469   2.011   0.0478 *
## Mkt_RF      0.003511   0.002521   1.393   0.1676  
## SMB         0.004308   0.004385   0.983   0.3288  
## HML         0.001930   0.002912   0.663   0.5094  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1109 on 79 degrees of freedom
## Multiple R-squared:  0.05866,    Adjusted R-squared:  0.02291 
## F-statistic: 1.641 on 3 and 79 DF,  p-value: 0.1866
model_results$TSLA$FF5FM
## [1] "FF5FM factors missing."
model_results$MSFT$CAPM
## 
## Call:
## lm(formula = Return ~ Mkt_RF, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.106181 -0.032761  0.003609  0.026607  0.184260 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.022427   0.005843   3.838 0.000244 ***
## Mkt_RF      0.001421   0.001121   1.267 0.208859    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05236 on 81 degrees of freedom
## Multiple R-squared:  0.01943,    Adjusted R-squared:  0.007321 
## F-statistic: 1.605 on 1 and 81 DF,  p-value: 0.2089
model_results$MSFT$FF3FM
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.101975 -0.033077  0.003349  0.029035  0.187919 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.177e-02  5.921e-03   3.678 0.000428 ***
## Mkt_RF       1.829e-03  1.197e-03   1.528 0.130491    
## SMB         -2.129e-03  2.082e-03  -1.023 0.309567    
## HML          3.743e-05  1.383e-03   0.027 0.978476    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05267 on 79 degrees of freedom
## Multiple R-squared:  0.03225,    Adjusted R-squared:  -0.004495 
## F-statistic: 0.8777 on 3 and 79 DF,  p-value: 0.4564
model_results$MSFT$FF5FM
## [1] "FF5FM factors missing."
model_results$PG$CAPM
## 
## Call:
## lm(formula = Return ~ Mkt_RF, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097497 -0.018148  0.001241  0.019792  0.064373 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.0095499  0.0030641   3.117  0.00253 **
## Mkt_RF      -0.0004870  0.0005882  -0.828  0.41009   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02746 on 81 degrees of freedom
## Multiple R-squared:  0.008393,   Adjusted R-squared:  -0.003849 
## F-statistic: 0.6856 on 1 and 81 DF,  p-value: 0.4101
model_results$PG$FF3FM
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.087250 -0.016222 -0.000421  0.021598  0.065627 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  8.905e-03  3.056e-03   2.914  0.00464 **
## Mkt_RF      -9.200e-05  6.178e-04  -0.149  0.88201   
## SMB         -2.046e-03  1.075e-03  -1.905  0.06048 . 
## HML         -3.584e-05  7.137e-04  -0.050  0.96008   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02719 on 79 degrees of freedom
## Multiple R-squared:  0.05224,    Adjusted R-squared:  0.01625 
## F-statistic: 1.451 on 3 and 79 DF,  p-value: 0.2342
model_results$PG$FF5FM
## [1] "FF5FM factors missing."
model_results$KO$CAPM
## 
## Call:
## lm(formula = Return ~ Mkt_RF, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.058164 -0.019868 -0.000386  0.017969  0.046594 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0095820  0.0027247   3.517 0.000719 ***
## Mkt_RF      0.0008065  0.0005230   1.542 0.126949    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02442 on 81 degrees of freedom
## Multiple R-squared:  0.02852,    Adjusted R-squared:  0.01653 
## F-statistic: 2.378 on 1 and 81 DF,  p-value: 0.1269
model_results$KO$FF3FM
## 
## Call:
## lm(formula = Return ~ Mkt_RF + SMB + HML, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.049452 -0.016055 -0.002113  0.017651  0.051119 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.0090878  0.0026956   3.371  0.00116 **
## Mkt_RF       0.0009455  0.0005450   1.735  0.08667 . 
## SMB         -0.0004961  0.0009479  -0.523  0.60219   
## HML         -0.0013447  0.0006296  -2.136  0.03579 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02398 on 79 degrees of freedom
## Multiple R-squared:  0.08623,    Adjusted R-squared:  0.05153 
## F-statistic: 2.485 on 3 and 79 DF,  p-value: 0.06672
model_results$KO$FF5FM
## [1] "FF5FM factors missing."