library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyr)
library(dplyr)
library(broom)
library(ggplot2)
library(tibbletime)
## 
## Attaching package: 'tibbletime'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ forecast  8.23.0     ✔ expsmooth 2.3   
## ✔ fma       2.5
library(tidyquant)
## Loading required package: PerformanceAnalytics
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
library(dygraphs)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
#####################################################################################
#####################################################################################
################# 
#################         FAMA FRENCH FACTORS
#################
#####################################################################################
#####################################################################################


# We need to load the file again
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"

temp_file <- tempfile()

download.file(ff_url, temp_file)

FF <- unzip(temp_file)

# Skipping the first 3 rows

#ff_factors_raw_data <- read_csv(ff_factors_raw_data)

#ff_factors_raw_data <- read_csv(ff_factors_raw_data, skip = 3)

FF <- read_csv(FF, skip = 3, col_types = cols(...1 = col_integer(),`Mkt-RF` = col_double(),
                                              SMB = col_double(),
                                              HML = col_double(),
                                              RF = col_double())) %>% rename(date = ...1) %>% subset(date>100000) %>% mutate(date = ymd(parse_date_time(date, "%Y%m")))  %>% na.omit(date)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
FF <- FF  %>% mutate(MKT =`Mkt-RF`+RF)


################ 
################ FF Factors in Time Series
################ 
ggplot(FF, aes(x = date, y = RF)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Risk-free return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(FF, aes(x = date, y = MKT)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Excess market return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(FF, aes(x = date, y = SMB)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Size factor return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(FF, aes(x = date, y = HML)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Value factor return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

################ 
################ cumulative returns
################ 

################# cumulative returns of the Market factor
FF_MKT <- FF %>% select(-SMB,-HML,-'Mkt-RF',-RF)  %>% mutate(MKT = MKT/100)
port_cumulative_ret <- FF_MKT %>%
  mutate(cr = cumprod(1 + MKT))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_minimal() +
  scale_x_date(date_breaks = '10 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the Market factor
FF_MKT <- FF %>% select(-SMB,-HML,-'Mkt-RF',-RF)  %>% mutate(MKT = MKT/100) %>% filter(date >= as.Date('1997-01-01'))
port_cumulative_ret <- FF_MKT %>%
  mutate(cr = cumprod(1 + MKT))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_minimal() +
  scale_x_date(date_breaks = '2 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the SMB factor
FF_SMB <- FF %>% select(date, SMB) %>% mutate(SMB = SMB/100)
port_cumulative_ret <- FF_SMB %>%
  mutate(cr = cumprod(1 + SMB))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_classic() +
  scale_y_continuous(breaks = seq(1,10,0.5)) +
  scale_x_date(date_breaks = '10 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the HML factor
FF_HML <- FF %>% select(date, HML) %>% mutate(HML = HML/100)
port_cumulative_ret <- FF_HML %>%
  mutate(cr = cumprod(1 + HML))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_classic() +
  scale_y_continuous(breaks = seq(1,100,5)) +
  scale_x_date(date_breaks = '10 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

#### transforming FF into a long format for histograms
FF_long <- 
  FF %>% 
  select(-RF) %>%
  gather(factor, returns, -date)

################ 
################ Distribution of FF Factors
################ 

################# histograms in one picture
FF_long %>% 
  ggplot(aes(x = returns, fill = factor)) + 
  geom_histogram(alpha = 1, binwidth = 1) 

################# histograms in separate pictures
FF_long %>% 
  ggplot(aes(x = returns, fill = factor)) + 
  geom_histogram(alpha = 1, binwidth = 1,aes(y = stat(width*density)))  +
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  scale_y_continuous(labels=percent) +
  facet_wrap(~factor)
## Warning: `stat(width * density)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(width * density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `position_stack()` requires non-overlapping x intervals.

################# density plots in one pictures
FF_long %>% 
  ggplot(aes(x = returns, colour = factor, fill = factor)) +
  stat_density(geom = "line", alpha = 1) +
  xlab("monthly returns") +
  ylab("distribution") 

#####################################################################################
#####################################################################################
################# 
#################         STOCKS
#################
#####################################################################################
#####################################################################################



################# pulling stock data straight from Yahoo Finance
symbols <- c("AMZN","WMT","TGT")
prices <- 
  getSymbols(symbols, src = 'yahoo', 
             from = "1997-06-01", 
             auto.assign = TRUE, 
             warnings = FALSE) %>% 
  map(~Ad(get(.))) %>% 
  reduce(merge) %>%
  `colnames<-`(symbols)

################# converting the price date into monthly returns
returns <- 
  prices %>% 
  to.monthly(indexAt = "firstof", OHLC = FALSE) %>%
  # convert the index to a date
  data.frame(date = index(.)) %>%
  # now remove the index because it got converted to row names
  remove_rownames() %>%
  # convert from wide to long creating new variable asset that will assign ticker, conversion needed to create returns using lagged variables
  gather(asset,prices,-date) %>%
  # compute returns using the usual way
  group_by(asset) %>%  
  mutate(returns=(prices/lag(prices))-1) %>%
  # remove prices
  select(-prices) %>% 
  # convert back to wide by asset
  spread(asset, returns) %>% 
  # remove missings
  na.omit()






################# returns over time

ggplot(returns, aes(x = date, y = AMZN)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Amazon return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(returns, aes(x = date, y = WMT)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Walmart return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(returns, aes(x = date, y = TGT)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) + 
  labs(title = "Target return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

################# cumulative returns
amaz <- returns %>% select(date, AMZN)
port_cumulative_ret <- amaz %>%
  mutate(cr = cumprod(1 + AMZN))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_classic() +
  scale_x_date(date_breaks = '5 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

wal <- returns %>% select(date, WMT)
port_cumulative_ret <- wal %>%
  mutate(cr = cumprod(1 + WMT))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_classic() +
  scale_x_date(date_breaks = '5 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

tgt <- returns %>% select(date, TGT)
port_cumulative_ret <- tgt %>%
  mutate(cr = cumprod(1 + TGT))
port_cumulative_ret %>%
  ggplot(aes(x = date, y = cr)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative Returns',
       title = 'Portfolio Cumulative Returns') +
  theme_classic() +
  scale_x_date(date_breaks = '5 years',
               date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# visualizgin the returns
################# back to long from wide
asset_returns_long <- 
  returns %>% 
  gather(asset, returns, -date)

################# histograms
asset_returns_long %>% 
  # pipe to ggplot, aes is a function for aestethics of a plot, returns on x axis, fill means different colors for different assets, alpha describes the intensity of colors, binwidth is selfexplanatory
  ggplot(aes(x = returns, fill = asset)) + 
  geom_histogram(alpha = 1, binwidth = .01)

################# separate histograms
asset_returns_long %>% 
  # pipe to ggplot, aes is a function for aestethics of a plot, returns on x axis, fill means different colors for different assets, alpha describes the intensity of colors, binwidth is selfexplanatory
  ggplot(aes(x = returns, fill = asset)) + 
  geom_histogram(alpha = 1, binwidth = .01) +
  facet_wrap(~asset)

################# densities
asset_returns_long %>% 
  ggplot(aes(x = returns, colour = asset, fill = asset)) +
  stat_density(geom = "line", alpha = 1) +
  xlab("monthly returns") +
  ylab("distribution") 

#####################################################################################
#####################################################################################
################# 
#################         3 FACTOR MODELS
#################
#####################################################################################
#####################################################################################

##################### merging asset returns with factors, and converting returns to excess returns
ff_assets <-
  returns %>% 
  left_join(FF, by = "date") %>%
  mutate(MKT_RF = MKT/100,
         SMB = SMB/100,
         HML = HML/100,
         RF = RF/100,
         AMZN = AMZN-RF,
         TGT = TGT-RF,
         WMT = WMT-RF) %>%
  select(-RF,-MKT) %>%
  na.omit()

##################### modelling


##################### 
##################### 
########
######## AMAZON
########
##################### 
##################### 

## estimating CAPM model
capm_amazon <- lm(AMZN ~ MKT_RF,data=ff_assets)
summary(capm_amazon)
## 
## Call:
## lm(formula = AMZN ~ MKT_RF, data = ff_assets)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42361 -0.07216 -0.01373  0.04509  1.17856 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.020359   0.008244   2.469   0.0141 *  
## MKT_RF      1.693870   0.174221   9.723   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1459 on 321 degrees of freedom
## Multiple R-squared:  0.2275, Adjusted R-squared:  0.2251 
## F-statistic: 94.53 on 1 and 321 DF,  p-value: < 2.2e-16
## estimating 3 factor model
ff3_amazon <- lm(AMZN ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_amazon)
## 
## Call:
## lm(formula = AMZN ~ MKT_RF + SMB + HML, data = ff_assets)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41718 -0.06070 -0.01494  0.04234  1.13772 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.021858   0.007867   2.779  0.00578 ** 
## MKT_RF       1.699502   0.171801   9.892  < 2e-16 ***
## SMB         -0.416315   0.247578  -1.682  0.09364 .  
## HML         -1.315150   0.227165  -5.789 1.69e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1391 on 319 degrees of freedom
## Multiple R-squared:  0.302,  Adjusted R-squared:  0.2955 
## F-statistic: 46.01 on 3 and 319 DF,  p-value: < 2.2e-16
## plotting realized returns vs predicted from the CAPM and the 3-factor model
amazon <- data.frame(ff_assets$AMZN, predict(ff3_amazon), predict(capm_amazon))
ggplot(amazon) + geom_point(aes(x = ff_assets$AMZN, y = predict(ff3_amazon)),color="red") + geom_point(aes(x = ff_assets$AMZN, y = predict(capm_amazon)))  + geom_abline(intercept = 0)

######## WALMART

## estimating 3 factor model
ff3_walmart <- lm(WMT ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_walmart)
## 
## Call:
## lm(formula = WMT ~ MKT_RF + SMB + HML, data = ff_assets)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.197907 -0.030447  0.000483  0.026036  0.195817 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.005070   0.002986   1.698   0.0904 .  
## MKT_RF       0.586768   0.065204   8.999  < 2e-16 ***
## SMB         -0.613113   0.093963  -6.525 2.67e-10 ***
## HML         -0.136974   0.086216  -1.589   0.1131    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05279 on 319 degrees of freedom
## Multiple R-squared:  0.2421, Adjusted R-squared:  0.235 
## F-statistic: 33.97 on 3 and 319 DF,  p-value: < 2.2e-16
## estimating CAPM model
capm_walmart <- lm(WMT ~ MKT_RF,data=ff_assets)
summary(capm_walmart)
## 
## Call:
## lm(formula = WMT ~ MKT_RF, data = ff_assets)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.196245 -0.033012 -0.001799  0.030789  0.219836 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.005168   0.003168   1.632    0.104    
## MKT_RF      0.484538   0.066940   7.238 3.37e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05605 on 321 degrees of freedom
## Multiple R-squared:  0.1403, Adjusted R-squared:  0.1376 
## F-statistic: 52.39 on 1 and 321 DF,  p-value: 3.373e-12
## plotting realized returns vs predicted from the CAPM and the 3-factor model
walmart <- data.frame(ff_assets$WMT, predict(ff3_walmart), predict(capm_walmart))
ggplot(walmart) + geom_point(aes(x = ff_assets$WMT, y = predict(ff3_walmart)),color="red") + geom_point(aes(x = ff_assets$WMT, y = predict(capm_walmart))) + geom_abline(intercept = 0) + xlim(-0.3,0.3) + ylim(-0.3,0.3)

######## TARGET
## estimating 3 factor model
ff3_target <- lm(TGT ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_target)
## 
## Call:
## lm(formula = TGT ~ MKT_RF + SMB + HML, data = ff_assets)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31411 -0.04427 -0.00571  0.04188  0.27529 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.002506   0.003949   0.635   0.5261    
## MKT_RF       1.031054   0.086237  11.956   <2e-16 ***
## SMB         -0.246262   0.124274  -1.982   0.0484 *  
## HML          0.248993   0.114027   2.184   0.0297 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06982 on 319 degrees of freedom
## Multiple R-squared:  0.3153, Adjusted R-squared:  0.3088 
## F-statistic: 48.96 on 3 and 319 DF,  p-value: < 2.2e-16
## estimating CAPM model
capm_target <- lm(TGT ~ MKT_RF,data=ff_assets)
summary(capm_target)
## 
## Call:
## lm(formula = TGT ~ MKT_RF, data = ff_assets)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.289315 -0.044019 -0.004299  0.040144  0.267444 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.002935   0.003997   0.734    0.463    
## MKT_RF      0.974126   0.084470  11.532   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07073 on 321 degrees of freedom
## Multiple R-squared:  0.2929, Adjusted R-squared:  0.2907 
## F-statistic:   133 on 1 and 321 DF,  p-value: < 2.2e-16
## plotting realized returns vs predicted from the CAPM and the 3-factor model
target <- data.frame(ff_assets$TGT, predict(ff3_target), predict(capm_target))
ggplot(target) + geom_point(aes(x = ff_assets$TGT, y = predict(ff3_target)),color="red") + geom_point(aes(x = ff_assets$TGT, y = predict(capm_target))) + geom_abline(intercept = 0) + xlim(-0.3,0.3) + ylim(-0.3,0.3)

############# out of sample prediction
### pre_COVID sample to train the model
ff3_amazon_pred <- subset(ff_assets,date<"2020-01-01")
### post_COVID sample to predict the returns
ff3_amazon_fore <- subset(ff_assets,date>"2019-12-01")
### train the 3 factor model on pre COVID sample
ff3_amazon_1    <- lm(AMZN ~ MKT_RF + SMB + HML,data=ff3_amazon_pred)
### train the CAPM model on pre COVID sample
ff3_amazon_2    <- lm(AMZN ~ MKT_RF,data=ff3_amazon_pred)
### plot the realized returns vs predicted returns post COVID
AMZN_post <- data.frame(date=ff3_amazon_fore$date,realized=ff3_amazon_fore$AMZN, predicted_ff=predict(ff3_amazon_1, ff3_amazon_fore), predicted_capm=predict(ff3_amazon_2, ff3_amazon_fore))

ggplot(AMZN_post) + geom_point(aes(x = realized, y = predicted_ff),color="red") + 
  geom_point(aes(x = realized, y = predicted_capm),color="blue") + 
  geom_abline(intercept = 0)

ggplot(AMZN_post) + 
  geom_line(aes(x = date, y = predicted_ff, colour = "FF"),color="red") + 
  geom_line(aes(x = date, y = predicted_capm, colour = "CAPM"),color="blue") +
  geom_line(aes(x = date, y = realized, colour = "Realized"),color="black") +
  labs(title = "Target return", y = "Returns", x = "Calendar Time") +
  scale_colour_manual("", 
                      breaks = c("FF", "CAPM", "realized"),
                      values = c("red", "blue", "green"))

############# out of sample prediction
### pre_COVID sample to train the model
ff3_walmart_pred <- subset(ff_assets,date<"2020-01-01")
### post_COVID sample to predict the returns
ff3_walmart_fore <- subset(ff_assets,date>"2019-12-01")
### train the 3 factor model on pre COVID sample
ff3_walmart_1    <- lm(WMT ~ MKT_RF + SMB + HML,data=ff3_walmart_pred)
### train the CAPM model on pre COVID sample
ff3_walmart_2    <- lm(WMT ~ MKT_RF,data=ff3_walmart_pred)
### plot the realized returns vs predicted returns post COVID
WMT_post <- data.frame(date=ff3_walmart_fore$date,realized=ff3_walmart_fore$WMT, predicted_ff=predict(ff3_walmart_1, ff3_walmart_fore), predicted_capm=predict(ff3_walmart_2, ff3_walmart_fore))

ggplot(WMT_post) + geom_point(aes(x = realized, y = predicted_ff),color="red") + 
  geom_point(aes(x = realized, y = predicted_capm),color="blue") + 
  geom_abline(intercept = 0)

ggplot(WMT_post) + 
  geom_line(aes(x = date, y = predicted_ff, colour = "FF"),color="red") + 
  geom_line(aes(x = date, y = predicted_capm, colour = "CAPM"),color="blue") +
  geom_line(aes(x = date, y = realized, colour = "Realized"),color="black") +
  labs(title = "Target return", y = "Returns", x = "Calendar Time") +
  scale_colour_manual("", 
                      breaks = c("FF", "CAPM", "realized"),
                      values = c("red", "blue", "green"))

ggplot(WMT_post) + 
  geom_line(aes(x = date, y = predicted_capm),color="blue") +
  geom_line(aes(x = date, y = realized),color="black")

ggplot(WMT_post) + 
  geom_line(aes(x = date, y = predicted_ff),color="red") + 
  geom_line(aes(x = date, y = realized),color="black")

# step 1.. read in data from yahoo finance 
library(tidyverse)
library(lubridate)
library(quantmod)
library(tidyr)
library(dplyr)
library(broom)
library(ggplot2)
library(tibbletime)
library(fpp2)
library(tidyquant)
library(dygraphs)
library(scales)

# Downloading historical prices for FAANG stocks
symbols <- c("META", "AMZN", "AAPL", "NFLX", "GOOG")
prices_list <- list()

for (symbol in symbols) {
  prices_list[[symbol]] <- tryCatch({
    Ad(getSymbols(symbol, src = 'yahoo', from = "2015-03-30", to = "2024-05-01", auto.assign = FALSE))
  }, error = function(e) {
    warning(paste("Error downloading data for", symbol))
    NULL
  })
}

# Check the structure of each data frame in the list
lapply(prices_list, str)
## An xts object on 2015-03-30 / 2024-04-30 containing: 
##   Data:    double [2288, 1]
##   Columns: META.Adjusted
##   Index:   Date [2288] (TZ: "UTC")
##   xts Attributes:
##     $ src    : chr "yahoo"
##     $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing: 
##   Data:    double [2288, 1]
##   Columns: AMZN.Adjusted
##   Index:   Date [2288] (TZ: "UTC")
##   xts Attributes:
##     $ src    : chr "yahoo"
##     $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing: 
##   Data:    double [2288, 1]
##   Columns: AAPL.Adjusted
##   Index:   Date [2288] (TZ: "UTC")
##   xts Attributes:
##     $ src    : chr "yahoo"
##     $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing: 
##   Data:    double [2288, 1]
##   Columns: NFLX.Adjusted
##   Index:   Date [2288] (TZ: "UTC")
##   xts Attributes:
##     $ src    : chr "yahoo"
##     $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing: 
##   Data:    double [2288, 1]
##   Columns: GOOG.Adjusted
##   Index:   Date [2288] (TZ: "UTC")
##   xts Attributes:
##     $ src    : chr "yahoo"
##     $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## $META
## NULL
## 
## $AMZN
## NULL
## 
## $AAPL
## NULL
## 
## $NFLX
## NULL
## 
## $GOOG
## NULL
# Combine the data into a single data frame
valid_prices <- prices_list[!sapply(prices_list, is.null)]
prices <- do.call(merge, valid_prices)
colnames(prices) <- symbols[!sapply(prices_list, is.null)]

# Convert the combined data to monthly and adjust dates to the 1st of the month
prices <- prices %>%
  to.monthly(indexAt = "lastof", OHLC = FALSE) %>%
  data.frame(date = index(.)) %>%
  remove_rownames() %>%
  mutate(date = ceiling_date(date, "month") - days(1)) %>%
  mutate(date = date + days(1 - day(date)))

# Create a full sequence of dates from April 2015 to May 2024
full_dates <- data.frame(date = seq(as.Date("2015-04-01"), as.Date("2024-05-01"), by = "month"))

# Merge with the full sequence of dates and fill missing values with the mean
prices <- full_dates %>%
  left_join(prices, by = "date") %>%
  mutate(across(-date, ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# Converting the price data into monthly returns
returns <- prices %>%
  gather(asset, prices, -date) %>%
  group_by(asset) %>%
  mutate(returns = (prices / lag(prices)) - 1) %>%
  select(-prices) %>%
  spread(asset, returns) %>%
  na.omit()

# Visualizing returns over time for each stock
ggplot(returns, aes(x = date, y = AMZN)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
  labs(title = "Amazon return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(returns, aes(x = date, y = AAPL)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
  labs(title = "Apple return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(returns, aes(x = date, y = NFLX)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
  labs(title = "Netflix return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

ggplot(returns, aes(x = date, y = GOOG)) +
  geom_line() + theme_minimal() +
  scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
  labs(title = "Google return", y = "Percent", x = "Years", colour = "") +
  theme(legend.position = "none")

# Visualizing cumulative returns
for (stock in symbols) {
  stock_data <- returns %>% select(date, all_of(stock))
  port_cumulative_ret <- stock_data %>%
    mutate(cr = cumprod(1 + !!sym(stock)))
  
  ggplot(port_cumulative_ret, aes(x = date, y = cr)) +
    geom_line() +
    labs(x = 'Date',
         y = 'Cumulative Returns',
         title = paste(stock, 'Cumulative Returns')) +
    theme_classic() +
    scale_x_date(date_breaks = '5 years',
                 date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))
}

# Visualizing distributions
asset_returns_long <- returns %>% gather(asset, returns, -date)

# Histograms
asset_returns_long %>%
  ggplot(aes(x = returns, fill = asset)) +
  geom_histogram(alpha = 1, binwidth = .01)

# Separate histograms
asset_returns_long %>%
  ggplot(aes(x = returns, fill = asset)) +
  geom_histogram(alpha = 1, binwidth = .01) +
  facet_wrap(~asset)

# Densities
asset_returns_long %>%
  ggplot(aes(x = returns, colour = asset, fill = asset)) +
  stat_density(geom = "line", alpha = 1) +
  xlab("monthly returns") +
  ylab("distribution")

# Question A)

# Load the Fama-French factors data
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp_file <- tempfile()
download.file(ff_url, temp_file)
ff_factors_file <- unzip(temp_file)

FF <- read_csv(ff_factors_file, skip = 3, col_types = cols(...1 = col_integer(),
                                                           `Mkt-RF` = col_double(),
                                                           SMB = col_double(),
                                                           HML = col_double(),
                                                           RF = col_double())) %>% 
  rename(date = ...1) %>%
  filter(date >= 201504 & date <= 202404) %>%
  mutate(date = ymd(parse_date_time(date, "%Y%m"))) %>%
  na.omit()
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
FF <- FF %>% mutate(MKT_RF = `Mkt-RF` / 100, SMB = SMB / 100, HML = HML / 100, RF = RF / 100)
# Assuming 'returns' is the FAANG stock returns dataframe
ff_assets <- returns %>% left_join(FF, by = "date")
# Function to estimate CAPM model and return the beta
estimate_capm <- function(asset_returns, market_returns) {
  model <- lm(asset_returns ~ market_returns)
  return(coef(model)[2]) # Beta is the slope coefficient
}

# Estimate CAPM beta for each FAANG stock
betas <- ff_assets %>%
  summarise(
    AMZN = estimate_capm(AMZN, MKT_RF),
    AAPL = estimate_capm(AAPL, MKT_RF),
    NFLX = estimate_capm(NFLX, MKT_RF),
    GOOG = estimate_capm(GOOG, MKT_RF),
    META = estimate_capm(META, MKT_RF)
  )

# Determine whether the stock is defensive or cyclical based on beta
betas_desc <- betas %>%
  gather(stock, beta) %>%
  mutate(type = ifelse(beta < 1, "Defensive", "Cyclical"))

print(betas_desc)
## # A tibble: 5 × 3
##   stock  beta type     
##   <chr> <dbl> <chr>    
## 1 AMZN  1.25  Cyclical 
## 2 AAPL  1.18  Cyclical 
## 3 NFLX  1.28  Cyclical 
## 4 GOOG  0.993 Defensive
## 5 META  1.09  Cyclical
library(knitr)
kable(betas_desc, col.names = c("Stock", "Beta", "Type"), caption = "CAPM Betas and Stock Type for FAANG Stocks")
CAPM Betas and Stock Type for FAANG Stocks
Stock Beta Type
AMZN 1.2528713 Cyclical
AAPL 1.1797829 Cyclical
NFLX 1.2759747 Cyclical
GOOG 0.9933472 Defensive
META 1.0856910 Cyclical
# Save the combined summary table to a CSV file
#write.csv(combined_summary_table, "FAANG_stock_analysis_summary.csv", row.names = FALSE)
#part B 
library(tidyverse)
library(broom)
library(knitr)

# Function to estimate Fama-French three-factor model and return the betas
estimate_ff3 <- function(asset_returns, market_returns, smb, hml) {
  model <- lm(asset_returns ~ market_returns + smb + hml)
  tidy(model)[2:4, "estimate"] # Betas are the coefficients for the factors
}


# Estimate Fama-French three-factor model betas for each FAANG stock
ff3_betas <- ff_assets %>%
  summarise(
    AMZN = list(estimate_ff3(AMZN, MKT_RF, SMB, HML)),
    AAPL = list(estimate_ff3(AAPL, MKT_RF, SMB, HML)),
    NFLX = list(estimate_ff3(NFLX, MKT_RF, SMB, HML)),
    GOOG = list(estimate_ff3(GOOG, MKT_RF, SMB, HML)),
    META = list(estimate_ff3(META, MKT_RF, SMB, HML))
  ) %>%
  pivot_longer(cols = everything(), names_to = "Stock", values_to = "Betas") %>%
  unnest(Betas)
# Create separate columns for each factor
ff3_betas <- ff3_betas %>%
  group_by(Stock) %>%
  mutate(Factor = rep(c("Beta_MKT_RF", "Beta_SMB", "Beta_HML"), each = 1)) %>%
  pivot_wider(names_from = Factor, values_from = estimate)
# Output the Fama-French three-factor model betas
kable(ff3_betas, caption = "Fama-French Three-Factor Model Betas for FAANG Stocks")
Fama-French Three-Factor Model Betas for FAANG Stocks
Stock Beta_MKT_RF Beta_SMB Beta_HML
AMZN 1.318308 -0.2338576 -0.9660014
AAPL 1.228317 -0.1987468 -0.5323266
NFLX 1.272063 0.1550635 -0.9693695
GOOG 1.082738 -0.4599591 -0.2967990
META 1.193977 -0.5292724 -0.5627588
# Function to identify the two most sensitive stocks for each factor
most_sensitive_stocks <- function(betas, factor) {
  betas %>%
    arrange(desc(abs(!!sym(factor)))) %>%
    slice(1:2) %>%
    select(Stock, !!sym(factor))
}

# Function to identify the two most sensitive stocks for each factor
most_sensitive_stocks <- function(betas, factor) {
  betas %>%
    arrange(desc(abs(!!sym(factor)))) %>%
    slice(1:2) %>%
    select(Stock, !!sym(factor))
}

# Identify the two most sensitive stocks for each factor
most_sensitive_mkt_rf <- most_sensitive_stocks(ff3_betas, "Beta_MKT_RF")
most_sensitive_smb <- most_sensitive_stocks(ff3_betas, "Beta_SMB")
most_sensitive_hml <- most_sensitive_stocks(ff3_betas, "Beta_HML")

# Output the results
kable(most_sensitive_mkt_rf, col.names = c("Stock", "Beta_MKT_RF"), caption = "Two Most Sensitive Stocks to Market Factor (MKT_RF)")
Two Most Sensitive Stocks to Market Factor (MKT_RF)
Stock Beta_MKT_RF
AAPL 1.228317
AMZN 1.318308
GOOG 1.082738
META 1.193977
NFLX 1.272063
kable(most_sensitive_smb, col.names = c("Stock", "Beta_SMB"), caption = "Two Most Sensitive Stocks to Size Factor (SMB)")
Two Most Sensitive Stocks to Size Factor (SMB)
Stock Beta_SMB
AAPL -0.1987468
AMZN -0.2338576
GOOG -0.4599591
META -0.5292724
NFLX 0.1550635
kable(most_sensitive_hml, col.names = c("Stock", "Beta_HML"), caption = "Two Most Sensitive Stocks to Value Factor (HML)")
Two Most Sensitive Stocks to Value Factor (HML)
Stock Beta_HML
AAPL -0.5323266
AMZN -0.9660014
GOOG -0.2967990
META -0.5627588
NFLX -0.9693695
# Function to estimate CAPM model and return the alpha
estimate_capm_alpha <- function(asset_returns, market_returns, rf) {
  model <- lm(asset_returns ~ market_returns)
  tidy(model)[1, "estimate"] # Alpha is the intercept
}

# Estimate CAPM alphas for each FAANG stock
capm_alphas <- ff_assets %>%
  summarise(
    AMZN = estimate_capm_alpha(AMZN, MKT_RF, RF),
    AAPL = estimate_capm_alpha(AAPL, MKT_RF, RF),
    NFLX = estimate_capm_alpha(NFLX, MKT_RF, RF),
    GOOG = estimate_capm_alpha(GOOG, MKT_RF, RF),
    META = estimate_capm_alpha(META, MKT_RF, RF)
  )

capm_alphas_long <- capm_alphas %>%
  pivot_longer(cols = everything(), names_to = "Stock", values_to = "Alpha_CAPM")

#fama alphas 
# Function to estimate Fama-French three-factor model and return the alpha
estimate_ff3_alpha <- function(asset_returns, market_returns, smb, hml) {
  model <- lm(asset_returns ~ market_returns + smb + hml)
  tidy(model)[1, "estimate"] # Alpha is the intercept
}

# Estimate Fama-French three-factor alphas for each FAANG stock
ff3_alphas <- ff_assets %>%
  summarise(
    AMZN = estimate_ff3_alpha(AMZN, MKT_RF, SMB, HML),
    AAPL = estimate_ff3_alpha(AAPL, MKT_RF, SMB, HML),
    NFLX = estimate_ff3_alpha(NFLX, MKT_RF, SMB, HML),
    GOOG = estimate_ff3_alpha(GOOG, MKT_RF, SMB, HML),
    META = estimate_ff3_alpha(META, MKT_RF, SMB, HML)
  )

ff3_alphas_long <- ff3_alphas %>%
  pivot_longer(cols = everything(), names_to = "Stock", values_to = "Alpha_FF3")