library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyr)
library(dplyr)
library(broom)
library(ggplot2)
library(tibbletime)
##
## Attaching package: 'tibbletime'
##
## The following object is masked from 'package:stats':
##
## filter
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ forecast 8.23.0 ✔ expsmooth 2.3
## ✔ fma 2.5
library(tidyquant)
## Loading required package: PerformanceAnalytics
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
library(dygraphs)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
#####################################################################################
#####################################################################################
#################
################# FAMA FRENCH FACTORS
#################
#####################################################################################
#####################################################################################
# We need to load the file again
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp_file <- tempfile()
download.file(ff_url, temp_file)
FF <- unzip(temp_file)
# Skipping the first 3 rows
#ff_factors_raw_data <- read_csv(ff_factors_raw_data)
#ff_factors_raw_data <- read_csv(ff_factors_raw_data, skip = 3)
FF <- read_csv(FF, skip = 3, col_types = cols(...1 = col_integer(),`Mkt-RF` = col_double(),
SMB = col_double(),
HML = col_double(),
RF = col_double())) %>% rename(date = ...1) %>% subset(date>100000) %>% mutate(date = ymd(parse_date_time(date, "%Y%m"))) %>% na.omit(date)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
FF <- FF %>% mutate(MKT =`Mkt-RF`+RF)
################
################ FF Factors in Time Series
################
ggplot(FF, aes(x = date, y = RF)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Risk-free return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(FF, aes(x = date, y = MKT)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Excess market return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(FF, aes(x = date, y = SMB)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Size factor return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(FF, aes(x = date, y = HML)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "10 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Value factor return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

################
################ cumulative returns
################
################# cumulative returns of the Market factor
FF_MKT <- FF %>% select(-SMB,-HML,-'Mkt-RF',-RF) %>% mutate(MKT = MKT/100)
port_cumulative_ret <- FF_MKT %>%
mutate(cr = cumprod(1 + MKT))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_minimal() +
scale_x_date(date_breaks = '10 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the Market factor
FF_MKT <- FF %>% select(-SMB,-HML,-'Mkt-RF',-RF) %>% mutate(MKT = MKT/100) %>% filter(date >= as.Date('1997-01-01'))
port_cumulative_ret <- FF_MKT %>%
mutate(cr = cumprod(1 + MKT))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_minimal() +
scale_x_date(date_breaks = '2 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the SMB factor
FF_SMB <- FF %>% select(date, SMB) %>% mutate(SMB = SMB/100)
port_cumulative_ret <- FF_SMB %>%
mutate(cr = cumprod(1 + SMB))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_classic() +
scale_y_continuous(breaks = seq(1,10,0.5)) +
scale_x_date(date_breaks = '10 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# cumulative returns of the HML factor
FF_HML <- FF %>% select(date, HML) %>% mutate(HML = HML/100)
port_cumulative_ret <- FF_HML %>%
mutate(cr = cumprod(1 + HML))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_classic() +
scale_y_continuous(breaks = seq(1,100,5)) +
scale_x_date(date_breaks = '10 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

#### transforming FF into a long format for histograms
FF_long <-
FF %>%
select(-RF) %>%
gather(factor, returns, -date)
################
################ Distribution of FF Factors
################
################# histograms in one picture
FF_long %>%
ggplot(aes(x = returns, fill = factor)) +
geom_histogram(alpha = 1, binwidth = 1)

################# histograms in separate pictures
FF_long %>%
ggplot(aes(x = returns, fill = factor)) +
geom_histogram(alpha = 1, binwidth = 1,aes(y = stat(width*density))) +
geom_bar(aes(y = (..count..)/sum(..count..))) +
scale_y_continuous(labels=percent) +
facet_wrap(~factor)
## Warning: `stat(width * density)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(width * density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `position_stack()` requires non-overlapping x intervals.

################# density plots in one pictures
FF_long %>%
ggplot(aes(x = returns, colour = factor, fill = factor)) +
stat_density(geom = "line", alpha = 1) +
xlab("monthly returns") +
ylab("distribution")

#####################################################################################
#####################################################################################
#################
################# STOCKS
#################
#####################################################################################
#####################################################################################
################# pulling stock data straight from Yahoo Finance
symbols <- c("AMZN","WMT","TGT")
prices <-
getSymbols(symbols, src = 'yahoo',
from = "1997-06-01",
auto.assign = TRUE,
warnings = FALSE) %>%
map(~Ad(get(.))) %>%
reduce(merge) %>%
`colnames<-`(symbols)
################# converting the price date into monthly returns
returns <-
prices %>%
to.monthly(indexAt = "firstof", OHLC = FALSE) %>%
# convert the index to a date
data.frame(date = index(.)) %>%
# now remove the index because it got converted to row names
remove_rownames() %>%
# convert from wide to long creating new variable asset that will assign ticker, conversion needed to create returns using lagged variables
gather(asset,prices,-date) %>%
# compute returns using the usual way
group_by(asset) %>%
mutate(returns=(prices/lag(prices))-1) %>%
# remove prices
select(-prices) %>%
# convert back to wide by asset
spread(asset, returns) %>%
# remove missings
na.omit()
################# returns over time
ggplot(returns, aes(x = date, y = AMZN)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Amazon return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(returns, aes(x = date, y = WMT)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Walmart return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(returns, aes(x = date, y = TGT)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge=2)) +
labs(title = "Target return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

################# cumulative returns
amaz <- returns %>% select(date, AMZN)
port_cumulative_ret <- amaz %>%
mutate(cr = cumprod(1 + AMZN))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_classic() +
scale_x_date(date_breaks = '5 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

wal <- returns %>% select(date, WMT)
port_cumulative_ret <- wal %>%
mutate(cr = cumprod(1 + WMT))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_classic() +
scale_x_date(date_breaks = '5 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

tgt <- returns %>% select(date, TGT)
port_cumulative_ret <- tgt %>%
mutate(cr = cumprod(1 + TGT))
port_cumulative_ret %>%
ggplot(aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = 'Portfolio Cumulative Returns') +
theme_classic() +
scale_x_date(date_breaks = '5 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))

################# visualizgin the returns
################# back to long from wide
asset_returns_long <-
returns %>%
gather(asset, returns, -date)
################# histograms
asset_returns_long %>%
# pipe to ggplot, aes is a function for aestethics of a plot, returns on x axis, fill means different colors for different assets, alpha describes the intensity of colors, binwidth is selfexplanatory
ggplot(aes(x = returns, fill = asset)) +
geom_histogram(alpha = 1, binwidth = .01)

################# separate histograms
asset_returns_long %>%
# pipe to ggplot, aes is a function for aestethics of a plot, returns on x axis, fill means different colors for different assets, alpha describes the intensity of colors, binwidth is selfexplanatory
ggplot(aes(x = returns, fill = asset)) +
geom_histogram(alpha = 1, binwidth = .01) +
facet_wrap(~asset)

################# densities
asset_returns_long %>%
ggplot(aes(x = returns, colour = asset, fill = asset)) +
stat_density(geom = "line", alpha = 1) +
xlab("monthly returns") +
ylab("distribution")

#####################################################################################
#####################################################################################
#################
################# 3 FACTOR MODELS
#################
#####################################################################################
#####################################################################################
##################### merging asset returns with factors, and converting returns to excess returns
ff_assets <-
returns %>%
left_join(FF, by = "date") %>%
mutate(MKT_RF = MKT/100,
SMB = SMB/100,
HML = HML/100,
RF = RF/100,
AMZN = AMZN-RF,
TGT = TGT-RF,
WMT = WMT-RF) %>%
select(-RF,-MKT) %>%
na.omit()
##################### modelling
#####################
#####################
########
######## AMAZON
########
#####################
#####################
## estimating CAPM model
capm_amazon <- lm(AMZN ~ MKT_RF,data=ff_assets)
summary(capm_amazon)
##
## Call:
## lm(formula = AMZN ~ MKT_RF, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42361 -0.07216 -0.01373 0.04509 1.17856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.020359 0.008244 2.469 0.0141 *
## MKT_RF 1.693870 0.174221 9.723 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1459 on 321 degrees of freedom
## Multiple R-squared: 0.2275, Adjusted R-squared: 0.2251
## F-statistic: 94.53 on 1 and 321 DF, p-value: < 2.2e-16
## estimating 3 factor model
ff3_amazon <- lm(AMZN ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_amazon)
##
## Call:
## lm(formula = AMZN ~ MKT_RF + SMB + HML, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41718 -0.06070 -0.01494 0.04234 1.13772
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.021858 0.007867 2.779 0.00578 **
## MKT_RF 1.699502 0.171801 9.892 < 2e-16 ***
## SMB -0.416315 0.247578 -1.682 0.09364 .
## HML -1.315150 0.227165 -5.789 1.69e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1391 on 319 degrees of freedom
## Multiple R-squared: 0.302, Adjusted R-squared: 0.2955
## F-statistic: 46.01 on 3 and 319 DF, p-value: < 2.2e-16
## plotting realized returns vs predicted from the CAPM and the 3-factor model
amazon <- data.frame(ff_assets$AMZN, predict(ff3_amazon), predict(capm_amazon))
ggplot(amazon) + geom_point(aes(x = ff_assets$AMZN, y = predict(ff3_amazon)),color="red") + geom_point(aes(x = ff_assets$AMZN, y = predict(capm_amazon))) + geom_abline(intercept = 0)

######## WALMART
## estimating 3 factor model
ff3_walmart <- lm(WMT ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_walmart)
##
## Call:
## lm(formula = WMT ~ MKT_RF + SMB + HML, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.197907 -0.030447 0.000483 0.026036 0.195817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005070 0.002986 1.698 0.0904 .
## MKT_RF 0.586768 0.065204 8.999 < 2e-16 ***
## SMB -0.613113 0.093963 -6.525 2.67e-10 ***
## HML -0.136974 0.086216 -1.589 0.1131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05279 on 319 degrees of freedom
## Multiple R-squared: 0.2421, Adjusted R-squared: 0.235
## F-statistic: 33.97 on 3 and 319 DF, p-value: < 2.2e-16
## estimating CAPM model
capm_walmart <- lm(WMT ~ MKT_RF,data=ff_assets)
summary(capm_walmart)
##
## Call:
## lm(formula = WMT ~ MKT_RF, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.196245 -0.033012 -0.001799 0.030789 0.219836
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005168 0.003168 1.632 0.104
## MKT_RF 0.484538 0.066940 7.238 3.37e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05605 on 321 degrees of freedom
## Multiple R-squared: 0.1403, Adjusted R-squared: 0.1376
## F-statistic: 52.39 on 1 and 321 DF, p-value: 3.373e-12
## plotting realized returns vs predicted from the CAPM and the 3-factor model
walmart <- data.frame(ff_assets$WMT, predict(ff3_walmart), predict(capm_walmart))
ggplot(walmart) + geom_point(aes(x = ff_assets$WMT, y = predict(ff3_walmart)),color="red") + geom_point(aes(x = ff_assets$WMT, y = predict(capm_walmart))) + geom_abline(intercept = 0) + xlim(-0.3,0.3) + ylim(-0.3,0.3)

######## TARGET
## estimating 3 factor model
ff3_target <- lm(TGT ~ MKT_RF + SMB + HML,data=ff_assets)
summary(ff3_target)
##
## Call:
## lm(formula = TGT ~ MKT_RF + SMB + HML, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31411 -0.04427 -0.00571 0.04188 0.27529
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002506 0.003949 0.635 0.5261
## MKT_RF 1.031054 0.086237 11.956 <2e-16 ***
## SMB -0.246262 0.124274 -1.982 0.0484 *
## HML 0.248993 0.114027 2.184 0.0297 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06982 on 319 degrees of freedom
## Multiple R-squared: 0.3153, Adjusted R-squared: 0.3088
## F-statistic: 48.96 on 3 and 319 DF, p-value: < 2.2e-16
## estimating CAPM model
capm_target <- lm(TGT ~ MKT_RF,data=ff_assets)
summary(capm_target)
##
## Call:
## lm(formula = TGT ~ MKT_RF, data = ff_assets)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.289315 -0.044019 -0.004299 0.040144 0.267444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002935 0.003997 0.734 0.463
## MKT_RF 0.974126 0.084470 11.532 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07073 on 321 degrees of freedom
## Multiple R-squared: 0.2929, Adjusted R-squared: 0.2907
## F-statistic: 133 on 1 and 321 DF, p-value: < 2.2e-16
## plotting realized returns vs predicted from the CAPM and the 3-factor model
target <- data.frame(ff_assets$TGT, predict(ff3_target), predict(capm_target))
ggplot(target) + geom_point(aes(x = ff_assets$TGT, y = predict(ff3_target)),color="red") + geom_point(aes(x = ff_assets$TGT, y = predict(capm_target))) + geom_abline(intercept = 0) + xlim(-0.3,0.3) + ylim(-0.3,0.3)

############# out of sample prediction
### pre_COVID sample to train the model
ff3_amazon_pred <- subset(ff_assets,date<"2020-01-01")
### post_COVID sample to predict the returns
ff3_amazon_fore <- subset(ff_assets,date>"2019-12-01")
### train the 3 factor model on pre COVID sample
ff3_amazon_1 <- lm(AMZN ~ MKT_RF + SMB + HML,data=ff3_amazon_pred)
### train the CAPM model on pre COVID sample
ff3_amazon_2 <- lm(AMZN ~ MKT_RF,data=ff3_amazon_pred)
### plot the realized returns vs predicted returns post COVID
AMZN_post <- data.frame(date=ff3_amazon_fore$date,realized=ff3_amazon_fore$AMZN, predicted_ff=predict(ff3_amazon_1, ff3_amazon_fore), predicted_capm=predict(ff3_amazon_2, ff3_amazon_fore))
ggplot(AMZN_post) + geom_point(aes(x = realized, y = predicted_ff),color="red") +
geom_point(aes(x = realized, y = predicted_capm),color="blue") +
geom_abline(intercept = 0)

ggplot(AMZN_post) +
geom_line(aes(x = date, y = predicted_ff, colour = "FF"),color="red") +
geom_line(aes(x = date, y = predicted_capm, colour = "CAPM"),color="blue") +
geom_line(aes(x = date, y = realized, colour = "Realized"),color="black") +
labs(title = "Target return", y = "Returns", x = "Calendar Time") +
scale_colour_manual("",
breaks = c("FF", "CAPM", "realized"),
values = c("red", "blue", "green"))

############# out of sample prediction
### pre_COVID sample to train the model
ff3_walmart_pred <- subset(ff_assets,date<"2020-01-01")
### post_COVID sample to predict the returns
ff3_walmart_fore <- subset(ff_assets,date>"2019-12-01")
### train the 3 factor model on pre COVID sample
ff3_walmart_1 <- lm(WMT ~ MKT_RF + SMB + HML,data=ff3_walmart_pred)
### train the CAPM model on pre COVID sample
ff3_walmart_2 <- lm(WMT ~ MKT_RF,data=ff3_walmart_pred)
### plot the realized returns vs predicted returns post COVID
WMT_post <- data.frame(date=ff3_walmart_fore$date,realized=ff3_walmart_fore$WMT, predicted_ff=predict(ff3_walmart_1, ff3_walmart_fore), predicted_capm=predict(ff3_walmart_2, ff3_walmart_fore))
ggplot(WMT_post) + geom_point(aes(x = realized, y = predicted_ff),color="red") +
geom_point(aes(x = realized, y = predicted_capm),color="blue") +
geom_abline(intercept = 0)

ggplot(WMT_post) +
geom_line(aes(x = date, y = predicted_ff, colour = "FF"),color="red") +
geom_line(aes(x = date, y = predicted_capm, colour = "CAPM"),color="blue") +
geom_line(aes(x = date, y = realized, colour = "Realized"),color="black") +
labs(title = "Target return", y = "Returns", x = "Calendar Time") +
scale_colour_manual("",
breaks = c("FF", "CAPM", "realized"),
values = c("red", "blue", "green"))

ggplot(WMT_post) +
geom_line(aes(x = date, y = predicted_capm),color="blue") +
geom_line(aes(x = date, y = realized),color="black")

ggplot(WMT_post) +
geom_line(aes(x = date, y = predicted_ff),color="red") +
geom_line(aes(x = date, y = realized),color="black")

# step 1.. read in data from yahoo finance
library(tidyverse)
library(lubridate)
library(quantmod)
library(tidyr)
library(dplyr)
library(broom)
library(ggplot2)
library(tibbletime)
library(fpp2)
library(tidyquant)
library(dygraphs)
library(scales)
# Downloading historical prices for FAANG stocks
symbols <- c("META", "AMZN", "AAPL", "NFLX", "GOOG")
prices_list <- list()
for (symbol in symbols) {
prices_list[[symbol]] <- tryCatch({
Ad(getSymbols(symbol, src = 'yahoo', from = "2015-03-30", to = "2024-05-01", auto.assign = FALSE))
}, error = function(e) {
warning(paste("Error downloading data for", symbol))
NULL
})
}
# Check the structure of each data frame in the list
lapply(prices_list, str)
## An xts object on 2015-03-30 / 2024-04-30 containing:
## Data: double [2288, 1]
## Columns: META.Adjusted
## Index: Date [2288] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing:
## Data: double [2288, 1]
## Columns: AMZN.Adjusted
## Index: Date [2288] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing:
## Data: double [2288, 1]
## Columns: AAPL.Adjusted
## Index: Date [2288] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing:
## Data: double [2288, 1]
## Columns: NFLX.Adjusted
## Index: Date [2288] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## An xts object on 2015-03-30 / 2024-04-30 containing:
## Data: double [2288, 1]
## Columns: GOOG.Adjusted
## Index: Date [2288] (TZ: "UTC")
## xts Attributes:
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2024-07-30 21:43:25"
## $META
## NULL
##
## $AMZN
## NULL
##
## $AAPL
## NULL
##
## $NFLX
## NULL
##
## $GOOG
## NULL
# Combine the data into a single data frame
valid_prices <- prices_list[!sapply(prices_list, is.null)]
prices <- do.call(merge, valid_prices)
colnames(prices) <- symbols[!sapply(prices_list, is.null)]
# Convert the combined data to monthly and adjust dates to the 1st of the month
prices <- prices %>%
to.monthly(indexAt = "lastof", OHLC = FALSE) %>%
data.frame(date = index(.)) %>%
remove_rownames() %>%
mutate(date = ceiling_date(date, "month") - days(1)) %>%
mutate(date = date + days(1 - day(date)))
# Create a full sequence of dates from April 2015 to May 2024
full_dates <- data.frame(date = seq(as.Date("2015-04-01"), as.Date("2024-05-01"), by = "month"))
# Merge with the full sequence of dates and fill missing values with the mean
prices <- full_dates %>%
left_join(prices, by = "date") %>%
mutate(across(-date, ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
# Converting the price data into monthly returns
returns <- prices %>%
gather(asset, prices, -date) %>%
group_by(asset) %>%
mutate(returns = (prices / lag(prices)) - 1) %>%
select(-prices) %>%
spread(asset, returns) %>%
na.omit()
# Visualizing returns over time for each stock
ggplot(returns, aes(x = date, y = AMZN)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
labs(title = "Amazon return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(returns, aes(x = date, y = AAPL)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
labs(title = "Apple return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(returns, aes(x = date, y = NFLX)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
labs(title = "Netflix return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

ggplot(returns, aes(x = date, y = GOOG)) +
geom_line() + theme_minimal() +
scale_x_date(date_labels = "%Y", date_breaks = "5 years", guide = guide_axis(n.dodge = 2)) +
labs(title = "Google return", y = "Percent", x = "Years", colour = "") +
theme(legend.position = "none")

# Visualizing cumulative returns
for (stock in symbols) {
stock_data <- returns %>% select(date, all_of(stock))
port_cumulative_ret <- stock_data %>%
mutate(cr = cumprod(1 + !!sym(stock)))
ggplot(port_cumulative_ret, aes(x = date, y = cr)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative Returns',
title = paste(stock, 'Cumulative Returns')) +
theme_classic() +
scale_x_date(date_breaks = '5 years',
date_labels = '%Y') + theme(axis.text.x = element_text(angle = 90))
}
# Visualizing distributions
asset_returns_long <- returns %>% gather(asset, returns, -date)
# Histograms
asset_returns_long %>%
ggplot(aes(x = returns, fill = asset)) +
geom_histogram(alpha = 1, binwidth = .01)

# Separate histograms
asset_returns_long %>%
ggplot(aes(x = returns, fill = asset)) +
geom_histogram(alpha = 1, binwidth = .01) +
facet_wrap(~asset)

# Densities
asset_returns_long %>%
ggplot(aes(x = returns, colour = asset, fill = asset)) +
stat_density(geom = "line", alpha = 1) +
xlab("monthly returns") +
ylab("distribution")

# Question A)
# Load the Fama-French factors data
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp_file <- tempfile()
download.file(ff_url, temp_file)
ff_factors_file <- unzip(temp_file)
FF <- read_csv(ff_factors_file, skip = 3, col_types = cols(...1 = col_integer(),
`Mkt-RF` = col_double(),
SMB = col_double(),
HML = col_double(),
RF = col_double())) %>%
rename(date = ...1) %>%
filter(date >= 201504 & date <= 202404) %>%
mutate(date = ymd(parse_date_time(date, "%Y%m"))) %>%
na.omit()
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
FF <- FF %>% mutate(MKT_RF = `Mkt-RF` / 100, SMB = SMB / 100, HML = HML / 100, RF = RF / 100)
# Assuming 'returns' is the FAANG stock returns dataframe
ff_assets <- returns %>% left_join(FF, by = "date")
# Function to estimate CAPM model and return the beta
estimate_capm <- function(asset_returns, market_returns) {
model <- lm(asset_returns ~ market_returns)
return(coef(model)[2]) # Beta is the slope coefficient
}
# Estimate CAPM beta for each FAANG stock
betas <- ff_assets %>%
summarise(
AMZN = estimate_capm(AMZN, MKT_RF),
AAPL = estimate_capm(AAPL, MKT_RF),
NFLX = estimate_capm(NFLX, MKT_RF),
GOOG = estimate_capm(GOOG, MKT_RF),
META = estimate_capm(META, MKT_RF)
)
# Determine whether the stock is defensive or cyclical based on beta
betas_desc <- betas %>%
gather(stock, beta) %>%
mutate(type = ifelse(beta < 1, "Defensive", "Cyclical"))
print(betas_desc)
## # A tibble: 5 × 3
## stock beta type
## <chr> <dbl> <chr>
## 1 AMZN 1.25 Cyclical
## 2 AAPL 1.18 Cyclical
## 3 NFLX 1.28 Cyclical
## 4 GOOG 0.993 Defensive
## 5 META 1.09 Cyclical
library(knitr)
kable(betas_desc, col.names = c("Stock", "Beta", "Type"), caption = "CAPM Betas and Stock Type for FAANG Stocks")
CAPM Betas and Stock Type for FAANG Stocks
| AMZN |
1.2528713 |
Cyclical |
| AAPL |
1.1797829 |
Cyclical |
| NFLX |
1.2759747 |
Cyclical |
| GOOG |
0.9933472 |
Defensive |
| META |
1.0856910 |
Cyclical |
# Save the combined summary table to a CSV file
#write.csv(combined_summary_table, "FAANG_stock_analysis_summary.csv", row.names = FALSE)
#part B
library(tidyverse)
library(broom)
library(knitr)
# Function to estimate Fama-French three-factor model and return the betas
estimate_ff3 <- function(asset_returns, market_returns, smb, hml) {
model <- lm(asset_returns ~ market_returns + smb + hml)
tidy(model)[2:4, "estimate"] # Betas are the coefficients for the factors
}
# Estimate Fama-French three-factor model betas for each FAANG stock
ff3_betas <- ff_assets %>%
summarise(
AMZN = list(estimate_ff3(AMZN, MKT_RF, SMB, HML)),
AAPL = list(estimate_ff3(AAPL, MKT_RF, SMB, HML)),
NFLX = list(estimate_ff3(NFLX, MKT_RF, SMB, HML)),
GOOG = list(estimate_ff3(GOOG, MKT_RF, SMB, HML)),
META = list(estimate_ff3(META, MKT_RF, SMB, HML))
) %>%
pivot_longer(cols = everything(), names_to = "Stock", values_to = "Betas") %>%
unnest(Betas)
# Create separate columns for each factor
ff3_betas <- ff3_betas %>%
group_by(Stock) %>%
mutate(Factor = rep(c("Beta_MKT_RF", "Beta_SMB", "Beta_HML"), each = 1)) %>%
pivot_wider(names_from = Factor, values_from = estimate)
# Output the Fama-French three-factor model betas
kable(ff3_betas, caption = "Fama-French Three-Factor Model Betas for FAANG Stocks")
Fama-French Three-Factor Model Betas for FAANG Stocks
| AMZN |
1.318308 |
-0.2338576 |
-0.9660014 |
| AAPL |
1.228317 |
-0.1987468 |
-0.5323266 |
| NFLX |
1.272063 |
0.1550635 |
-0.9693695 |
| GOOG |
1.082738 |
-0.4599591 |
-0.2967990 |
| META |
1.193977 |
-0.5292724 |
-0.5627588 |
# Function to identify the two most sensitive stocks for each factor
most_sensitive_stocks <- function(betas, factor) {
betas %>%
arrange(desc(abs(!!sym(factor)))) %>%
slice(1:2) %>%
select(Stock, !!sym(factor))
}
# Function to identify the two most sensitive stocks for each factor
most_sensitive_stocks <- function(betas, factor) {
betas %>%
arrange(desc(abs(!!sym(factor)))) %>%
slice(1:2) %>%
select(Stock, !!sym(factor))
}
# Identify the two most sensitive stocks for each factor
most_sensitive_mkt_rf <- most_sensitive_stocks(ff3_betas, "Beta_MKT_RF")
most_sensitive_smb <- most_sensitive_stocks(ff3_betas, "Beta_SMB")
most_sensitive_hml <- most_sensitive_stocks(ff3_betas, "Beta_HML")
# Output the results
kable(most_sensitive_mkt_rf, col.names = c("Stock", "Beta_MKT_RF"), caption = "Two Most Sensitive Stocks to Market Factor (MKT_RF)")
Two Most Sensitive Stocks to Market Factor (MKT_RF)
| AAPL |
1.228317 |
| AMZN |
1.318308 |
| GOOG |
1.082738 |
| META |
1.193977 |
| NFLX |
1.272063 |
kable(most_sensitive_smb, col.names = c("Stock", "Beta_SMB"), caption = "Two Most Sensitive Stocks to Size Factor (SMB)")
Two Most Sensitive Stocks to Size Factor (SMB)
| AAPL |
-0.1987468 |
| AMZN |
-0.2338576 |
| GOOG |
-0.4599591 |
| META |
-0.5292724 |
| NFLX |
0.1550635 |
kable(most_sensitive_hml, col.names = c("Stock", "Beta_HML"), caption = "Two Most Sensitive Stocks to Value Factor (HML)")
Two Most Sensitive Stocks to Value Factor (HML)
| AAPL |
-0.5323266 |
| AMZN |
-0.9660014 |
| GOOG |
-0.2967990 |
| META |
-0.5627588 |
| NFLX |
-0.9693695 |
# Function to estimate CAPM model and return the alpha
estimate_capm_alpha <- function(asset_returns, market_returns, rf) {
model <- lm(asset_returns ~ market_returns)
tidy(model)[1, "estimate"] # Alpha is the intercept
}
# Estimate CAPM alphas for each FAANG stock
capm_alphas <- ff_assets %>%
summarise(
AMZN = estimate_capm_alpha(AMZN, MKT_RF, RF),
AAPL = estimate_capm_alpha(AAPL, MKT_RF, RF),
NFLX = estimate_capm_alpha(NFLX, MKT_RF, RF),
GOOG = estimate_capm_alpha(GOOG, MKT_RF, RF),
META = estimate_capm_alpha(META, MKT_RF, RF)
)
capm_alphas_long <- capm_alphas %>%
pivot_longer(cols = everything(), names_to = "Stock", values_to = "Alpha_CAPM")
#fama alphas
# Function to estimate Fama-French three-factor model and return the alpha
estimate_ff3_alpha <- function(asset_returns, market_returns, smb, hml) {
model <- lm(asset_returns ~ market_returns + smb + hml)
tidy(model)[1, "estimate"] # Alpha is the intercept
}
# Estimate Fama-French three-factor alphas for each FAANG stock
ff3_alphas <- ff_assets %>%
summarise(
AMZN = estimate_ff3_alpha(AMZN, MKT_RF, SMB, HML),
AAPL = estimate_ff3_alpha(AAPL, MKT_RF, SMB, HML),
NFLX = estimate_ff3_alpha(NFLX, MKT_RF, SMB, HML),
GOOG = estimate_ff3_alpha(GOOG, MKT_RF, SMB, HML),
META = estimate_ff3_alpha(META, MKT_RF, SMB, HML)
)
ff3_alphas_long <- ff3_alphas %>%
pivot_longer(cols = everything(), names_to = "Stock", values_to = "Alpha_FF3")