It seems that risk is not directly linked to future returns. Let’s see if we can exploit this. The premise is to invest in low-risk industries and hope for higher returns.
library(tidyverse)
library(lubridate)
library(scales)
library(ggrepel)
library(tidyquant)
library(jsonlite)
theme_set(theme_minimal())
invisible(Sys.setlocale("LC_TIME", "en_US.UTF-8"))
# Load data
library(frenchdata)
industrydata <- frenchdata::download_french_data("49 Industry Portfolios")
industryrets <- industrydata$subsets |>
filter(name == "Average Value Weighted Returns -- Monthly") |>
unnest(data) |>
mutate(date = lubridate::ymd(paste0(date, "01"))) |>
mutate_if(is.numeric, function(x) ifelse(x == -99.99, NA_real_, x / 100)) |>
select(-name) |>
pivot_longer(-date) |>
arrange(name, date) |>
drop_na()
industryrets |> saveRDS("industryrets.RDS")
industryrets <- readRDS("industryrets.RDS") |>
rename(r = value)
annual_riskreturn <- industryrets |>
group_by(name, year = as.integer(year(date))) |> # Ensure year is an integer
filter(n() == 12) |>
summarise(mu = prod(1 + r) - 1,
sd = sd(r) * sqrt(12))
library(gganimate)
animated_plot <- annual_riskreturn |>
ggplot(aes(x = sd, y = mu, color = name)) +
geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.3, fullrange = T) +
geom_point(size = 2) +
scale_x_continuous(labels = percent) +
scale_y_continuous(labels = percent) +
labs(x = "Risk (annualized standard deviation)",
y = "Return (annualized mean)",
title = "Risk vs Return by Industry ({frame_time})",
subtitle = "Each point represents an industry in a given year") +
coord_cartesian(xlim = c(0, 1), ylim = c(-0.8, 1.1)) +
theme_minimal() +
theme(legend.position = "none") +
transition_time(year) +
ease_aes('linear')
# Save animation to an HTML file
animate(animated_plot, nframes = 300, fps = 10, width = 800, height = 600, renderer = gifski_renderer("animated_plot.gif"))
knitr::include_graphics("animated_plot.gif")
annual_riskreturn |>
group_by(year) |>
slice_max(sd) |>
mutate(decade = paste0(floor(year / 10) * 10, "s"),
y = str_sub(year, -1, -1)) |>
ggplot(aes(x = y, y = decade)) +
geom_raster(aes(fill = name), interpolate = F) +
geom_text(aes(label = name), size = 2.5) +
labs(x = "Year", y = "Decade", title = "Industry with highest risk by year") +
theme(legend.position = "none")
annual_riskreturn |>
group_by(year) |>
slice_min(sd) |>
mutate(decade = paste0(floor(year / 10) * 10, "s"),
y = str_sub(year, -1, -1)) |>
ggplot(aes(x = y, y = decade)) +
geom_raster(aes(fill = name), interpolate = F) +
geom_text(aes(label = name), size = 2.5) +
labs(x = "Year", y = "Decade", title = "Industry with lowest risk by year") +
theme(legend.position = "none")
annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(mu1 = lead(mu)) |>
group_by(year) |>
slice_max(sd) |>
mutate(decade = paste0(floor(year / 10) * 10, "s"),
y = str_sub(year, -1, -1)) |>
ggplot(aes(x = y, y = decade)) +
geom_raster(aes(fill = mu1), interpolate = F) +
geom_text(aes(label = percent(mu1, accuracy = 0.1)), size = 2.5) +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0) +
labs(x = "Year", y = "Decade", title = "Following year return by industry with highest risk") +
theme(legend.position = "none")
annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(mu1 = lead(mu)) |>
group_by(year) |>
slice_min(sd) |>
mutate(decade = paste0(floor(year / 10) * 10, "s"),
y = str_sub(year, -1, -1)) |>
ggplot(aes(x = y, y = decade)) +
geom_raster(aes(fill = mu1), interpolate = F) +
geom_text(aes(label = percent(mu1, accuracy = 0.1)), size = 2.5) +
scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0) +
labs(x = "Year", y = "Decade", title = "Following year return by industry with lowest risk") +
theme(legend.position = "none")
max_sd <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_max(sd0, with_ties = F) |>
mutate(mu = replace_na(mu, 0)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
min_sd <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
# filter(!is.na(sd0)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_min(sd0, with_ties = F) |>
# filter(!is.na(mu)) |>
mutate(mu = replace_na(mu, 0)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
max_sd |>
ggplot(aes(x = year, y = p)) +
geom_line(aes(color = "Highest previous year risk")) +
geom_line(data = min_sd, aes(x = year, y = p, color = "Lowest previous year risk")) +
scale_y_log10(labels = scales::dollar) +
labs(x = "Year", y = "Portfolio value", title = "One industry strategy")
While the high-risk strategy has outperformed the low-risk strategy in terms of portfolio value, the low-risk strategy has been more stable. The Sharpe ratio of the low-risk strategy exceeds that of the high-risk strategy.
bind_rows(max_sd |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "Highest risk") |>
select(strategy, everything()),
min_sd |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "Lowest risk") |>
select(strategy, everything())) |>
mutate(AnnualizedReturn = percent(AnnualizedReturn, accuracy = 0.1),
`AnnualizedSharpe(Rf=0%)` = round(`AnnualizedSharpe(Rf=0%)`, 2),
AnnualizedStdDev = percent(AnnualizedStdDev, accuracy = 0.1)) |>
knitr::kable()
strategy | AnnualizedReturn | AnnualizedSharpe(Rf=0%) | AnnualizedStdDev |
---|---|---|---|
Highest risk | 49.5% | 1.17 | 42.4% |
Lowest risk | 10.1% | 1.90 | 5.3% |
Instead of putting all of our money on one industry, we could diversify our investment across more than one industry. Let’s see how a diversified portfolio would have performed.
# 3 industries
max_sd3 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_max(sd0, with_ties = F, n = 3) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarise(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
min_sd3 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_min(sd0, with_ties = F, n = 3) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
# 5 industries
max_sd5 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_max(sd0, with_ties = F, n = 5) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
min_sd5 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_min(sd0, with_ties = F, n = 5) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
# 10 industries
max_sd10 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_max(sd0, with_ties = F, n = 10) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
min_sd10 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_min(sd0, with_ties = F, n = 10) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
# 20 industries
max_sd20 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_max(sd0, with_ties = F, n = 20) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
min_sd20 <- annual_riskreturn |>
arrange(name, year) |>
group_by(name) |>
mutate(sd0 = lag(sd)) |>
mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
group_by(year) |>
slice_min(sd0, with_ties = F, n = 20) |>
mutate(mu = replace_na(mu, 0)) |>
group_by(year) |>
summarize(mu = mean(mu)) |>
ungroup() |>
mutate(r = mu,
p = cumprod(1 + r)*100)
bind_rows(max_sd |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "1 industry (highest risk)") |>
select(strategy, everything()),
min_sd |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "1 industry (lowest risk)") |>
select(strategy, everything()),
max_sd3 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "3 industries (highest risk)") |>
select(strategy, everything()),
min_sd3 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "3 industries (lowest risk)") |>
select(strategy, everything()),
max_sd5 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "5 industries (highest risk)") |>
select(strategy, everything()),
min_sd5 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "5 industries (lowest risk)") |>
select(strategy, everything()),
max_sd10 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "10 industries (highest risk)") |>
select(strategy, everything()),
min_sd10 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "10 industries (lowest risk)") |>
select(strategy, everything()),
max_sd20 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "20 industries (highest risk)") |>
select(strategy, everything()),
min_sd20 |>
mutate(date = ymd(paste0(year, "0101"))) |>
tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
mutate(strategy = "20 industries (lowest risk)") |>
select(strategy, everything())) |>
mutate(AnnualizedReturn = percent(AnnualizedReturn, accuracy = 0.1),
`AnnualizedSharpe(Rf=0%)` = round(`AnnualizedSharpe(Rf=0%)`, 2),
AnnualizedStdDev = percent(AnnualizedStdDev, accuracy = 0.1)) |>
arrange(str_detect(strategy, "low")) |>
knitr::kable()
strategy | AnnualizedReturn | AnnualizedSharpe(Rf=0%) | AnnualizedStdDev |
---|---|---|---|
1 industry (highest risk) | 49.5% | 1.17 | 42.4% |
3 industries (highest risk) | 41.7% | 1.53 | 27.2% |
5 industries (highest risk) | 38.0% | 1.66 | 22.8% |
10 industries (highest risk) | 33.0% | 1.79 | 18.5% |
20 industries (highest risk) | 28.4% | 1.88 | 15.1% |
1 industry (lowest risk) | 10.1% | 1.90 | 5.3% |
3 industries (lowest risk) | 11.4% | 1.91 | 6.0% |
5 industries (lowest risk) | 12.3% | 1.93 | 6.4% |
10 industries (lowest risk) | 13.8% | 1.96 | 7.0% |
20 industries (lowest risk) | 15.9% | 1.99 | 8.0% |
The highest SR is achieved investing in the 20 industries with the lowest risk in the previous year. But even the lowest SR low-risk strategy outperforms the highest SR high-risk strategy.