It seems that risk is not directly linked to future returns. Let’s see if we can exploit this. The premise is to invest in low-risk industries and hope for higher returns.

library(tidyverse)
library(lubridate)
library(scales)
library(ggrepel)
library(tidyquant)
library(jsonlite)
theme_set(theme_minimal())
invisible(Sys.setlocale("LC_TIME", "en_US.UTF-8"))
# Load data
library(frenchdata)

industrydata <- frenchdata::download_french_data("49 Industry Portfolios")

industryrets <- industrydata$subsets |> 
  filter(name == "Average Value Weighted Returns -- Monthly") |> 
  unnest(data) |> 
  mutate(date = lubridate::ymd(paste0(date, "01"))) |> 
  mutate_if(is.numeric, function(x) ifelse(x == -99.99, NA_real_, x / 100)) |> 
  select(-name) |> 
  pivot_longer(-date) |> 
  arrange(name, date) |> 
  drop_na()

industryrets |> saveRDS("industryrets.RDS")
industryrets <- readRDS("industryrets.RDS") |> 
  rename(r = value)
annual_riskreturn <- industryrets |> 
  group_by(name, year = as.integer(year(date))) |>  # Ensure year is an integer
  filter(n() == 12) |> 
  summarise(mu = prod(1 + r) - 1,
            sd = sd(r) * sqrt(12))
library(gganimate)

animated_plot <- annual_riskreturn  |> 
  ggplot(aes(x = sd, y = mu, color = name)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 0.3, fullrange = T) +
  geom_point(size = 2) + 
  scale_x_continuous(labels = percent) +
  scale_y_continuous(labels = percent) +
  labs(x = "Risk (annualized standard deviation)",
       y = "Return (annualized mean)",
       title = "Risk vs Return by Industry ({frame_time})",
       subtitle = "Each point represents an industry in a given year") +
  coord_cartesian(xlim = c(0, 1), ylim = c(-0.8, 1.1)) +
  theme_minimal() +
  theme(legend.position = "none") +
  transition_time(year) + 
  ease_aes('linear')

# Save animation to an HTML file
animate(animated_plot, nframes = 300, fps = 10, width = 800, height = 600, renderer = gifski_renderer("animated_plot.gif"))
knitr::include_graphics("animated_plot.gif")

annual_riskreturn |> 
  group_by(year) |> 
  slice_max(sd) |> 
  mutate(decade = paste0(floor(year / 10) * 10, "s"),
         y = str_sub(year, -1, -1)) |> 
  ggplot(aes(x = y, y = decade)) +
  geom_raster(aes(fill = name), interpolate = F) +
  geom_text(aes(label = name), size = 2.5) +
  labs(x = "Year", y = "Decade", title = "Industry with highest risk by year") +
  theme(legend.position = "none")

annual_riskreturn |> 
  group_by(year) |> 
  slice_min(sd) |> 
  mutate(decade = paste0(floor(year / 10) * 10, "s"),
         y = str_sub(year, -1, -1)) |> 
  ggplot(aes(x = y, y = decade)) +
  geom_raster(aes(fill = name), interpolate = F) +
  geom_text(aes(label = name), size = 2.5) +
  labs(x = "Year", y = "Decade", title = "Industry with lowest risk by year") +
  theme(legend.position = "none")

annual_riskreturn |> 
  arrange(name, year) |> 
  group_by(name) |> 
  mutate(mu1 = lead(mu)) |> 
  group_by(year) |> 
  slice_max(sd) |> 
  mutate(decade = paste0(floor(year / 10) * 10, "s"),
         y = str_sub(year, -1, -1)) |> 
  ggplot(aes(x = y, y = decade)) +
  geom_raster(aes(fill = mu1), interpolate = F) +
  geom_text(aes(label = percent(mu1, accuracy = 0.1)), size = 2.5) +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0) +
  labs(x = "Year", y = "Decade", title = "Following year return by industry with highest risk") +
  theme(legend.position = "none")

annual_riskreturn |> 
  arrange(name, year) |> 
  group_by(name) |> 
  mutate(mu1 = lead(mu)) |> 
  group_by(year) |> 
  slice_min(sd) |> 
  mutate(decade = paste0(floor(year / 10) * 10, "s"),
         y = str_sub(year, -1, -1)) |> 
  ggplot(aes(x = y, y = decade)) +
  geom_raster(aes(fill = mu1), interpolate = F) +
  geom_text(aes(label = percent(mu1, accuracy = 0.1)), size = 2.5) +
  scale_fill_gradient2(low = "red", mid = "white", high = "green", midpoint = 0) +
  labs(x = "Year", y = "Decade", title = "Following year return by industry with lowest risk") +
  theme(legend.position = "none")

Strategy

max_sd <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_max(sd0, with_ties = F) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

min_sd <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  # filter(!is.na(sd0)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_min(sd0, with_ties = F) |> 
  # filter(!is.na(mu)) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

max_sd |> 
  ggplot(aes(x = year, y = p)) +
  geom_line(aes(color = "Highest previous year risk")) +
  geom_line(data = min_sd, aes(x = year, y = p, color = "Lowest previous year risk")) +
  scale_y_log10(labels = scales::dollar) +
  labs(x = "Year", y = "Portfolio value", title = "One industry strategy")

While the high-risk strategy has outperformed the low-risk strategy in terms of portfolio value, the low-risk strategy has been more stable. The Sharpe ratio of the low-risk strategy exceeds that of the high-risk strategy.

bind_rows(max_sd |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "Highest risk") |> 
            select(strategy, everything()),
          min_sd |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "Lowest risk") |> 
            select(strategy, everything())) |> 
  mutate(AnnualizedReturn = percent(AnnualizedReturn, accuracy = 0.1),
         `AnnualizedSharpe(Rf=0%)` = round(`AnnualizedSharpe(Rf=0%)`, 2),
         AnnualizedStdDev = percent(AnnualizedStdDev, accuracy = 0.1)) |>
  knitr::kable()
strategy AnnualizedReturn AnnualizedSharpe(Rf=0%) AnnualizedStdDev
Highest risk 49.5% 1.17 42.4%
Lowest risk 10.1% 1.90 5.3%

Instead of putting all of our money on one industry, we could diversify our investment across more than one industry. Let’s see how a diversified portfolio would have performed.

# 3 industries
max_sd3 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_max(sd0, with_ties = F, n = 3) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarise(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

min_sd3 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_min(sd0, with_ties = F, n = 3) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

# 5 industries
max_sd5 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_max(sd0, with_ties = F, n = 5) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

min_sd5 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_min(sd0, with_ties = F, n = 5) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

# 10 industries
max_sd10 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_max(sd0, with_ties = F, n = 10) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

min_sd10 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_min(sd0, with_ties = F, n = 10) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

# 20 industries
max_sd20 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_max(sd0, with_ties = F, n = 20) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)

min_sd20 <- annual_riskreturn |>
  arrange(name, year) |>
  group_by(name) |> 
  mutate(sd0 = lag(sd)) |>
  mutate(mu = ifelse(is.na(sd0), 0, sd0)) |>
  group_by(year) |> 
  slice_min(sd0, with_ties = F, n = 20) |> 
  mutate(mu = replace_na(mu, 0)) |> 
  group_by(year) |> 
  summarize(mu = mean(mu)) |> 
  ungroup() |> 
  mutate(r = mu,
         p = cumprod(1 + r)*100)
bind_rows(max_sd |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "1 industry (highest risk)") |> 
            select(strategy, everything()),
          min_sd |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "1 industry (lowest risk)") |> 
            select(strategy, everything()),
          max_sd3 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "3 industries (highest risk)") |> 
            select(strategy, everything()),
          min_sd3 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "3 industries (lowest risk)") |> 
            select(strategy, everything()),
          max_sd5 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "5 industries (highest risk)") |> 
            select(strategy, everything()),
          min_sd5 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "5 industries (lowest risk)") |> 
            select(strategy, everything()),
          max_sd10 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "10 industries (highest risk)") |> 
            select(strategy, everything()),
          min_sd10 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |>
            mutate(strategy = "10 industries (lowest risk)") |>
            select(strategy, everything()),
          max_sd20 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "20 industries (highest risk)") |> 
            select(strategy, everything()),
          min_sd20 |>
            mutate(date = ymd(paste0(year, "0101"))) |>
            tq_performance(Ra = r, performance_fun = table.AnnualizedReturns, scale = 1) |> 
            mutate(strategy = "20 industries (lowest risk)") |> 
            select(strategy, everything())) |>
  mutate(AnnualizedReturn = percent(AnnualizedReturn, accuracy = 0.1),
         `AnnualizedSharpe(Rf=0%)` = round(`AnnualizedSharpe(Rf=0%)`, 2),
         AnnualizedStdDev = percent(AnnualizedStdDev, accuracy = 0.1)) |>
  arrange(str_detect(strategy, "low")) |> 
  knitr::kable()
strategy AnnualizedReturn AnnualizedSharpe(Rf=0%) AnnualizedStdDev
1 industry (highest risk) 49.5% 1.17 42.4%
3 industries (highest risk) 41.7% 1.53 27.2%
5 industries (highest risk) 38.0% 1.66 22.8%
10 industries (highest risk) 33.0% 1.79 18.5%
20 industries (highest risk) 28.4% 1.88 15.1%
1 industry (lowest risk) 10.1% 1.90 5.3%
3 industries (lowest risk) 11.4% 1.91 6.0%
5 industries (lowest risk) 12.3% 1.93 6.4%
10 industries (lowest risk) 13.8% 1.96 7.0%
20 industries (lowest risk) 15.9% 1.99 8.0%

The highest SR is achieved investing in the 20 industries with the lowest risk in the previous year. But even the lowest SR low-risk strategy outperforms the highest SR high-risk strategy.