Data sources

Shiller data: https://shillerdata.com/

Investment data (CFA login required): https://rpc.cfainstitute.org/en/research-foundation/sbbi

Contact me at with questions, corrections, comments, etc.

Load libraries and theme

library(tidyverse)
library(data.table)
library(magrittr)
library(slider)
library(car)

# Personal plot theme; change settings as desired

theme <- 
  theme(
    plot.title = element_text(size = 30, lineheight = .9, face = "bold", hjust = .5, vjust = .75, family = "serif"
    ),
    plot.subtitle = element_text(hjust = .5, vjust = 1, family = "serif", size = 20),
    plot.caption = element_text(hjust = .5, family = "serif", size = 15),
    panel.background = element_rect(fill = "white"),
    axis.title = element_text(family = "serif", size = 20),
    axis.text = element_text(size = 20, family = "serif"),
    axis.line.y = element_line(color = "black"),
    axis.line.x = element_line(color = "black"))

Load data

# CAPE data from Robert Shiller data -- I'm adding a time index for regressions (later)

cape <- 
  read_csv("shiller.csv") %>% 
  mutate(date = floor_date(mdy(date), "month"),
         time = 1:length(cape)) %>% 
  select(time, date, cape) 

# Large cap monthly returns from CFAI/Ibbotson 

stocks <- 
  read_csv("sbbi_2023.csv", locale = locale(encoding = "latin1")) %>% 
    select(c(1, 2, 5, 7, 15, 16)) %>% 
    set_colnames(c("date", "large", "small", "lt_gov", "tbills", "infl")) %>% 
    mutate(date = seq(as.Date("1926-01-01"),
                      by = "month",
                      length = length(large))) %>% 
    select(date, large)

Figure 1 - CAPE line chart

cape %>% 
  ggplot(aes(x = date, y = cape)) +
  geom_line() +
  geom_point(aes(y = cape), data = last) +
  geom_text(aes(label = round(cape, 1)), data = last, vjust  = -1)

Figure 2 - CAPE and returns scatter

stocks %>% 
    mutate(large = 1 + large,
           large_40 = slide_dbl(large, .f = prod, .before = 479, .complete = T),
           large_40 = (large_40^(12/480)) - 1,
           large_30 = slide_dbl(large, .f = prod, .before = 359, .complete = T),
           large_30 = (large_30^(12/360)) - 1,
           large_20 = slide_dbl(large, .f = prod, .before = 239, .complete = T),
           large_20 = (large_20^(12/240)) - 1,
           large_10 = slide_dbl(large, .f = prod, .before = 119, .complete = T),
           large_10 = (large_10^(12/120)) - 1,
           large_5 = slide_dbl(large, .f = prod, .before = 59, .complete = T),
           large_5 = (large_5^(12/60)) - 1) %>% 
    left_join(cape, by = "date") %>% 
    mutate(initial_cape = lag(cape, 120),
           year = year(date)) %>% 
    #cor.test(~large_5 + initial_cape, data = .)
    ggplot(aes(x = initial_cape, y = large_10)) +
    geom_point(aes(fill = initial_cape), shape = 22, size = 3) +
    annotate("text", x = 30, y = .15, label = "Corr. = -0.7", size = 8, family = "serif") +
    scale_fill_gradient(low = "blue", high = "red") +
    labs(y = "Following 10-year annualized return", x = "Beginning of 10-year period CAPE", 
         caption = "Source: Ibbotson and Robert Shiller data") +
    theme 

Figure 3 - QLR Test

# Use CAPE dataframe

# step 1. create a date vector for the period that may contain the break

window <- seq(as.Date("1980-01-01"), as.Date("1999-12-01"), by = "month")

# step 2. initialize a vector to hold the F stats

F_stats <- numeric(length(window))

# step 3. create loop for period during which you want to look for break

for(i in 1:length(window)) {
  
  t <-  cape$date >= window[i]
  
  test <- lm(log(cape) ~ time*t, data = cape) 
  
  F_stats[i] <-  linearHypothesis(test, c("tTRUE = 0", "time:tTRUE = 0"))$F[2]
  
}

# Identify max F

window[which.max(F_stats)]
## [1] "1991-08-01"
# Plot

F_stats %>% 
  as_tibble %>% 
  mutate(date = window) %>% 
  select(date, F_stat = value) %>% 
  ggplot(aes(x = date)) +
  geom_line(aes(y = F_stat), color = "steelblue") +
  geom_point(aes(x = window[which.max(F_stats)], y = max(F_stats)), color = "red", size = 4, shape = 16) +
  theme +
  labs(x = NULL, y = "Break test statistic", caption = "Source: Robert Shiller data")

Misc calcs used throughout text and Figure 4 charts

df <- 
  cape %>% 
  mutate(five_yr_change = (cape/lag(cape, 60)) - 1,
         ten_yr_change = (cape/lag(cape, 120)) - 1) %>% 
  drop_na %>% 
  mutate(index = row_number())

# Freq of CAPE above pre 1990 mean
## pre 1990 mean

df %>% 
  summarize(mean(cape[year(date) < 1990])) 
## # A tibble: 1 × 1
##   `mean(cape[year(date) < 1990])`
##                             <dbl>
## 1                            13.6
## Post 1990 mean

df %>% 
  summarize(mean(cape[year(date) >= 1990]))
## # A tibble: 1 × 1
##   `mean(cape[year(date) >= 1990])`
##                              <dbl>
## 1                             26.6
## Freq < pre-1990 mean since 1990

df %>% 
  filter(year(date) >= 1990) %>% 
  summarize(cape_count = sum(cape > 14.1)/length(cape))
## # A tibble: 1 × 1
##   cape_count
##        <dbl>
## 1      0.998
# Regressions

df %>% 
  lm(five_yr_change ~ lag(five_yr_change, 60), 
     data = ., subset = (between(year(date), 1992, 2024))) %>% 
  summary
## 
## Call:
## lm(formula = five_yr_change ~ lag(five_yr_change, 60), data = ., 
##     subset = (between(year(date), 1992, 2024)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70993 -0.33373  0.00729  0.24555  1.04385 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.18381    0.02204   8.342 1.32e-15 ***
## lag(five_yr_change, 60) -0.06196    0.04415  -1.403    0.161    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3746 on 385 degrees of freedom
## Multiple R-squared:  0.00509,    Adjusted R-squared:  0.002505 
## F-statistic:  1.97 on 1 and 385 DF,  p-value: 0.1613
# Correlation plots

  df %>% 
  filter(between(year(date), 1900, 1990)) %>% 
  ggplot(aes(x = lag(five_yr_change, 60), y = five_yr_change)) +
  geom_point(color = "steelblue") +
  geom_smooth(method = "lm", color = "black", se = F) +
  theme +
  labs(x = "Prior-five-year change as fraction", y = "Five-year change as fraction", 
       subtitle = "1900-1991")

  df %>% 
  filter(between(year(date), 1991, 2024)) %>% 
  ggplot(aes(x = lag(five_yr_change, 60), y = five_yr_change)) +
  geom_point(color = "steelblue") +
  geom_smooth(method = "lm", color = "black", se = F) +
  theme +
  labs(x = "Prior-five-year change as fraction", y = "Five-year change as fraction", 
       subtitle = "1992-2024")