Load libraries and theme
library(tidyverse)
library(data.table)
library(magrittr)
library(slider)
library(car)
# Personal plot theme; change settings as desired
theme <-
theme(
plot.title = element_text(size = 30, lineheight = .9, face = "bold", hjust = .5, vjust = .75, family = "serif"
),
plot.subtitle = element_text(hjust = .5, vjust = 1, family = "serif", size = 20),
plot.caption = element_text(hjust = .5, family = "serif", size = 15),
panel.background = element_rect(fill = "white"),
axis.title = element_text(family = "serif", size = 20),
axis.text = element_text(size = 20, family = "serif"),
axis.line.y = element_line(color = "black"),
axis.line.x = element_line(color = "black"))
Load data
# CAPE data from Robert Shiller data -- I'm adding a time index for regressions (later)
cape <-
read_csv("shiller.csv") %>%
mutate(date = floor_date(mdy(date), "month"),
time = 1:length(cape)) %>%
select(time, date, cape)
# Large cap monthly returns from CFAI/Ibbotson
stocks <-
read_csv("sbbi_2023.csv", locale = locale(encoding = "latin1")) %>%
select(c(1, 2, 5, 7, 15, 16)) %>%
set_colnames(c("date", "large", "small", "lt_gov", "tbills", "infl")) %>%
mutate(date = seq(as.Date("1926-01-01"),
by = "month",
length = length(large))) %>%
select(date, large)
Figure 2 - CAPE and returns scatter
stocks %>%
mutate(large = 1 + large,
large_40 = slide_dbl(large, .f = prod, .before = 479, .complete = T),
large_40 = (large_40^(12/480)) - 1,
large_30 = slide_dbl(large, .f = prod, .before = 359, .complete = T),
large_30 = (large_30^(12/360)) - 1,
large_20 = slide_dbl(large, .f = prod, .before = 239, .complete = T),
large_20 = (large_20^(12/240)) - 1,
large_10 = slide_dbl(large, .f = prod, .before = 119, .complete = T),
large_10 = (large_10^(12/120)) - 1,
large_5 = slide_dbl(large, .f = prod, .before = 59, .complete = T),
large_5 = (large_5^(12/60)) - 1) %>%
left_join(cape, by = "date") %>%
mutate(initial_cape = lag(cape, 120),
year = year(date)) %>%
#cor.test(~large_5 + initial_cape, data = .)
ggplot(aes(x = initial_cape, y = large_10)) +
geom_point(aes(fill = initial_cape), shape = 22, size = 3) +
annotate("text", x = 30, y = .15, label = "Corr. = -0.7", size = 8, family = "serif") +
scale_fill_gradient(low = "blue", high = "red") +
labs(y = "Following 10-year annualized return", x = "Beginning of 10-year period CAPE",
caption = "Source: Ibbotson and Robert Shiller data") +
theme

Misc calcs used throughout text and Figure 4 charts
df <-
cape %>%
mutate(five_yr_change = (cape/lag(cape, 60)) - 1,
ten_yr_change = (cape/lag(cape, 120)) - 1) %>%
drop_na %>%
mutate(index = row_number())
# Freq of CAPE above pre 1990 mean
## pre 1990 mean
df %>%
summarize(mean(cape[year(date) < 1990]))
## # A tibble: 1 × 1
## `mean(cape[year(date) < 1990])`
## <dbl>
## 1 13.6
## Post 1990 mean
df %>%
summarize(mean(cape[year(date) >= 1990]))
## # A tibble: 1 × 1
## `mean(cape[year(date) >= 1990])`
## <dbl>
## 1 26.6
## Freq < pre-1990 mean since 1990
df %>%
filter(year(date) >= 1990) %>%
summarize(cape_count = sum(cape > 14.1)/length(cape))
## # A tibble: 1 × 1
## cape_count
## <dbl>
## 1 0.998
# Regressions
df %>%
lm(five_yr_change ~ lag(five_yr_change, 60),
data = ., subset = (between(year(date), 1992, 2024))) %>%
summary
##
## Call:
## lm(formula = five_yr_change ~ lag(five_yr_change, 60), data = .,
## subset = (between(year(date), 1992, 2024)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70993 -0.33373 0.00729 0.24555 1.04385
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.18381 0.02204 8.342 1.32e-15 ***
## lag(five_yr_change, 60) -0.06196 0.04415 -1.403 0.161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3746 on 385 degrees of freedom
## Multiple R-squared: 0.00509, Adjusted R-squared: 0.002505
## F-statistic: 1.97 on 1 and 385 DF, p-value: 0.1613
# Correlation plots
df %>%
filter(between(year(date), 1900, 1990)) %>%
ggplot(aes(x = lag(five_yr_change, 60), y = five_yr_change)) +
geom_point(color = "steelblue") +
geom_smooth(method = "lm", color = "black", se = F) +
theme +
labs(x = "Prior-five-year change as fraction", y = "Five-year change as fraction",
subtitle = "1900-1991")

df %>%
filter(between(year(date), 1991, 2024)) %>%
ggplot(aes(x = lag(five_yr_change, 60), y = five_yr_change)) +
geom_point(color = "steelblue") +
geom_smooth(method = "lm", color = "black", se = F) +
theme +
labs(x = "Prior-five-year change as fraction", y = "Five-year change as fraction",
subtitle = "1992-2024")
