File notes
The code below allows the reader to replicate the results
printed and references in the above-captioned article.
The supplement follows the paper, exhibit by exhibit
Data used are: Long Interest Rate (col G.), S&P Composite Real
Price (col. H), Real Total Return Price (col. J), Real Earnings
(col. K), and CAPE (col. M).
The author recommends copying these columns into a CSV file, and
then reading them into R.
The author has assigned the following names to these variables,
which are used in the code below: S&P composite (“p”), S&P
composite earnings (“e”), S&P composite total return price
(“tr_r_p”), CAPE (“cape”), and long interest rate (“long rate”)
The R libraries required to replicate results are found below
library(tidyverse)
library(slider)
library(vars)
library(patchwork)
library(tidyquant)
library(magrittr)
library(car)
library(stargazer)
library(zoo)
theme <-
theme(
plot.title = element_text(size = 30, lineheight = .9, face = "bold", hjust = .5, vjust = .75, family = "serif"
),
plot.subtitle = element_text(hjust = .5, vjust = 1, family = "serif", size = 20),
plot.caption = element_text(hjust = .5, family = "serif", size = 15),
panel.background = element_rect(fill = "white"),
axis.title = element_text(family = "serif", size = 20),
axis.text = element_text(size = 20, family = "serif"),
axis.line.y = element_line(color = "black"),
axis.line.x = element_line(color = "black"),
strip.text = element_text(size = 15, family = "serif", face = "bold"),
strip.background.x = element_rect(fill = "lightblue"))
recessions_df <-
read.table(
textConnection(
"begin, end
1857-06-01, 1858-12-01
1860-10-01, 1861-06-01
1865-04-01, 1867-12-01
1869-06-01, 1870-12-01
1873-10-01, 1879-03-01
1882-03-01, 1885-05-01
1887-03-01, 1888-04-01
1890-07-01, 1891-05-01
1893-01-01, 1894-06-01
1895-12-01, 1897-06-01
1899-06-01, 1900-12-01
1902-09-01, 1904-08-01
1907-05-01, 1908-06-01
1910-01-01, 1912-01-01
1913-01-01, 1914-12-01
1918-08-01, 1919-03-01
1920-01-01, 1921-07-01
1923-05-01, 1924-07-01
1926-10-01, 1927-11-01
1929-08-01, 1933-03-01
1937-05-01, 1938-06-01
1945-02-01, 1945-10-01
1948-11-01, 1949-10-01
1953-07-01, 1954-05-01
1957-08-01, 1958-04-01
1960-04-01, 1961-02-01
1969-12-01, 1970-11-01
1973-11-01, 1975-03-01
1980-01-01, 1980-07-01
1981-07-01, 1982-11-01
1990-07-01, 1991-03-01
2001-03-01, 2001-11-01
2007-12-01, 2009-06-01
2020-04-01, 2020-06-01"
),
sep = ',',
colClasses = c('Date', 'Date'),
header = TRUE
)
Exhibit 1. Real GDP change 3-year standard deviation, 1947:Q1 –
2025:Q1
gdp_df %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_vol = slide_dbl(gdp_chg, .f = sd, .before = 11, .complete = T)) %>%
drop_na %>%
ggplot() +
geom_rect(aes(xmin = begin, xmax = end, ymax = +Inf, ymin = -Inf), data = recessions_df %>% filter(year(begin) > 1953), fill = "grey", alpha = .3) +
geom_line(aes(x = date, y = gdp_vol, color = "gdp_vol"), linewidth = 1.7, color = "dodgerblue", show.legend = F) +
theme +
labs(x = NULL, y = "Rolling 3-year standard deviation of quarterly real GDP change",
caption = "NBER recessions shaded in gray\nSource: FRED, NBER, Author calculations")

Calculation: pre and post great moderation vol of other economic
time series
c("CPIAUCSL", "PAYEMS", "INDPRO") %>%
tq_get(get = "economic.data", from = "1947-01-01") %>%
pivot_wider(names_from = symbol, values_from = price) %>%
mutate(across(2:4, ~ 100*((. / lag(., 1)) - 1)),
across(2:4, ~ slide_dbl(., .f = sd, .before = 11, .complete = T))) %>%
drop_na %>%
reframe(mean(CPIAUCSL[year(date) < 1984]),
mean(CPIAUCSL[year(date) >= 1984]),
mean(PAYEMS[year(date) < 1984]),
mean(PAYEMS[year(date) >= 1984]),
mean(INDPRO[year(date) < 1984]),
mean(INDPRO[year(date) >= 1984]))
## # A tibble: 1 × 6
## mean(CPIAUCSL[year(date) < 198…¹ mean(CPIAUCSL[year(d…² mean(PAYEMS[year(dat…³
## <dbl> <dbl> <dbl>
## 1 0.239 0.207 0.269
## # ℹ abbreviated names: ¹`mean(CPIAUCSL[year(date) < 1984])`,
## # ²`mean(CPIAUCSL[year(date) >= 1984])`, ³`mean(PAYEMS[year(date) < 1984])`
## # ℹ 3 more variables: `mean(PAYEMS[year(date) >= 1984])` <dbl>,
## # `mean(INDPRO[year(date) < 1984])` <dbl>,
## # `mean(INDPRO[year(date) >= 1984])` <dbl>
Exhibit 2. CAPE and 120-month (10-year) centered moving average,
1871:1 – 2025:3.
shiller_df %>%
mutate(cape_cma_10 = slide_dbl(cape, .f = mean, .before = 60, .after = 60, .complete = T)) %>%
ggplot(aes(x = date)) +
geom_line(aes(y = cape, color = "CAPE"), linewidth = 1.7) +
geom_line(aes(y = cape_cma_10, color = "CAPE 10y CMA"), linewidth = 1.7) +
theme +
scale_x_date(expand = c(0, 0), date_breaks = "10 years", date_labels = "%Y") +
theme(legend.title = element_blank(),
legend.text = element_text(size = 15, family = "serif"),
legend.position = c(1,0),
legend.justification = c(1,0)) +
scale_color_manual(values = c("steelblue", "black")) +
labs(x = NULL, y = "CAPE / CAPE 10-year centered moving average",
caption = "Source: Robert Shiller data, Author calculations")

Exhibit 3. Rolling 3-year S&P composite stock monthly return
standard deviation with NBER recessions shaded in gray,
1947:1-2025:3.
shiller_df %>%
mutate(large = ((p / lag(p, 1)) - 1)*100,
large_vol = slide_dbl(large, .f = sd, .before = 35, .complete = T)) %>%
drop_na %>%
filter(year(date) > 1946) %>%
ggplot() +
geom_rect(aes(xmin = begin, xmax = end, ymax = +Inf, ymin = -Inf), data = recessions_df %>% filter(year(begin) > 1946), fill = "grey", alpha = .3) +
geom_line(aes(x = date, y = large_vol), linewidth = 1.7, color = "dodgerblue", show.legend = F) +
theme +
labs(x = NULL, y = "Rolling 3-year standard deviation of monthly large-cap stock returns",
caption = "Source: Robert Shiller data, NBER, Author calculations")

# Stock return SD pre and post CAPE break
shiller_df %>%
mutate(large = ((p / lag(p, 1)) - 1)*100,
large_vol = slide_dbl(large, .f = sd, .before = 35, .complete = T)) %>%
drop_na %>%
filter(between(date, as.Date("1947-01-01"), as.Date("1991-01-01"))) %>%
reframe(mean(large_vol, na.rm = T))
## # A tibble: 1 × 1
## `mean(large_vol, na.rm = T)`
## <dbl>
## 1 3.42
Exhibit 4. Rolling 3-year S&P composite earnings monthly change
standard deviation with NBER recessions shaded in gray,
1947:1-2025:3
shiller_df %>%
mutate(earnings = ((e / lag(e, 1)) - 1)*100,
earnings_vol = slide_dbl(earnings, .f = sd, .before = 59, .complete = T)) %>%
drop_na %>%
filter(year(date) > 1946) %>%
ggplot() +
geom_rect(aes(xmin = begin, xmax = end, ymax = +Inf, ymin = -Inf), data = recessions_df %>% filter(year(begin) > 1946), fill = "grey", alpha = .3) +
geom_line(aes(x = date, y = earnings_vol), linewidth = 1.7, color = "dodgerblue", show.legend = F) +
theme +
labs(x = NULL, y = "Rolling 5-year standard deviation of monthly S&P composite earnings",
caption = "Source: Robert Shiller data, NBER, Author calculations")

# Earnings change SD pre and post CAPE break
shiller_df %>%
mutate(earnings = ((e / lag(e, 1)) - 1)*100,
earnings_vol = slide_dbl(earnings, .f = sd, .before = 59, .complete = T)) %>%
filter(between(date, as.Date("1947-01-01"), as.Date("1991-01-01"))) %>%
reframe(mean(earnings_vol, na.rm = T))
## # A tibble: 1 × 1
## `mean(earnings_vol, na.rm = T)`
## <dbl>
## 1 1.34
Exhibit 5. Rolling 3-year CAPE quarterly-change standard deviation
and with NBER recessions shaded in gray, 1947:1-2025:1
shiller_df %>%
mutate(cape_chg = 100*((cape / lag(cape, 3)) - 1),
cape_sd = slide_dbl(cape_chg, .f = sd, .before = 11, .complete = T)) %>%
drop_na %>%
filter(year(date) > 1946 & year(date) < 2026) %>%
ggplot() +
geom_rect(aes(xmin = begin, xmax = end, ymin = -Inf, ymax = +Inf), data = recessions_df %>% filter(year(begin) > 1955),
fill = "gray", alpha = .3) +
geom_line(aes(x = date, y = cape_sd, color = "cape_sd"), linewidth = 1.7, show.legend = F) +
theme +
theme(legend.title = element_blank(),
legend.text = element_text(size = 15, family = "serif"),
legend.position = c(1,0),
legend.justification = c(1,0)) +
scale_x_date(expand = c(0, 0)) +
labs(x = NULL, y = "Rolling 3-year standard deviation of quarterly CAPE change",
caption = "Source: Robert Shiller data, NBER, Author calculations")

Calculation: CAPE break test
# Often the date of a possible break is unknown or known only within a range. Suppose, for example, you suspect that a break occurred sometime between two dates, t0 and t1.The Chow test can be extended to handle this situation by testing for breaks at all possible dates t between t0 and t1 and then using the largest of the resulting F-statistics to test for a break at an unknown date. This modified Chow test is variously called the Quandt likelihood ratio (QLR) statistic (Quandt 1960)
# Use monthly cape
cape <-
shiller_df %>%
mutate(time = row_number())
# step 1. create a date vector for the period that may contain the break
window <- seq(as.Date("1950-01-01"), as.Date("2000-12-01"), by = "month")
# step 2. initialize a vector to hold the F stats
F_stats <- numeric(length(window))
# step 3. create loop for period during which you want to look for break
for(i in 1:length(window)) {
t <- cape$date >= window[i]
test <- lm(log(cape) ~ time*t, data = cape)
F_stats[i] <- linearHypothesis(test, c("tTRUE = 0", "time:tTRUE = 0"))$F[2]
}
# Identify max F
window[which.max(F_stats)]
## [1] "1991-02-01"
# Plot
F_stats %>%
as_tibble %>%
mutate(date = window) %>%
dplyr::select(date, F_stat = value) %>%
ggplot(aes(x = date)) +
geom_line(aes(y = F_stat), color = "steelblue", linewidth = 1.7) +
geom_point(aes(x = window[which.max(F_stats)], y = max(F_stats)), color = "red", size = 8, shape = 16) +
theme +
labs(x = NULL, y = "Break test statistic", caption = "Source: Robert Shiller data")

Calculation: CAPE SD before and after break - calc and var.test
shiller_df %>%
mutate(cape_chg = 100*((cape / lag(cape, 1)) - 1),
cape_sd = slide_dbl(cape_chg, .f = sd, .before = 35, .complete = T)) %>%
drop_na %>%
reframe(mean_pre = mean(cape_sd[year(shiller_df$date) < 1991], na.rm = T),
mean_post = mean(cape_sd[year(shiller_df$date) >= 1991], na.rm = T))
## # A tibble: 1 × 2
## mean_pre mean_post
## <dbl> <dbl>
## 1 3.81 3.50
x1 <-
shiller_df %>%
mutate(cape_chg = 100*((cape / lag(cape, 1)) - 1),
cape_sd = slide_dbl(cape_chg, .f = sd, .before = 35, .complete = T)) %>%
filter(year(date) < 1991) %>%
dplyr::select(cape) %>%
as_vector
x2 <-
shiller_df %>%
mutate(cape_chg = 100*((cape / lag(cape, 1)) - 1),
cape_sd = slide_dbl(cape_chg, .f = sd, .before = 35, .complete = T)) %>%
filter(year(date) >= 1991) %>%
dplyr::select(cape) %>%
as_vector
var.test(x1, x2)
##
## F test to compare two variances
##
## data: x1 and x2
## F = 0.51097, num df = 1319, denom df = 410, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4353400 0.5958126
## sample estimates:
## ratio of variances
## 0.5109669
Exhibit 6. Rolling 4-quarter CAPE change standard deviation versus
GDP change standard deviation, 1947:Q1-2025:Q1
# Use rolling 4-quarter sd of quarterly percentage change
gdp_df %>%
left_join(shiller_df) %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
cape_chg = 100*((cape / lag(cape, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 3, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 3, .complete = T),
year = year(date)) %>%
#filter(gdp_chg_sd < 6) %>%
#cor.test(~ gdp_chg_sd + cape_chg_sd, data = .)
ggplot(aes(x = gdp_chg_sd, y = cape_chg_sd)) +
geom_point(fill = "steelblue", size = 4, shape = 21) +
geom_smooth(method = "lm", se = F, color = "black") +
labs(x = "GDP change standard deviation", y = "CAPE change standard deviation",
caption = "Source: FRED, Robert Shiller data, Author calculations") +
theme

Exhibit 7. Results for baseline regression of 3-year rolling CAPE
volatility on GDP volatility, 1947:Q1 – 2025:Q1
# Regression of CAPE chg vol on GDP vol and rates using 3-year windows
model_1 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 11, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 11, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 11, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 11, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 11, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol, data = .)
model_2 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 11, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 11, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 11, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 11, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 11, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number())%>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_4 + time, data = .)
model_3 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 11, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 11, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 11, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 11, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 11, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_vol + time, data = .)
model_4 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 11, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 11, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 11, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 11, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 11, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_4 + rate_diff_vol + time, data = .)
regressions <- list(model_1, model_2, model_3, model_4)
stargazer(
regressions,
ci = T,
ci.level = 0.95,
digits = 3,
omit.stat = c("f", "chi2", "ser", "adj.rsq"),
omit = c("Constant"),
type = "text",
notes = c("Standard errors are conventional",
"Source: author's regressions"),
dep.var.caption = "Dependent variable",
model.numbers = T)
##
## ==============================================================================
## Dependent variable
## ----------------------------------------------------------------
## cape_chg_vol
## (1) (2) (3) (4)
## ------------------------------------------------------------------------------
## gdp_chg_vol 0.335*** 0.326*** 0.341*** 0.339***
## (0.223, 0.448) (0.212, 0.440) (0.231, 0.450) (0.229, 0.449)
##
## rate_diff_4 -0.029 -0.008
## (-0.078, 0.020) (-0.056, 0.041)
##
## rate_diff_vol 0.553*** 0.546***
## (0.326, 0.780) (0.315, 0.778)
##
## time 0.0001 -0.0003 -0.0003
## (-0.001, 0.001) (-0.001, 0.0004) (-0.001, 0.0004)
##
## ------------------------------------------------------------------------------
## Observations 290 290 290 290
## R2 0.106 0.111 0.173 0.173
## ==============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Standard errors are conventional
## Source: author's regressions
Calculation: regressions with great moderation dummy
model_1 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 20, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 20, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 20, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 20, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = mean, .before = 20, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + GM_dummy, data = .)
model_2 <-
gdp_df %>%
left_join(shiller_df)%>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 20, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 20, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 20, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 20, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = mean, .before = 20, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_4 + time + GM_dummy, data = .)
model_3 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 20, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 20, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 20, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 20, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = mean, .before = 20, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number())%>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_vol + time + GM_dummy, data = .)
model_4 <-
gdp_df %>%
left_join(shiller_df) %>%
drop_na %>%
mutate(gdp_chg = 100*((gdp / lag(gdp, 1)) - 1),
gdp_chg_sd = slide_dbl(gdp_chg, .f = sd, .before = 20, .complete = T),
gdp_chg_sd_avg = slide_dbl(gdp_chg_sd, .f = mean, .before = 20, .complete = T),
gdp_chg_vol = gdp_chg_sd / gdp_chg_sd_avg,
cape_chg = 100*((cape / lag(cape, 1)) - 1),
rate_diff = long_rate - lag(long_rate, 1),
rate_diff_4 = long_rate - lag(long_rate, 4),
rate_diff_vol = slide_dbl(rate_diff, .f = sd, .before = 20, .complete = T),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 20, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = mean, .before = 20, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg,
GM_dummy = ifelse(between(year(date), 1990, 1992), 1, 0),
time = row_number()) %>%
drop_na %>%
lm(cape_chg_vol ~ gdp_chg_vol + rate_diff_4 + rate_diff_vol + time + GM_dummy, data = .)
regressions <- list(model_1, model_2, model_3, model_4)
stargazer(
regressions,
ci = T,
ci.level = 0.95,
digits = 3,
omit.stat = c("f", "chi2", "ser", "adj.rsq"),
omit = c("Constant"),
type = "text",
#out = "main_regressions.html",
notes = c("Standard errors are conventional",
"Source: author's regressions"),
dep.var.caption = "Dependent variable",
#dep.var.labels = c("Large cap", "Small cap", "LT gov. bond", "T-bills"),
model.numbers = T)
##
## ===================================================================================
## Dependent variable
## ---------------------------------------------------------------------
## cape_chg_vol
## (1) (2) (3) (4)
## -----------------------------------------------------------------------------------
## gdp_chg_vol 0.266*** 0.286*** 0.298*** 0.296***
## (0.221, 0.311) (0.241, 0.331) (0.254, 0.343) (0.253, 0.340)
##
## rate_diff_4 -0.029*** -0.021**
## (-0.048, -0.011) (-0.039, -0.003)
##
## rate_diff_vol 0.240*** 0.216***
## (0.142, 0.338) (0.117, 0.315)
##
## time -0.001*** -0.001*** -0.001***
## (-0.001, -0.0002) (-0.001, -0.0003) (-0.001, -0.0004)
##
## GM_dummy 0.013 -0.001 -0.024 -0.032
## (-0.090, 0.116) (-0.101, 0.099) (-0.123, 0.075) (-0.130, 0.066)
##
## -----------------------------------------------------------------------------------
## Observations 272 272 272 272
## R2 0.335 0.385 0.413 0.424
## ===================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
## Standard errors are conventional
## Source: author's regressions
Calculation: Vector autoregressions related to Exhibit 6 results
(trouble getting this to knit)
Exhibit 8, CAPE rolling 3-year volatility versus CAPE 3-year
percentage change, 1947:1, 2025:3
shiller_df %>%
mutate(cape_chg = 100 * ((cape / lag(cape, 1)) - 1),
cape_chg_36 = 100 * ((cape / lag(cape, 36)) - 1),
cape_chg_vol = slide_dbl(cape_chg, .f = sd, .before = 36, .complete = T)) %>%
drop_na %>%
filter(year(date) > 1946) %>%
ggplot(aes(x = cape_chg_vol, y = cape_chg_36)) +
geom_point(fill = "steelblue", size = 4, shape = 21) +
geom_smooth(method = "lm", se = F, color = "black") +
theme +
annotate("text", x = 2, y = -35, label = "R-squared = 0.15", family = "serif", size = 8) +
labs(x = "CAPE 1-year change 3-year volatility", y = "CAPE 3-year percent change", caption = "Source: Robert Shiller Data, Author calculations")

# R-sq for plot
shiller_df %>%
filter(year(date) > 1946) %>%
mutate(cape_chg = 100 * ((cape / lag(cape, 1)) - 1),
cape_chg_36 = 100 * ((cape / lag(cape, 36)) - 1),
cape_chg_vol = slide_dbl(cape_chg, .f = sd, .before = 36, .complete = T)) %>%
drop_na %>%
filter(year(date) > 1946) %>%
lm(cape_chg_36 ~ cape_chg_vol, data = .) %>%
summary
##
## Call:
## lm(formula = cape_chg_36 ~ cape_chg_vol, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47.849 -16.007 -1.982 13.819 82.684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.7623 3.0820 14.85 <2e-16 ***
## cape_chg_vol -11.3344 0.8866 -12.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.59 on 900 degrees of freedom
## Multiple R-squared: 0.1537, Adjusted R-squared: 0.1527
## F-statistic: 163.4 on 1 and 900 DF, p-value: < 2.2e-16
Regression and Exhibit 9: Baseline regression and coef est line
chart of CAPE chg ~ CAPE vol
# Regression of CAPE chg vol on GDP vol and rates.
# Using time as control throughout
# cape chg ~ cape vol (1871:1 - 2025:3)
shiller_df %>%
mutate(cape_chg = 100 * ((cape / lag(cape, 1)) - 1),
cape_chg_36 = 100 * ((cape / lag(cape, 36)) - 1),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 35, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 35, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg) %>%
drop_na %>%
lm(cape_chg_36 ~ cape_chg_vol, data = .) %>%
stargazer(
type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## cape_chg_36
## -----------------------------------------------
## cape_chg_vol -18.840***
## (1.673)
##
## Constant 27.071***
## (1.985)
##
## -----------------------------------------------
## Observations 1,660
## R2 0.071
## Adjusted R2 0.070
## Residual Std. Error 31.724 (df = 1658)
## F Statistic 126.780*** (df = 1; 1658)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
ex_8_df <-
shiller_df %>%
mutate(cape_chg = 100 * ((cape / lag(cape, 1)) - 1),
cape_chg_36 = 100 * ((cape / lag(cape, 36)) - 1),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 35, .complete = T),
cape_chg_sd_avg = slide_dbl(cape_chg_sd, .f = first, .before = 35, .complete = T),
cape_chg_vol = cape_chg_sd / cape_chg_sd_avg) %>%
drop_na
ex_8_df %>%
dplyr::select(cape_chg_36, cape_chg_vol) %>%
zoo::rollapply(., function(x){lm(x[,1] ~ x[,2], data = .) %>% coefficients}, width = 120, by.column = F) %>%
as_tibble %>%
set_colnames(c("intercept", "coef_1")) %>%
mutate(date = ex_8_df$date[-c(1:119)]) %>%
ggplot(aes(x = date, y = coef_1)) +
geom_line(color = "steelblue", linewidth = 1.7) +
geom_hline(yintercept = 0) +
theme +
labs(x = NULL, y = "Response of CAPE return to change in CAPE volatility",
caption = "Source: Robert Shiller data, Author calculations")

Exhibit 9. The OLS-estimated response of CAPE return to CAPE
volatility, rolling 10-year window, 1871:1 – 2025:3y
Exhibit 10. Mean CAPE value by CAPE volatility “bin,” 1871:1 –
2025:3
shiller_df %>%
mutate(cape_chg = 100 * ((cape / lag(cape, 1)) - 1),
cape_chg_sd = slide_dbl(cape_chg, .f = sd, .before = 59, .complete = T),
cape_trend = slide_dbl(cape, .f = mean, .before = 30, .after = 30, .complete = T),
cape_chg_sd_bin = cut(cape_chg_sd, breaks = 20)) %>%
group_by(cape_chg_sd_bin) %>%
reframe(mean = mean(cape_trend, na.rm = T),
n = n()) %>%
ungroup %>%
drop_na %>%
mutate(freq = 100*(n / sum(n)),
freq = round(freq, 1)) %>%
filter(freq >= 5) %>%
ggplot(aes(x = cape_chg_sd_bin, y = mean)) +
geom_col(fill = "steelblue") +
geom_text(aes(label = paste0("freq = ", freq, "%")), vjust = -.61, size = 7) +
theme +
theme(axis.text.x = element_text(angle = 45, size = 12),
axis.line.x = element_blank()) +
labs(x = "CAPE volatiilty bin", y = "Mean cape value", caption = "Source: Robert Shiller data")

Regression (table not shown in paper): CAPE variance and CAPE
trend
Exhibit 11: Exhibit 11. Five-year correlation of annual changes in
Shiller price and smoothed earnings, 1947:1 to 2025:3 and the
above-calculated CAPE break date (1991:2)
df <-
shiller_df %>%
mutate(price_chg = 100*((p / lag(p, 12)) - 1),
earnings_chg = 100*((smoothed_e / lag(smoothed_e, 12)) - 1)) %>%
drop_na %>%
group_by(year) %>%
filter(row_number() == n()) %>%
ungroup %>%
filter(year(date) > 1946)
df %>%
dplyr::select(price_chg, earnings_chg) %>%
zoo::rollapply(., width = 5, function(x) {cor(x[,1],x[,2])}, by.column = F) %>%
as_tibble %>%
mutate(date = df$date[-c(1:4)]) %>%
# reframe(mean(value[year(date) <= 1990], na.rm = T),
# mean(value[year(date) > 1990], na.rm = T))
ggplot(aes(x = date, y = value)) +
geom_line() +
geom_hline(yintercept = -.128, color = "steelblue", linetype = 2, linewidth = 1.7) +
geom_hline(yintercept = 0.252, color = "azure4", linetype = 2, linewidth = 1.7) +
annotate("text", x = as.Date("1975-01-01"), y = -0.2, label = "Pre-1991 avg. = -0.05", color = "steelblue", size = 5) +
annotate("text", x = as.Date("1975-01-01"), y = 0.35, label = "Post-1991 avg. = 0.13", color = "azure4", size = 5) +
theme +
labs(x = NULL, y = "Rolling 5-year correlation", caption = "Source: Robert Shiller data")

Exhibit 12. S&P composite total return 36-month price change
correlation with PCE 36-month change, 1959:1 – 2025:3
#Note: for this exhibit, I used two additional time series, personal consumption expenditures (PCE) and headline CPI inflation, both taken from FRED. The shortest common series (PCE, which starts in 1959:1) dictates the timeframe used.
df <-
c("PCE", "CPIAUCSL") %>%
tq_get(get = "economic.data", from = "1947-01-01") %>%
pivot_wider(names_from = symbol, values_from = price) %>%
drop_na %>%
mutate(PCE = 100*(PCE / CPIAUCSL))
df1 <-
shiller_df %>%
dplyr::select(date, tr_r_price) %>%
right_join(df, by = "date") %>%
mutate(tr_r_price = 100*(tr_r_price / CPIAUCSL),
real_tr_p_chg = (tr_r_price / lag(tr_r_price, 12)) - 1,
real_pce_chg = (PCE / lag(PCE, 12)) - 1) %>%
drop_na
df1 %>%
dplyr::select(real_tr_p_chg, real_pce_chg) %>%
zoo::rollapply(., function(x) {cor(x[,1], x[,2])}, width = 120, by.column = F) %>%
as_tibble %>%
mutate(date = df1$date[-c(1:119)]) %>%
ggplot(aes(x = date, y = value)) +
geom_line(color = "steelblue", linewidth = 1.7) +
geom_vline(xintercept = as.Date("1991-02-01"), color = "red", linewidth = 1.7) +
annotate("text", x = as.Date("1987-01-01"), y = .25,
label = "Estimated CAPE\nbreak date", color = "red",
size = 6) +
theme +
labs(x = NULL, y = "correlation coefficient", caption = "Source: Robert Shiller data, FRED, Author calculations")
