This Online Supplement provides R code, model output, and other results to accompany A time-series analysis and forecast of CAPE. The contents of this section are organized according to article headers.
I’ve turned off evaluation of chart code chunks to save space where charts appear in main article (thus the “eval=F” setting in some cases), and I’ve disabled time series cross validation (tsCV) chunks to reduce run time.
Please contact mfandetti@gmail.com with questions, suggestions, or comments.
library(tidyverse)
library(fpp2)
library(tseries)
library(urca)
library(knitr)
library(astsa)
library(quantmod)
library(arfima)
Data is from http://www.econ.yale.edu/~shiller/data.htm.(I delete the first top several rows.) I create a time series from the data frame.
cape = read.csv("cape.csv",header=TRUE)
cape = ts(na.omit(cape[,13]),start=1881,frequency=12)
These calculations are used in the introduction, as well as in the shape and spread discussion (see histogram code below).
cape.ann.rets = annualReturn(as.xts(cape),type="arithmetic")
length(cape.ann.rets[cape.ann.rets> .2])/length(cape.ann.rets)
## [1] 0.1560284
length(cape.ann.rets[cape.ann.rets< -.3]) /length(cape.ann.rets)
## [1] 0.04255319
length(cape.ann.rets[cape.ann.rets> .3])/length(cape.ann.rets)
## [1] 0.05673759
length(cape.ann.rets[cape.ann.rets< -.3]) /length(cape.ann.rets)
## [1] 0.04255319
mean(window(cape,start=1949))
## [1] 19.63629
mean(window(cape,start=1980))
## [1] 22.5116
ggplot(as.data.frame(cape),aes(cape))+
geom_boxplot(notch=T)+
labs(title="CAPE boxplot",subtitle = "January 1881 to March 2021",y=NULL,x="CAPE")+
coord_flip()+
scale_y_discrete(labels = NULL, breaks = NULL)
summary(cape)
autoplot(cape)+
labs(title="CAPE",subtitle = "January 1881 to March 2021",y="CAPE",x=NULL)+
geom_hline(yintercept = mean(cape),color="red")+
annotate("text",label="mean = 17.13",x=2011,y=13,color="darkred")
ggplot(as.data.frame(cape.ann.rets),aes(cape.ann.rets))+
geom_histogram(color="black",fill="white")+
labs(title="CAPE return histogram",subtitle = "January 1881 to March 2021",x="CAPE annual change",y=NULL)+
stat_bin(geom="text", aes(label=..count..),vjust = -3)+
ylim(0,13)
summary(cape.ann.rets)
cape.1881.to.1979=window(cape,start=1881,end=1980)
cape.1980.to.2020=window(cape,start=1980,end=2020)
autoplot(cape.1881.to.1979)+
geom_hline(yintercept = mean(cape.1881.to.1979),color="darkred")+
annotate("text",label="mean = 14.88",x=1890,y=12,color="darkred")+
labs(title="CAPE",subtitle = "1881 to 1979",x=NULL,y="CAPE")
autoplot(cape.1980.to.2020)+
geom_hline(yintercept = mean(cape.1980.to.2020),color="darkred")+
annotate("text",label="mean = 22.27",x=1985,y=25,color="darkred")+
labs(title="CAPE",subtitle = "1980 to 2020",x=NULL,y="CAPE")
autoplot(ts(rnorm(100)))+
geom_hline(yintercept = 0)+
labs(title="Simulated white noise",x=NULL,y=NULL)
cape.1881.to.1979=window(cape,start=1881,end=1980)
cape.1980.to.2020=window(cape,start=1980,end=2020)
autoplot(cape.1881.to.1979)+
labs(title="CAPE with time trend",subtitle = "1881 to 1979",y="CAPE",x=NULL)+
geom_smooth(method=lm,se=T)
autoplot(cape.1980.to.2020)+
labs(title="CAPE with time trend",subtitle = "1980 to 2020",y="CAPE",x=NULL)+
geom_smooth(method="lm",se=T)
adf.test(cape.1881.to.1979)#no differencing needed.
##
## Augmented Dickey-Fuller Test
##
## data: cape.1881.to.1979
## Dickey-Fuller = -3.5901, Lag order = 10, p-value = 0.03363
## alternative hypothesis: stationary
adf.test(cape.1980.to.2020)#differencing needed.
##
## Augmented Dickey-Fuller Test
##
## data: cape.1980.to.2020
## Dickey-Fuller = -1.7906, Lag order = 7, p-value = 0.6667
## alternative hypothesis: stationary
summary(ur.kpss(cape.1881.to.1979))#borderline non-stationary.
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 7 lags.
##
## Value of test-statistic is: 0.7605
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
summary(ur.kpss(cape.1980.to.2020))#not stationary.
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 3.6478
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
cape.1881.to.1939=window(cape,end=1940)
cape.1920.to.1959=window(cape,start=1920,end=1960)
cape.1940.to.1979=window(cape,start=1940,end=1980)
cape.1881.to.2005=window(cape,end=2006)#for ARFIMA.
autoplot(cape.1881.to.1939)+
geom_smooth(method=lm,se=T)+
labs("CAPE with trend",subtitle = "1881 to 1939",x=NULL,y=CAPE)+
geom_hline(yintercept = mean(cape.1881.to.1939))
autoplot(cape.1920.to.1959)+
geom_smooth(method=lm,se=T)+
labs(title="CAPE with trend",subtitle = "1920 to 1959",x=NULL,y="CAPE")+
geom_hline(yintercept = mean(cape.1920.to.1959))
autoplot(cape.1940.to.1979)+
geom_smooth(method=lm,se=T)+
labs(title="CAPE with trend",subtitle = "1940 to 1979",x=NULL,y="CAPE")+
geom_hline(yintercept = mean(cape.1940.to.1979))
adf.test(BoxCox(cape,lambda=0),k=12)
##
## Augmented Dickey-Fuller Test
##
## data: BoxCox(cape, lambda = 0)
## Dickey-Fuller = -3.261, Lag order = 12, p-value = 0.07739
## alternative hypothesis: stationary
summary(ur.kpss(BoxCox(cape,lambda = 0)))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 3.8217
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Check on optimal \(\lambda\) value. (\(\lambda\) minimizes the coefficient of variation for sub series of x.)
BoxCox.lambda(cape.1980.to.2020)
## [1] 0.2936857
Box-Cox routine optimal \(\lambda\) is close enough to zero (log transform) that I’m using a log transform throughout.
autoplot(cbind("CAPE"= cape.1980.to.2020,
"Log CAPE"=BoxCox(cape.1980.to.2020,lambda=0),
"Differenced log CAPE"=diff(BoxCox(cape.1980.to.2020,
lambda = 0))),facets=TRUE)+
xlab(NULL)+ylab(NULL)
ggAcf(diff(BoxCox(cape.1980.to.2020,lambda = 0)))+
labs(title="ACF of differenced and transformed CAPE")
adf.test(diff(BoxCox(cape.1980.to.2020,lambda=0)))
##
## Augmented Dickey-Fuller Test
##
## data: diff(BoxCox(cape.1980.to.2020, lambda = 0))
## Dickey-Fuller = -6.9115, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
summary(ur.kpss(diff(BoxCox(cape.1980.to.2020,lambda = 0))))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.1588
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(BoxCox(cape.1980.to.2020,lambda=0)))+
labs(title="CAPE autocorrelation function")
ggPacf(diff(BoxCox(cape.1980.to.2020,lambda=0)))+
labs(title="CAPE partial autocorrelation")
#compare to auto.arima-selected model
arima.mod=auto.arima(cape.1980.to.2020,stepwise = F,approximation = F,seasonal = F,lambda=0)
arima.mod
## Series: cape.1980.to.2020
## ARIMA(0,1,5)
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5
## 0.2835 0.0087 -0.0184 0.0135 0.1397
## s.e. 0.0451 0.0478 0.0495 0.0461 0.0453
##
## sigma^2 estimated as 0.001184: log likelihood=938.72
## AIC=-1865.45 AICc=-1865.27 BIC=-1840.41
#my AICs is only a little larger, and the model's much simpler
my.arima.mod=Arima(cape.1980.to.2020,order=c(0,1,1),lambda=0)
my.arima.mod
## Series: cape.1980.to.2020
## ARIMA(0,1,1)
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1
## 0.2732
## s.e. 0.0443
##
## sigma^2 estimated as 0.001196: log likelihood=934.21
## AIC=-1864.42 AICc=-1864.4 BIC=-1856.08
#check ARIMA(0,1,1) residuals
autoplot(forecast(my.arima.mod,h=12,level=95))+
labs(title="CAPE and forecast",subtitle = "ARIMA(0,1,1) 12 month forecast",x=NULL,y="CAPE")
forecast(my.arima.mod,h=12,level=95)
cape.1881.to.1994=window(cape,end=1995)
pre.1994.arima.mod=auto.arima(cape.1881.to.1994,lambda=0,stepwise = F,approximation = F,seasonal=F)
autoplot(forecast(pre.1994.arima.mod,h=60,level=95))+
labs(title="CAPE and forecast",subtitle="ARIMA(3,1,2) 60-month forecast",x=NULL,y="CAPE")+
autolayer(window(cape,start=1994,end=2000),color="black")
forecast(pre.1994.arima.mod,level=95,h=12)
First, I examine correlograms to get a sense for appropriate number of order terms. Then, I get direction from Rob Hyndman’s auto.arima function, which I use with caution (some practitioners recommend strongly against using it, and I don’t rely on it blindly). Auto-arima’s model has serially correlated residuals; I find an order that doesn’t, and I use it for forecasting.
ggtsdisplay(cape)#suggests a few to several AR terms
final.arima.mod.auto=auto.arima(cape,lambda=0,approximation = F,stepwise = F,seasonal = F)#poorly behaved residuals from (4,1,0) fit.
final.arima.mod=Arima(cape,lambda=0,order = c(5,1,2))
checkresiduals(final.arima.mod)
autoplot(forecast(final.arima.mod,h=12,level=95))
forecast(final.arima.mod,h=36,level=95)
Comparing tsCV RMSE accuracy of ARIMA (4,1,0) and ARIMA(5,1,2) models. (Disabling code to reduce Markdown knit time)
arima.fcast = function(x, h){forecast(Arima(x, order=c(4,1,0)), h=h)}
(arima.acc.tscv=sqrt(mean(tsCV(cape,arima.fcast, h=1)^2,na.rm=TRUE))) #0.705
arima.fcast = function(x, h){forecast(Arima(x, order=c(5,1,2)), h=h)}
(arima.acc.tscv=sqrt(mean(tsCV(cape,arima.fcast, h=1)^2,na.rm=TRUE))) #0.710
ggAcf(cape,lag.max = 120)+
labs(title="CAPE autocorrelation",subtitle = "120 months",x="Lag in months")
arfima.mod=arfima(BoxCox(cape,lambda=0))
## Note: only one starting point. Only one mode can be found -- this is now the default behavior.
## Beginning the fits with 1 starting values.
summary(arfima.mod)
##
## Call:
##
## arfima(z = BoxCox(cape, lambda = 0))
##
##
## Mode 1 Coefficients:
## Estimate Std. Error Th. Std. Err. z-value Pr(>|z|)
## d.f 4.99527e-01 6.32297e-07 1.90053e-02 7.90020e+05 < 2e-16 ***
## Fitted mean 3.19372e+00 1.33342e+00 NA 2.39514e+00 0.016614 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma^2 estimated as 0.00531642; Log-likelihood = 4403.49; AIC = -8800.99; BIC = -8784.7
##
## Numerical Correlations of Coefficients:
## d.f Fitted mean
## d.f 1.00 0.00
## Fitted mean 0.00 1.00
##
## Theoretical Correlations of Coefficients:
## d.f
## d.f 1.00
##
## Expected Fisher Information Matrix of Coefficients:
## d.f
## d.f 1.65
arima.fcast=function(x, h){forecast(Arima(x,order=c(0,1,1),lambda=0), h=h)}
arima.acc.tscv=sqrt(mean(tsCV(cape.1980.to.2020,arima.fcast, h=1)^2,na.rm=TRUE)) #0.796
arfima.fcast=function(x, h){forecast(arfima(x,lambda=0), h=h)}
arfima.acc.tscv=sqrt(mean(tsCV(cape.1980.to.2020,arfima.fcast, h=1)^2,na.rm=TRUE)) #0.944
tsCV error of ARIMA model is 0.796, and tsCV error of ARFIMA is 0.944.
arfima.mod1=arfima(BoxCox(cape.1881.to.1994,lambda=0));summary(arfima.mod1)
## Note: only one starting point. Only one mode can be found -- this is now the default behavior.
## Beginning the fits with 1 starting values.
##
## Call:
##
## arfima(z = BoxCox(cape.1881.to.1994, lambda = 0))
##
##
## Mode 1 Coefficients:
## Estimate Std. Error Th. Std. Err. z-value Pr(>|z|)
## d.f 4.99762e-01 6.32376e-07 2.10737e-02 7.90293e+05 < 2e-16 ***
## Fitted mean 3.12032e+00 1.88759e+00 NA 1.65307e+00 0.098317 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma^2 estimated as 0.00533813; Log-likelihood = 3578.18; AIC = -7150.37; BIC = -7134.7
##
## Numerical Correlations of Coefficients:
## d.f Fitted mean
## d.f 1.00 0.00
## Fitted mean 0.00 1.00
##
## Theoretical Correlations of Coefficients:
## d.f
## d.f 1.00
##
## Expected Fisher Information Matrix of Coefficients:
## d.f
## d.f 1.64
arima.mod1=auto.arima(cape.1881.to.1994,lambda=0,stepwise=F,approximation=F,seasonal=F);summary(arima.mod1)
## Series: cape.1881.to.1994
## ARIMA(3,1,2)
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2
## 1.2757 -1.2676 0.2880 -0.9923 0.9640
## s.e. 0.0294 0.0302 0.0263 0.0158 0.0235
##
## sigma^2 estimated as 0.001643: log likelihood=2446.49
## AIC=-4880.98 AICc=-4880.91 BIC=-4849.65
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003270239 0.5994922 0.4093252 -0.07699067 2.848815 0.1888427
## ACF1
## Training set -0.002696864
arfima.mod2=arfima(BoxCox(cape.1881.to.2005,lambda=0));summary(arfima.mod2)
## Note: only one starting point. Only one mode can be found -- this is now the default behavior.
## Beginning the fits with 1 starting values.
##
## Call:
##
## arfima(z = BoxCox(cape.1881.to.2005, lambda = 0))
##
##
## Mode 1 Coefficients:
## Estimate Std. Error Th. Std. Err. z-value Pr(>|z|)
## d.f 4.99438e-01 6.32297e-07 2.01246e-02 7.89878e+05 < 2.22e-16 ***
## Fitted mean 6.66649e+00 1.25579e+00 NA 5.30861e+00 1.1046e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma^2 estimated as 0.0055453; Log-likelihood = 3895.4; AIC = -7784.8; BIC = -7768.85
##
## Numerical Correlations of Coefficients:
## d.f Fitted mean
## d.f 1.00 0.00
## Fitted mean 0.00 1.00
##
## Theoretical Correlations of Coefficients:
## d.f
## d.f 1.00
##
## Expected Fisher Information Matrix of Coefficients:
## d.f
## d.f 1.64
arima.mod2=auto.arima(cape.1881.to.2005,lambda=0,stepwise=F,approximation=F,seasonal=F);summary(arima.mod2)
## Series: cape.1881.to.2005
## ARIMA(4,1,1)
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1
## 0.9518 -0.2359 -0.0252 0.0971 -0.6666
## s.e. 0.0962 0.0448 0.0362 0.0261 0.0940
##
## sigma^2 estimated as 0.001613: log likelihood=2696.1
## AIC=-5380.2 AICc=-5380.14 BIC=-5348.32
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003363667 0.6612344 0.4454921 -0.06507784 2.832917 0.1892925
## ACF1
## Training set -0.02951227
autoplot(forecast(arima.mod1,h=60,level=95))+
labs(title="CAPE and forecast",subtitle="ARIMA(3,1,2) 60-month forecast",x=NULL,y="CAPE")+
autolayer(window(cape,start=1994,end=2000),color="black")
autoplot(forecast(arfima.mod1,h=60,level=95))+
labs(title="CAPE and forecast",subtitle="ARFIMA 60-month forecast",x=NULL,y="CAPE")+
autolayer(window(cape,start=1994,end=2000),color="black")
autoplot(forecast(arima.mod2,h=60,level=95))+
ggtitle("CAPE and forecast",subtitle="ARIMA(3,1,2) 60-month forecast")+
ylab("CAPE")+xlab("")+
autolayer(window(cape,start=2006,end=2010),color="black")
autoplot(forecast(arfima.mod2,h=60,level=95))+
ggtitle("CAPE and forecast",subtitle="ARFIMA 60-month forecast")+
ylab("CAPE")+xlab("")+
autolayer(window(cape,start=2006,end=2010),color="black")
ARIMA has slight edge in some cases but it’s close. Over the entire time series (1881 to March 2021), ARFIMA and ARIMA are equally accurate.
#1881 to 1994
arima.fcast=function(x, h){forecast(Arima(x,order=c(3,1,2),lambda=0), h=h)}
(arima.acc.tscv=sqrt(mean(tsCV(cape.1881.to.1994,arima.fcast, h=1)^2,na.rm=TRUE))) #0.606
arfima.fcast=function(x, h){forecast(arfima(x,lambda=0), h=h)}
(arfima.acc.tscv=sqrt(mean(tsCV(cape.1881.to.1994,arfima.fcast, h=1)^2,na.rm=TRUE))) #0.611
#1881 to 2006
arima.fcast=function(x, h){forecast(Arima(x,order=c(3,1,2),lambda=0), h=h)}
(arima.acc.tscv=sqrt(mean(tsCV(cape.1881.to.2005,arima.fcast, h=1)^2,na.rm=TRUE))) #0.674
arfima.fcast=function(x, h){forecast(arfima(x,lambda=0), h=h)}
(arfima.acc.tscv=sqrt(mean(tsCV(cape.1881.to.2005,arfima.fcast, h=1)^2,na.rm=TRUE))) #0.673
#all cape
arfima.fcast=function(x, h){forecast(arfima(x,lambda=0), h=h)}
(arfima.acc.tscv=sqrt(mean(tsCV(cape,arfima.fcast, h=1)^2,na.rm=TRUE))) #0.706