DP ratio
library(multDM)
lagseries <- function(x) c(NA, x[1:(length(x)-1)])
hline <- function(yloc, xloc=c(-9e99,9e99), ...) { for (i in 1:length(yloc)) { lines(xloc, c(yloc[i], yloc[i]), ...) } }
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
## to perfectly replicate GW, use ds <- subset(dsraw, dsraw$yyyy<=2005)
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagdp <- lagseries(sp500d12)/lagseries(sp500index)
#lagdp <- log(sp500d12) - log(sp500index)
})
ds <- subset(ds, ds$yyyy>=1872, select=c("yyyy", "logeqp", "lagdp"))
print(summary(ds))
## yyyy logeqp lagdp
## Min. :1872 Min. :0.5587 Min. :0.01136
## 1st Qu.:1909 1st Qu.:0.9569 1st Qu.:0.03127
## Median :1946 Median :1.0698 Median :0.04280
## Mean :1946 Mean :1.0703 Mean :0.04327
## 3rd Qu.:1983 3rd Qu.:1.1929 3rd Qu.:0.05327
## Max. :2020 Max. :1.5269 Max. :0.10147
is.xyresid <- residuals(lm( logeqp ~ lagdp, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagdp, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagdp, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53048 -0.11329 0.00273 0.12528 0.42973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0307 0.0405 25.447 <2e-16 ***
## lagdp 0.9155 0.8678 1.055 0.293
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1853 on 147 degrees of freedom
## Multiple R-squared: 0.007514, Adjusted R-squared: 0.0007627
## F-statistic: 1.113 on 1 and 147 DF, p-value: 0.2932
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagdp, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagdp, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1, c = TRUE)
#resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )
text( 1890, 0.16, paste0("Dependent: Equity Premium\nIndependent: Lagged D/P\nData: 1872 (1892) to ", lyr), cex=0.85, pos=4)
text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
EP ratio
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- (1.0 + spret - tbill/100.0)
lagep <- log(lagseries(sp500e12/sp500index))
})
ds <- subset(ds, ds$yyyy>=1872, select=c("yyyy", "logeqp", "lagep"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagep, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagep, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagep, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50796 -0.11255 0.00406 0.12117 0.46303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.19523 0.10911 10.954 <2e-16 ***
## lagep 0.04657 0.04028 1.156 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1852 on 147 degrees of freedom
## Multiple R-squared: 0.009013, Adjusted R-squared: 0.002271
## F-statistic: 1.337 on 1 and 147 DF, p-value: 0.2495
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagep, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagep, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagep[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )
text( 1890, 0.16, paste0("Dependent: Equity Premium\nIndependent: Lagged E/P\nData: 1872 to ", lyr), cex=0.85, pos=4)
text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
DY ratio
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
})
ds <- subset(ds, ds$yyyy>=1873, select=c("yyyy", "logeqp", "dy"))
print(summary(ds))
## yyyy logeqp dy
## Min. :1873 Min. :0.5587 Min. :0.01107
## 1st Qu.:1910 1st Qu.:0.9512 1st Qu.:0.03327
## Median :1946 Median :1.0714 Median :0.04514
## Mean :1946 Mean :1.0704 Mean :0.04445
## 3rd Qu.:1983 3rd Qu.:1.1939 3rd Qu.:0.05532
## Max. :2020 Max. :1.5269 Max. :0.08771
is.xyresid <- residuals(lm( logeqp ~ dy, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ dy, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ dy, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51295 -0.11430 0.00255 0.12425 0.43901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.02506 0.04589 22.337 <2e-16 ***
## dy 1.01946 0.97356 1.047 0.297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.186 on 146 degrees of freedom
## Multiple R-squared: 0.007454, Adjusted R-squared: 0.0006561
## F-statistic: 1.097 on 1 and 146 DF, p-value: 0.2968
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ dy, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ dy, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$dy[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )
text( 1890, 0.16, paste0("Dependent: Log Equity Premium\nIndependent: Lagged D/Y\nData: 1872 to ", lyr), cex=0.85, pos=4)
text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
DE ratio
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
de <- log(lagseries(sp500d12)/lagseries(sp500e12))
})
ds <- subset(ds, ds$yyyy>=1873, select=c("yyyy", "logeqp", "de"))
print(summary(ds))
## yyyy logeqp de
## Min. :1873 Min. :0.5587 Min. :-1.2247
## 1st Qu.:1910 1st Qu.:0.9512 1st Qu.:-0.7290
## Median :1946 Median :1.0714 Median :-0.5556
## Mean :1946 Mean :1.0704 Mean :-0.5494
## 3rd Qu.:1983 3rd Qu.:1.1939 3rd Qu.:-0.3744
## Max. :2020 Max. :1.5269 Max. : 0.6459
is.xyresid <- residuals(lm( logeqp ~ de, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ de, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ de, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51376 -0.11897 0.00094 0.12422 0.45371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.072410 0.030601 35.045 <2e-16 ***
## de 0.003702 0.048189 0.077 0.939
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1866 on 146 degrees of freedom
## Multiple R-squared: 4.043e-05, Adjusted R-squared: -0.006809
## F-statistic: 0.005903 on 1 and 146 DF, p-value: 0.9389
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ de, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ de, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$de[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.5,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy [ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement [ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1890, 0.16, paste0("Independent: Lagged Dividend Payout (DE)\nData: 1872 to ", lyr), cex=0.85, pos=4)
text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
SVAR
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(svar)
})
ds <- subset(ds, ds$yyyy>=1886, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5327 -0.1141 0.0113 0.1208 0.4364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.060488 0.020087 52.794 <2e-16 ***
## lagterm 0.004685 0.004031 1.162 0.247
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1868 on 133 degrees of freedom
## Multiple R-squared: 0.01005, Adjusted R-squared: 0.002608
## F-statistic: 1.35 on 1 and 133 DF, p-value: 0.2473
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos= oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-1,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )
})
text( 1900, -0.56, paste0("Stock Variance\nSVAR"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
TBL
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(tbill)
})
ds <- subset(ds, ds$yyyy>=1921, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55010 -0.12823 0.02327 0.13960 0.40385
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.123395 0.028958 38.794 <2e-16 ***
## lagterm -0.009869 0.006377 -1.548 0.125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.19 on 98 degrees of freedom
## Multiple R-squared: 0.02386, Adjusted R-squared: 0.0139
## F-statistic: 2.395 on 1 and 98 DF, p-value: 0.1249
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Treasury Bill rates\nTBL"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
BKMK ratio
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(bkmk)
})
ds <- subset(ds, ds$yyyy>=1922, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53136 -0.12383 0.00069 0.13873 0.38279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0144333 0.0453528 22.368 <2e-16 ***
## lagterm 0.0013633 0.0007476 1.824 0.0713 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.19 on 97 degrees of freedom
## Multiple R-squared: 0.03315, Adjusted R-squared: 0.02318
## F-statistic: 3.326 on 1 and 97 DF, p-value: 0.07129
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Book to Market\nB/M"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
NTIS
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(ntis)
})
ds <- subset(ds, ds$yyyy>=1927, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54429 -0.11172 0.00177 0.10889 0.42792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.112192 0.023145 48.052 <2e-16 ***
## lagterm -0.015854 0.007191 -2.205 0.03 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1914 on 92 degrees of freedom
## Multiple R-squared: 0.05018, Adjusted R-squared: 0.03985
## F-statistic: 4.86 on 1 and 92 DF, p-value: 0.02998
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Net Equity Expansion\nNTIS"), cex=0.85, pos=4)
text( 1980, 0.10, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
EQIS
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(eqis)
})
ds <- subset(ds, ds$yyyy>=1928, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50136 -0.12464 0.01354 0.12754 0.42688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.173437 0.039187 29.944 < 2e-16 ***
## lagterm -0.004879 0.001830 -2.666 0.00907 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1885 on 91 degrees of freedom
## Multiple R-squared: 0.07247, Adjusted R-squared: 0.06228
## F-statistic: 7.11 on 1 and 91 DF, p-value: 0.009071
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1950, 0.16, paste0("Percent Equity Issuing \nEQIS"), cex=0.85, pos=4)
text( 1980, 0.08, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
IK
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(ik)
})
ds <- subset(ds, ds$yyyy>=1948, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42138 -0.10693 0.01645 0.11739 0.38294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.59741 0.19454 8.211 6.78e-12 ***
## lagterm -0.14488 0.05478 -2.645 0.0101 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1627 on 71 degrees of freedom
## Multiple R-squared: 0.0897, Adjusted R-squared: 0.07688
## F-statistic: 6.996 on 1 and 71 DF, p-value: 0.01005
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
#lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1980, -0.16, paste0("Investment to Capital\nIK"), cex=0.85, pos=4)
text( 1980, 0.16, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
LTY
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(ltyld10)
})
ds <- subset(ds, ds$yyyy>=1920, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54069 -0.11394 0.02043 0.14528 0.42861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.125341 0.040184 28.005 <2e-16 ***
## lagterm -0.008112 0.007341 -1.105 0.272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1929 on 99 degrees of freedom
## Multiple R-squared: 0.01219, Adjusted R-squared: 0.002208
## F-statistic: 1.221 on 1 and 99 DF, p-value: 0.2718
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Long Term Yield\nLTY"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
LTR
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(ltrate)
})
ds <- subset(ds, ds$yyyy>=1926, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52397 -0.12432 0.01557 0.12871 0.42050
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.071777 0.023363 45.874 <2e-16 ***
## lagterm 0.002346 0.002052 1.143 0.256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.194 on 93 degrees of freedom
## Multiple R-squared: 0.01386, Adjusted R-squared: 0.003254
## F-statistic: 1.307 on 1 and 93 DF, p-value: 0.2559
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.3,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Long Term Rate of Return\nltr"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
TMS
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(ltyld10)-lagseries(tbill)
})
ds <- subset(ds, ds$yyyy>=1921, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5383 -0.1268 0.0111 0.1463 0.4197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.05947 0.02891 36.647 <2e-16 ***
## lagterm 0.02180 0.01575 1.384 0.17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1904 on 98 degrees of freedom
## Multiple R-squared: 0.01917, Adjusted R-squared: 0.009162
## F-statistic: 1.915 on 1 and 98 DF, p-value: 0.1695
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Term Spread\nTMS"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
DFY
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(aaa)-lagseries(baa)
})
ds <- subset(ds, ds$yyyy>=1920, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54260 -0.10926 0.02003 0.13886 0.42433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.06688 0.03665 29.109 <2e-16 ***
## lagterm -0.01571 0.02519 -0.624 0.534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1937 on 99 degrees of freedom
## Multiple R-squared: 0.003916, Adjusted R-squared: -0.006145
## F-statistic: 0.3892 on 1 and 99 DF, p-value: 0.5341
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Default Yield Spread\nDFY"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
DFR
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(corprate)-lagseries(ltrate)
})
ds <- subset(ds, ds$yyyy>=1927, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51931 -0.10820 0.02046 0.13359 0.42455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.086660 0.020308 53.509 <2e-16 ***
## lagterm -0.002594 0.004485 -0.578 0.564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.196 on 92 degrees of freedom
## Multiple R-squared: 0.003622, Adjusted R-squared: -0.007208
## F-statistic: 0.3345 on 1 and 92 DF, p-value: 0.5645
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1945, 0.16, paste0("Default Return Spread\nDFR"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
INFL
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1915, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55940 -0.11452 0.02198 0.14502 0.40533
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.097495 0.022615 48.530 <2e-16 ***
## lagterm -0.003393 0.003915 -0.867 0.388
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1937 on 104 degrees of freedom
## Multiple R-squared: 0.007171, Adjusted R-squared: -0.002375
## F-statistic: 0.7512 on 1 and 104 DF, p-value: 0.3881
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1935, 0.16, paste0("Inflation\nINFL"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
BKMK ratio (1932)
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
lagterm = lagseries(bkmk)
})
ds <- subset(ds, ds$yyyy>=1932, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44895 -0.11680 0.00002 0.12753 0.38307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0160779 0.0450518 22.554 <2e-16 ***
## lagterm 0.0013379 0.0007249 1.846 0.0683 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.181 on 87 degrees of freedom
## Multiple R-squared: 0.03768, Adjusted R-squared: 0.02662
## F-statistic: 3.407 on 1 and 87 DF, p-value: 0.06833
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1950, 0.16, paste0("Book to Market (1932)\nB/M"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
ALL
dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv") ## download the latest version
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
TMS <- lagseries(ltyld10)-lagseries(tbill)
DFY <- lagseries(aaa)-lagseries(baa)
DFR <- lagseries(corprate)-lagseries(ltrate)
lagdp <- lagseries(sp500d12)/lagseries(sp500index)
lagep <- log(lagseries(sp500e12/sp500index))
dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
de <- log(lagseries(sp500d12)/lagseries(sp500e12))
lsvar <- lagseries(svar)
ltbill <- lagseries(tbill)
lbkmk <- lagseries(bkmk)
lntis <- lagseries(ntis)
leqis <- lagseries(eqis)
lik <- lagseries(ik)
l_ltyld10 <- lagseries(ltyld10)
l_ltrate <- lagseries(ltrate)
linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1949)
#print(summary(ds))
is.xyresid <- residuals(lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ lagdp + lagep + dy + de + lsvar + ltbill +
## lbkmk + lntis + leqis + lik + l_ltyld10 + l_ltrate + linfl,
## data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38171 -0.10217 0.01447 0.09337 0.30326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0216699 1.0255564 1.971 0.0535 .
## lagdp 7.6829462 8.5038595 0.903 0.3700
## lagep 0.1983463 0.2593475 0.765 0.4475
## dy -4.8817527 3.8978616 -1.252 0.2154
## de 0.0501479 0.2228055 0.225 0.8227
## lsvar 0.0141522 0.0104446 1.355 0.1807
## ltbill -0.0021745 0.0211509 -0.103 0.9185
## lbkmk -0.0009935 0.0024591 -0.404 0.6877
## lntis 0.0089132 0.0122783 0.726 0.4708
## leqis -0.0076170 0.0035953 -2.119 0.0384 *
## lik -0.0740755 0.0848877 -0.873 0.3865
## l_ltyld10 0.0062999 0.0178112 0.354 0.7248
## l_ltrate 0.0012228 0.0018814 0.650 0.5183
## linfl -0.0160571 0.0130514 -1.230 0.2236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.155 on 58 degrees of freedom
## Multiple R-squared: 0.3246, Adjusted R-squared: 0.1733
## F-statistic: 2.145 on 13 and 58 DF, p-value: 0.02444
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]+oos.lmcoef[3]*ds$lagep[i+1]+oos.lmcoef[4]*ds$dy[i+1]+oos.lmcoef[5]*ds$de[i+1]+oos.lmcoef[6]*ds$lsvar[i+1]+oos.lmcoef[7]*ds$ltbill[i+1]+oos.lmcoef[8]*ds$lbkmk[i+1]+oos.lmcoef[9]*ds$lntis[i+1]+oos.lmcoef[10]*ds$leqis[i+1]+oos.lmcoef[11]*ds$lik[i+1]+oos.lmcoef[12]*ds$l_ltyld10[i+1]+oos.lmcoef[13]*ds$l_ltrate[i+1]+oos.lmcoef[14]*ds$linfl[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-1.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
#lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1967, 0.20, paste0("All\nALL"), cex=0.85, pos=4)
text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
MS
library(leaps)
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
TMS <- lagseries(ltyld10)-lagseries(tbill)
DFY <- lagseries(aaa)-lagseries(baa)
DFR <- lagseries(corprate)-lagseries(ltrate)
lagdp <- lagseries(sp500d12)/lagseries(sp500index)
lagep <- log(lagseries(sp500e12/sp500index))
dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
de <- log(lagseries(sp500d12)/lagseries(sp500e12))
lsvar <- lagseries(svar)
ltbill <- lagseries(tbill)
lbkmk <- lagseries(bkmk)
lntis <- lagseries(ntis)
leqis <- lagseries(eqis)
lik <- lagseries(ik)
l_ltyld10 <- lagseries(ltyld10)
l_ltrate <- lagseries(ltrate)
linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1935)
#print(summary(ds))
predictors_df <- ds[, !names(ds) %in% "logeqp" ]
estimators_df <- ds["logeqp"]
res.sum = summary(regsubsets(logeqp ~ TMS+DFY+DFR+lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, method="seqrep", data=ds, nvmax=16))
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 1 linear dependencies found
## Reordering variables and trying again:
best.fits = data.frame(
Adj.R2 = which.max(res.sum$adjr2),
CP = which.min(res.sum$cp),
BIC = which.min(res.sum$bic)
)
res.sum
## Subset selection object
## Call: regsubsets.formula(logeqp ~ TMS + DFY + DFR + lagdp + lagep +
## dy + de + lsvar + ltbill + lbkmk + lntis + leqis + lik +
## l_ltyld10 + l_ltrate + linfl, method = "seqrep", data = ds,
## nvmax = 16)
## 16 Variables (and intercept)
## Forced in Forced out
## TMS FALSE FALSE
## DFY FALSE FALSE
## DFR FALSE FALSE
## lagdp FALSE FALSE
## lagep FALSE FALSE
## dy FALSE FALSE
## de FALSE FALSE
## lsvar FALSE FALSE
## ltbill FALSE FALSE
## lbkmk FALSE FALSE
## lntis FALSE FALSE
## leqis FALSE FALSE
## lik FALSE FALSE
## l_ltrate FALSE FALSE
## linfl FALSE FALSE
## l_ltyld10 FALSE FALSE
## 1 subsets of each size up to 15
## Selection Algorithm: 'sequential replacement'
## TMS DFY DFR lagdp lagep dy de lsvar ltbill lbkmk lntis leqis lik
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " " " "*"
## 2 ( 1 ) " " " " " " "*" " " " " " " " " " " " " " " "*" " "
## 3 ( 1 ) " " " " " " "*" " " " " " " " " " " " " " " "*" " "
## 4 ( 1 ) " " "*" " " "*" " " " " " " " " " " " " " " "*" " "
## 5 ( 1 ) " " "*" " " " " "*" " " " " " " " " " " "*" "*" " "
## 6 ( 1 ) "*" "*" "*" "*" "*" "*" " " " " " " " " " " " " " "
## 7 ( 1 ) " " "*" " " "*" "*" "*" " " " " " " " " "*" "*" " "
## 8 ( 1 ) " " "*" " " "*" "*" "*" " " " " " " "*" "*" "*" " "
## 9 ( 1 ) " " "*" "*" "*" "*" "*" " " " " " " "*" "*" "*" " "
## 10 ( 1 ) " " "*" "*" "*" "*" "*" "*" " " " " "*" "*" "*" " "
## 11 ( 1 ) " " "*" "*" "*" "*" "*" "*" " " " " "*" "*" "*" " "
## 12 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" " "
## 13 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" " "
## 14 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## l_ltyld10 l_ltrate linfl
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) " " " " " "
## 3 ( 1 ) " " " " "*"
## 4 ( 1 ) " " " " "*"
## 5 ( 1 ) " " " " "*"
## 6 ( 1 ) " " " " " "
## 7 ( 1 ) " " " " "*"
## 8 ( 1 ) " " " " "*"
## 9 ( 1 ) " " " " "*"
## 10 ( 1 ) " " " " "*"
## 11 ( 1 ) "*" " " "*"
## 12 ( 1 ) "*" " " "*"
## 13 ( 1 ) "*" "*" "*"
## 14 ( 1 ) "*" "*" "*"
## 15 ( 1 ) " " "*" "*"
best.fits
is.xyresid <- residuals(lm( logeqp ~ DFY+lagep+leqis+lntis+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ DFY+lagep+leqis+lntis+linfl, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ DFY + lagep + leqis + lntis + linfl, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48905 -0.10585 0.01762 0.10969 0.36488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.500979 0.173252 8.664 4.03e-13 ***
## DFY -0.060783 0.035907 -1.693 0.0944 .
## lagep 0.136010 0.052462 2.593 0.0113 *
## leqis -0.004805 0.002867 -1.676 0.0976 .
## lntis 0.004824 0.012387 0.389 0.6980
## linfl -0.007490 0.006545 -1.145 0.2558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1734 on 80 degrees of freedom
## Multiple R-squared: 0.1177, Adjusted R-squared: 0.06259
## F-statistic: 2.135 on 5 and 80 DF, p-value: 0.06963
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagep+leqis+lntis+linfl+DFY, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagep+leqis+lntis+linfl+DFY, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagep[i+1]+oos.lmcoef[3]*ds$leqis[i+1]+oos.lmcoef[4]*ds$lntis[i+1]+oos.lmcoef[5]*ds$linfl[i+1]+oos.lmcoef[6]*ds$DFY[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1955, 0.16, paste0("Model Selection\nMS"), cex=0.85, pos=4)
text( 1980, -0.15, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )
MS 2
library(leaps)
ds <- within(dsraw, {
spret <- (sp500index+sp500d12)/lagseries(sp500index)-1
logeqp <- 1.0 + spret - tbill/100.0
TMS <- lagseries(ltyld10)-lagseries(tbill)
DFY <- lagseries(aaa)-lagseries(baa)
DFR <- lagseries(corprate)-lagseries(ltrate)
lagdp <- lagseries(sp500d12)/lagseries(sp500index)
lagep <- log(lagseries(sp500e12/sp500index))
dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
de <- log(lagseries(sp500d12)/lagseries(sp500e12))
lsvar <- lagseries(svar)
ltbill <- lagseries(tbill)
lbkmk <- lagseries(bkmk)
lntis <- lagseries(ntis)
leqis <- lagseries(eqis)
lik <- lagseries(ik)
l_ltyld10 <- lagseries(ltyld10)
l_ltrate <- lagseries(ltrate)
linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1935)
#print(summary(ds))
predictors_df <- ds[, !names(ds) %in% "logeqp" ]
estimators_df <- ds["logeqp"]
res.sum = summary(regsubsets(logeqp ~ TMS+DFY+DFR+lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, method="seqrep", data=ds, nvmax=16))
## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 1 linear dependencies found
## Reordering variables and trying again:
best.fits = data.frame(
Adj.R2 = which.max(res.sum$adjr2),
CP = which.min(res.sum$cp),
BIC = which.min(res.sum$bic)
)
res.sum
## Subset selection object
## Call: regsubsets.formula(logeqp ~ TMS + DFY + DFR + lagdp + lagep +
## dy + de + lsvar + ltbill + lbkmk + lntis + leqis + lik +
## l_ltyld10 + l_ltrate + linfl, method = "seqrep", data = ds,
## nvmax = 16)
## 16 Variables (and intercept)
## Forced in Forced out
## TMS FALSE FALSE
## DFY FALSE FALSE
## DFR FALSE FALSE
## lagdp FALSE FALSE
## lagep FALSE FALSE
## dy FALSE FALSE
## de FALSE FALSE
## lsvar FALSE FALSE
## ltbill FALSE FALSE
## lbkmk FALSE FALSE
## lntis FALSE FALSE
## leqis FALSE FALSE
## lik FALSE FALSE
## l_ltrate FALSE FALSE
## linfl FALSE FALSE
## l_ltyld10 FALSE FALSE
## 1 subsets of each size up to 15
## Selection Algorithm: 'sequential replacement'
## TMS DFY DFR lagdp lagep dy de lsvar ltbill lbkmk lntis leqis lik
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " " " "*"
## 2 ( 1 ) " " " " " " "*" " " " " " " " " " " " " " " "*" " "
## 3 ( 1 ) " " " " " " "*" " " " " " " " " " " " " " " "*" " "
## 4 ( 1 ) " " "*" " " "*" " " " " " " " " " " " " " " "*" " "
## 5 ( 1 ) " " "*" " " " " "*" " " " " " " " " " " "*" "*" " "
## 6 ( 1 ) "*" "*" "*" "*" "*" "*" " " " " " " " " " " " " " "
## 7 ( 1 ) " " "*" " " "*" "*" "*" " " " " " " " " "*" "*" " "
## 8 ( 1 ) " " "*" " " "*" "*" "*" " " " " " " "*" "*" "*" " "
## 9 ( 1 ) " " "*" "*" "*" "*" "*" " " " " " " "*" "*" "*" " "
## 10 ( 1 ) " " "*" "*" "*" "*" "*" "*" " " " " "*" "*" "*" " "
## 11 ( 1 ) " " "*" "*" "*" "*" "*" "*" " " " " "*" "*" "*" " "
## 12 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" " "
## 13 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" " "
## 14 ( 1 ) " " "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## l_ltyld10 l_ltrate linfl
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) " " " " " "
## 3 ( 1 ) " " " " "*"
## 4 ( 1 ) " " " " "*"
## 5 ( 1 ) " " " " "*"
## 6 ( 1 ) " " " " " "
## 7 ( 1 ) " " " " "*"
## 8 ( 1 ) " " " " "*"
## 9 ( 1 ) " " " " "*"
## 10 ( 1 ) " " " " "*"
## 11 ( 1 ) "*" " " "*"
## 12 ( 1 ) "*" " " "*"
## 13 ( 1 ) "*" "*" "*"
## 14 ( 1 ) "*" "*" "*"
## 15 ( 1 ) " " "*" "*"
best.fits
is.xyresid <- residuals(lm( logeqp ~ DFY+lagdp+leqis+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)
isreg <- summary( lm( logeqp ~ DFY+lagdp+leqis+linfl, data=ds ) )
print(isreg)
##
## Call:
## lm(formula = logeqp ~ DFY + lagdp + leqis + linfl, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5000 -0.1101 0.0286 0.1205 0.3365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.023396 0.059714 17.138 < 2e-16 ***
## DFY -0.026642 0.034984 -0.762 0.44855
## lagdp 3.864531 1.365873 2.829 0.00588 **
## leqis -0.004815 0.002548 -1.890 0.06239 .
## linfl -0.004899 0.006103 -0.803 0.42450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1714 on 81 degrees of freedom
## Multiple R-squared: 0.1272, Adjusted R-squared: 0.08409
## F-statistic: 2.951 on 4 and 81 DF, p-value: 0.0249
tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)
firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
oos.meanpred <- mean( ds$logeqp[1:i] )
oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred
oos.lmcoef <- coef(lm( logeqp ~ lagdp+leqis+linfl+DFY, data=ds[1:i,]))
oosreg <- summary( lm( logeqp ~ lagdp+leqis+linfl+DFY, data=ds[1:i,]) )
oos.sterr[i+1] <- oosreg$sigma
oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]+oos.lmcoef[3]*ds$leqis[i+1]+oos.lmcoef[4]*ds$linfl[i+1]+oos.lmcoef[5]*ds$DFY[i+1]
oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))
plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds) ## done
plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )
plotwork <- plotwork[complete.cases(plotwork),]
#remove the above or comment it to show the full IS graph
rownames(plotwork) <- NULL
plotwork <- within(plotwork, {
is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})
annotateloc <- 35
with(plotwork, {
plot( yyyy, is.improvement, ylim=c(-0.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
lines( yyyy, oos.improvement, lwd=8, col="white" )
lines( yyyy, oos.improvement, lwd=3, col="blue" )
lines( yyyy, is.improvement, lwd=5, col="white" )
lines( yyyy, is.improvement, lty=1 )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
})
hline(0, lty=2)
text( 1955, 0.16, paste0("Model Selection\nMS"), cex=0.85, pos=4)
text( 1980, -0.15, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"), cex=0.85, pos=4 )