Welch & Goyal Replication

DP ratio

library(multDM)
lagseries <- function(x) c(NA, x[1:(length(x)-1)])
hline <- function(yloc, xloc=c(-9e99,9e99), ...) { for (i in 1:length(yloc)) { lines(xloc, c(yloc[i], yloc[i]), ...) } }

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version

## to perfectly replicate GW, use ds <- subset(dsraw, dsraw$yyyy<=2005)

ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagdp <- lagseries(sp500d12)/lagseries(sp500index)
    #lagdp <- log(sp500d12) - log(sp500index)
})
ds <- subset(ds, ds$yyyy>=1872, select=c("yyyy", "logeqp", "lagdp"))
print(summary(ds))

##       yyyy          logeqp           lagdp        
##  Min.   :1872   Min.   :0.5587   Min.   :0.01136  
##  1st Qu.:1909   1st Qu.:0.9569   1st Qu.:0.03127  
##  Median :1946   Median :1.0698   Median :0.04280  
##  Mean   :1946   Mean   :1.0703   Mean   :0.04327  
##  3rd Qu.:1983   3rd Qu.:1.1929   3rd Qu.:0.05327  
##  Max.   :2020   Max.   :1.5269   Max.   :0.10147

is.xyresid <- residuals(lm( logeqp ~ lagdp, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagdp, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagdp, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53048 -0.11329  0.00273  0.12528  0.42973 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.0307     0.0405  25.447   <2e-16 ***
## lagdp         0.9155     0.8678   1.055    0.293    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1853 on 147 degrees of freedom
## Multiple R-squared:  0.007514,   Adjusted R-squared:  0.0007627 
## F-statistic: 1.113 on 1 and 147 DF,  p-value: 0.2932

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagdp, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagdp, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1, c = TRUE)
#resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )
    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )
    
})

hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )

text( 1890, 0.16, paste0("Dependent: Equity Premium\nIndependent: Lagged D/P\nData: 1872 (1892) to ", lyr), cex=0.85, pos=4)

text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

EP ratio

ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  (1.0 + spret - tbill/100.0)
    lagep <- log(lagseries(sp500e12/sp500index))
})
ds <- subset(ds, ds$yyyy>=1872, select=c("yyyy", "logeqp", "lagep"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagep, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagep, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagep, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50796 -0.11255  0.00406  0.12117  0.46303 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.19523    0.10911  10.954   <2e-16 ***
## lagep        0.04657    0.04028   1.156    0.249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1852 on 147 degrees of freedom
## Multiple R-squared:  0.009013,   Adjusted R-squared:  0.002271 
## F-statistic: 1.337 on 1 and 147 DF,  p-value: 0.2495

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))

for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagep, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagep, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagep[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )

text( 1890, 0.16, paste0("Dependent: Equity Premium\nIndependent: Lagged E/P\nData: 1872 to ", lyr), cex=0.85, pos=4)

text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

DY ratio

ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <- 1.0 + spret - tbill/100.0
    dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
})
ds <- subset(ds, ds$yyyy>=1873, select=c("yyyy", "logeqp", "dy"))
print(summary(ds))

##       yyyy          logeqp             dy         
##  Min.   :1873   Min.   :0.5587   Min.   :0.01107  
##  1st Qu.:1910   1st Qu.:0.9512   1st Qu.:0.03327  
##  Median :1946   Median :1.0714   Median :0.04514  
##  Mean   :1946   Mean   :1.0704   Mean   :0.04445  
##  3rd Qu.:1983   3rd Qu.:1.1939   3rd Qu.:0.05532  
##  Max.   :2020   Max.   :1.5269   Max.   :0.08771

is.xyresid <- residuals(lm( logeqp ~ dy, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ dy, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ dy, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51295 -0.11430  0.00255  0.12425  0.43901 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.02506    0.04589  22.337   <2e-16 ***
## dy           1.01946    0.97356   1.047    0.297    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.186 on 146 degrees of freedom
## Multiple R-squared:  0.007454,   Adjusted R-squared:  0.0006561 
## F-statistic: 1.097 on 1 and 146 DF,  p-value: 0.2968

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ dy, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ dy, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$dy[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)
arrows( 1890, -0.12, 1895, -0.08 ); text( 1909, -0.09, "Conditional Model\nPredicts better", cex=0.75 )
arrows( 1890, -0.13, 1895, -0.17 ); text( 1909, -0.16, "Prevailing Mean \nPredicts better", cex=0.75 )

text( 1890, 0.16, paste0("Dependent: Log Equity Premium\nIndependent: Lagged D/Y\nData: 1872 to ", lyr), cex=0.85, pos=4)

text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

DE ratio

ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <- 1.0 + spret - tbill/100.0
    de <- log(lagseries(sp500d12)/lagseries(sp500e12))
})
ds <- subset(ds, ds$yyyy>=1873, select=c("yyyy", "logeqp", "de"))
print(summary(ds))

##       yyyy          logeqp             de         
##  Min.   :1873   Min.   :0.5587   Min.   :-1.2247  
##  1st Qu.:1910   1st Qu.:0.9512   1st Qu.:-0.7290  
##  Median :1946   Median :1.0714   Median :-0.5556  
##  Mean   :1946   Mean   :1.0704   Mean   :-0.5494  
##  3rd Qu.:1983   3rd Qu.:1.1939   3rd Qu.:-0.3744  
##  Max.   :2020   Max.   :1.5269   Max.   : 0.6459

is.xyresid <- residuals(lm( logeqp ~ de, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ de, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ de, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51376 -0.11897  0.00094  0.12422  0.45371 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.072410   0.030601  35.045   <2e-16 ***
## de          0.003702   0.048189   0.077    0.939    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1866 on 146 degrees of freedom
## Multiple R-squared:  4.043e-05,  Adjusted R-squared:  -0.006809 
## F-statistic: 0.005903 on 1 and 146 DF,  p-value: 0.9389

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ de, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ de, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$de[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.5,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy [ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement [ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1890, 0.16, paste0("Independent: Lagged Dividend Payout (DE)\nData: 1872 to ", lyr), cex=0.85, pos=4)

text( 1985, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

SVAR

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(svar)
})
ds <- subset(ds, ds$yyyy>=1886, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5327 -0.1141  0.0113  0.1208  0.4364 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.060488   0.020087  52.794   <2e-16 ***
## lagterm     0.004685   0.004031   1.162    0.247    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1868 on 133 degrees of freedom
## Multiple R-squared:  0.01005,    Adjusted R-squared:  0.002608 
## F-statistic:  1.35 on 1 and 133 DF,  p-value: 0.2473

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1)
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos= oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-1,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )

})

text( 1900, -0.56, paste0("Stock Variance\nSVAR"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

TBL

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(tbill)
})
ds <- subset(ds, ds$yyyy>=1921, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55010 -0.12823  0.02327  0.13960  0.40385 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.123395   0.028958  38.794   <2e-16 ***
## lagterm     -0.009869   0.006377  -1.548    0.125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.19 on 98 degrees of freedom
## Multiple R-squared:  0.02386,    Adjusted R-squared:  0.0139 
## F-statistic: 2.395 on 1 and 98 DF,  p-value: 0.1249

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Treasury Bill rates\nTBL"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

BKMK ratio

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(bkmk)
})
ds <- subset(ds, ds$yyyy>=1922, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53136 -0.12383  0.00069  0.13873  0.38279 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0144333  0.0453528  22.368   <2e-16 ***
## lagterm     0.0013633  0.0007476   1.824   0.0713 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.19 on 97 degrees of freedom
## Multiple R-squared:  0.03315,    Adjusted R-squared:  0.02318 
## F-statistic: 3.326 on 1 and 97 DF,  p-value: 0.07129

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Book to Market\nB/M"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

NTIS

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(ntis)
})
ds <- subset(ds, ds$yyyy>=1927, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54429 -0.11172  0.00177  0.10889  0.42792 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.112192   0.023145  48.052   <2e-16 ***
## lagterm     -0.015854   0.007191  -2.205     0.03 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1914 on 92 degrees of freedom
## Multiple R-squared:  0.05018,    Adjusted R-squared:  0.03985 
## F-statistic:  4.86 on 1 and 92 DF,  p-value: 0.02998

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)


text( 1945, 0.16, paste0("Net Equity Expansion\nNTIS"), cex=0.85, pos=4)

text( 1980, 0.10, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

EQIS

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(eqis)
})
ds <- subset(ds, ds$yyyy>=1928, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50136 -0.12464  0.01354  0.12754  0.42688 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.173437   0.039187  29.944  < 2e-16 ***
## lagterm     -0.004879   0.001830  -2.666  0.00907 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1885 on 91 degrees of freedom
## Multiple R-squared:  0.07247,    Adjusted R-squared:  0.06228 
## F-statistic:  7.11 on 1 and 91 DF,  p-value: 0.009071

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1950, 0.16, paste0("Percent Equity Issuing \nEQIS"), cex=0.85, pos=4)

text( 1980, 0.08, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(ik)
})
ds <- subset(ds, ds$yyyy>=1948, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42138 -0.10693  0.01645  0.11739  0.38294 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.59741    0.19454   8.211 6.78e-12 ***
## lagterm     -0.14488    0.05478  -2.645   0.0101 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1627 on 71 degrees of freedom
## Multiple R-squared:  0.0897, Adjusted R-squared:  0.07688 
## F-statistic: 6.996 on 1 and 71 DF,  p-value: 0.01005

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    #lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1980, -0.16, paste0("Investment to Capital\nIK"), cex=0.85, pos=4)

text( 1980, 0.16, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

LTY

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(ltyld10)
})
ds <- subset(ds, ds$yyyy>=1920, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54069 -0.11394  0.02043  0.14528  0.42861 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.125341   0.040184  28.005   <2e-16 ***
## lagterm     -0.008112   0.007341  -1.105    0.272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1929 on 99 degrees of freedom
## Multiple R-squared:  0.01219,    Adjusted R-squared:  0.002208 
## F-statistic: 1.221 on 1 and 99 DF,  p-value: 0.2718

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Long Term Yield\nLTY"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

LTR

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(ltrate)
})
ds <- subset(ds, ds$yyyy>=1926, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52397 -0.12432  0.01557  0.12871  0.42050 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.071777   0.023363  45.874   <2e-16 ***
## lagterm     0.002346   0.002052   1.143    0.256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.194 on 93 degrees of freedom
## Multiple R-squared:  0.01386,    Adjusted R-squared:  0.003254 
## F-statistic: 1.307 on 1 and 93 DF,  p-value: 0.2559

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.3,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Long Term Rate of Return\nltr"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

TMS

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(ltyld10)-lagseries(tbill)
})
ds <- subset(ds, ds$yyyy>=1921, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5383 -0.1268  0.0111  0.1463  0.4197 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.05947    0.02891  36.647   <2e-16 ***
## lagterm      0.02180    0.01575   1.384     0.17    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1904 on 98 degrees of freedom
## Multiple R-squared:  0.01917,    Adjusted R-squared:  0.009162 
## F-statistic: 1.915 on 1 and 98 DF,  p-value: 0.1695

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Term Spread\nTMS"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

DFY

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(aaa)-lagseries(baa)
})
ds <- subset(ds, ds$yyyy>=1920, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54260 -0.10926  0.02003  0.13886  0.42433 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.06688    0.03665  29.109   <2e-16 ***
## lagterm     -0.01571    0.02519  -0.624    0.534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1937 on 99 degrees of freedom
## Multiple R-squared:  0.003916,   Adjusted R-squared:  -0.006145 
## F-statistic: 0.3892 on 1 and 99 DF,  p-value: 0.5341

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Default Yield Spread\nDFY"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

DFR

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(corprate)-lagseries(ltrate)
})
ds <- subset(ds, ds$yyyy>=1927, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51931 -0.10820  0.02046  0.13359  0.42455 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.086660   0.020308  53.509   <2e-16 ***
## lagterm     -0.002594   0.004485  -0.578    0.564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.196 on 92 degrees of freedom
## Multiple R-squared:  0.003622,   Adjusted R-squared:  -0.007208 
## F-statistic: 0.3345 on 1 and 92 DF,  p-value: 0.5645

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1945, 0.16, paste0("Default Return Spread\nDFR"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

INFL

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1915, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55940 -0.11452  0.02198  0.14502  0.40533 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.097495   0.022615  48.530   <2e-16 ***
## lagterm     -0.003393   0.003915  -0.867    0.388    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1937 on 104 degrees of freedom
## Multiple R-squared:  0.007171,   Adjusted R-squared:  -0.002375 
## F-statistic: 0.7512 on 1 and 104 DF,  p-value: 0.3881

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1935, 0.16, paste0("Inflation\nINFL"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

BKMK ratio (1932)

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    lagterm = lagseries(bkmk)
})
ds <- subset(ds, ds$yyyy>=1932, select=c("yyyy", "logeqp", "lagterm"))
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagterm, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagterm, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagterm, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44895 -0.11680  0.00002  0.12753  0.38307 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.0160779  0.0450518  22.554   <2e-16 ***
## lagterm     0.0013379  0.0007249   1.846   0.0683 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.181 on 87 degrees of freedom
## Multiple R-squared:  0.03768,    Adjusted R-squared:  0.02662 
## F-statistic: 3.407 on 1 and 87 DF,  p-value: 0.06833

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagterm, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagterm, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagterm[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.2,0.2), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -1, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1950, 0.16, paste0("Book to Market (1932)\nB/M"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

ALL

dsraw <- read.csv("~/Downloads//Welch&GoyalReplication/goyal-welch-a.csv")  ## download the latest version
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    TMS <- lagseries(ltyld10)-lagseries(tbill)
    DFY <- lagseries(aaa)-lagseries(baa)
    DFR <- lagseries(corprate)-lagseries(ltrate)
    lagdp <- lagseries(sp500d12)/lagseries(sp500index)
    lagep <- log(lagseries(sp500e12/sp500index))
    dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
    de <- log(lagseries(sp500d12)/lagseries(sp500e12))
    lsvar <- lagseries(svar)
    ltbill <- lagseries(tbill)
    lbkmk <- lagseries(bkmk)
    lntis <- lagseries(ntis)
    leqis <- lagseries(eqis)
    lik <- lagseries(ik)
    l_ltyld10 <- lagseries(ltyld10)
    l_ltrate <- lagseries(ltrate)
    linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1949)
#print(summary(ds))

is.xyresid <- residuals(lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ lagdp + lagep + dy + de + lsvar + ltbill + 
##     lbkmk + lntis + leqis + lik + l_ltyld10 + l_ltrate + linfl, 
##     data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38171 -0.10217  0.01447  0.09337  0.30326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  2.0216699  1.0255564   1.971   0.0535 .
## lagdp        7.6829462  8.5038595   0.903   0.3700  
## lagep        0.1983463  0.2593475   0.765   0.4475  
## dy          -4.8817527  3.8978616  -1.252   0.2154  
## de           0.0501479  0.2228055   0.225   0.8227  
## lsvar        0.0141522  0.0104446   1.355   0.1807  
## ltbill      -0.0021745  0.0211509  -0.103   0.9185  
## lbkmk       -0.0009935  0.0024591  -0.404   0.6877  
## lntis        0.0089132  0.0122783   0.726   0.4708  
## leqis       -0.0076170  0.0035953  -2.119   0.0384 *
## lik         -0.0740755  0.0848877  -0.873   0.3865  
## l_ltyld10    0.0062999  0.0178112   0.354   0.7248  
## l_ltrate     0.0012228  0.0018814   0.650   0.5183  
## linfl       -0.0160571  0.0130514  -1.230   0.2236  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.155 on 58 degrees of freedom
## Multiple R-squared:  0.3246, Adjusted R-squared:  0.1733 
## F-statistic: 2.145 on 13 and 58 DF,  p-value: 0.02444

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]+oos.lmcoef[3]*ds$lagep[i+1]+oos.lmcoef[4]*ds$dy[i+1]+oos.lmcoef[5]*ds$de[i+1]+oos.lmcoef[6]*ds$lsvar[i+1]+oos.lmcoef[7]*ds$ltbill[i+1]+oos.lmcoef[8]*ds$lbkmk[i+1]+oos.lmcoef[9]*ds$lntis[i+1]+oos.lmcoef[10]*ds$leqis[i+1]+oos.lmcoef[11]*ds$lik[i+1]+oos.lmcoef[12]*ds$l_ltyld10[i+1]+oos.lmcoef[13]*ds$l_ltrate[i+1]+oos.lmcoef[14]*ds$linfl[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-1.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    #lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1967, 0.20, paste0("All\nALL"), cex=0.85, pos=4)

text( 1980, 0.13, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

library(leaps)
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    TMS <- lagseries(ltyld10)-lagseries(tbill)
    DFY <- lagseries(aaa)-lagseries(baa)
    DFR <- lagseries(corprate)-lagseries(ltrate)
    lagdp <- lagseries(sp500d12)/lagseries(sp500index)
    lagep <- log(lagseries(sp500e12/sp500index))
    dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
    de <- log(lagseries(sp500d12)/lagseries(sp500e12))
    lsvar <- lagseries(svar)
    ltbill <- lagseries(tbill)
    lbkmk <- lagseries(bkmk)
    lntis <- lagseries(ntis)
    leqis <- lagseries(eqis)
    lik <- lagseries(ik)
    l_ltyld10 <- lagseries(ltyld10)
    l_ltrate <- lagseries(ltrate)
    linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1935)
#print(summary(ds))
predictors_df <- ds[, !names(ds) %in% "logeqp" ]
estimators_df <- ds["logeqp"]

res.sum = summary(regsubsets(logeqp ~ TMS+DFY+DFR+lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, method="seqrep", data=ds, nvmax=16))

## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 1 linear dependencies found

## Reordering variables and trying again:

best.fits = data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP = which.min(res.sum$cp),
  BIC = which.min(res.sum$bic)
)
res.sum

## Subset selection object
## Call: regsubsets.formula(logeqp ~ TMS + DFY + DFR + lagdp + lagep + 
##     dy + de + lsvar + ltbill + lbkmk + lntis + leqis + lik + 
##     l_ltyld10 + l_ltrate + linfl, method = "seqrep", data = ds, 
##     nvmax = 16)
## 16 Variables  (and intercept)
##           Forced in Forced out
## TMS           FALSE      FALSE
## DFY           FALSE      FALSE
## DFR           FALSE      FALSE
## lagdp         FALSE      FALSE
## lagep         FALSE      FALSE
## dy            FALSE      FALSE
## de            FALSE      FALSE
## lsvar         FALSE      FALSE
## ltbill        FALSE      FALSE
## lbkmk         FALSE      FALSE
## lntis         FALSE      FALSE
## leqis         FALSE      FALSE
## lik           FALSE      FALSE
## l_ltrate      FALSE      FALSE
## linfl         FALSE      FALSE
## l_ltyld10     FALSE      FALSE
## 1 subsets of each size up to 15
## Selection Algorithm: 'sequential replacement'
##           TMS DFY DFR lagdp lagep dy  de  lsvar ltbill lbkmk lntis leqis lik
## 1  ( 1 )  " " " " " " " "   " "   " " " " " "   " "    " "   " "   " "   "*"
## 2  ( 1 )  " " " " " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 3  ( 1 )  " " " " " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 4  ( 1 )  " " "*" " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 5  ( 1 )  " " "*" " " " "   "*"   " " " " " "   " "    " "   "*"   "*"   " "
## 6  ( 1 )  "*" "*" "*" "*"   "*"   "*" " " " "   " "    " "   " "   " "   " "
## 7  ( 1 )  " " "*" " " "*"   "*"   "*" " " " "   " "    " "   "*"   "*"   " "
## 8  ( 1 )  " " "*" " " "*"   "*"   "*" " " " "   " "    "*"   "*"   "*"   " "
## 9  ( 1 )  " " "*" "*" "*"   "*"   "*" " " " "   " "    "*"   "*"   "*"   " "
## 10  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" " "   " "    "*"   "*"   "*"   " "
## 11  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" " "   " "    "*"   "*"   "*"   " "
## 12  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   " "    "*"   "*"   "*"   " "
## 13  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   " "    "*"   "*"   "*"   " "
## 14  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   "*"    "*"   "*"   "*"   " "
## 15  ( 1 ) "*" "*" "*" "*"   "*"   "*" "*" "*"   "*"    "*"   "*"   "*"   "*"
##           l_ltyld10 l_ltrate linfl
## 1  ( 1 )  " "       " "      " "  
## 2  ( 1 )  " "       " "      " "  
## 3  ( 1 )  " "       " "      "*"  
## 4  ( 1 )  " "       " "      "*"  
## 5  ( 1 )  " "       " "      "*"  
## 6  ( 1 )  " "       " "      " "  
## 7  ( 1 )  " "       " "      "*"  
## 8  ( 1 )  " "       " "      "*"  
## 9  ( 1 )  " "       " "      "*"  
## 10  ( 1 ) " "       " "      "*"  
## 11  ( 1 ) "*"       " "      "*"  
## 12  ( 1 ) "*"       " "      "*"  
## 13  ( 1 ) "*"       "*"      "*"  
## 14  ( 1 ) "*"       "*"      "*"  
## 15  ( 1 ) " "       "*"      "*"

best.fits

is.xyresid <- residuals(lm( logeqp ~ DFY+lagep+leqis+lntis+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ DFY+lagep+leqis+lntis+linfl, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ DFY + lagep + leqis + lntis + linfl, data = ds)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48905 -0.10585  0.01762  0.10969  0.36488 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.500979   0.173252   8.664 4.03e-13 ***
## DFY         -0.060783   0.035907  -1.693   0.0944 .  
## lagep        0.136010   0.052462   2.593   0.0113 *  
## leqis       -0.004805   0.002867  -1.676   0.0976 .  
## lntis        0.004824   0.012387   0.389   0.6980    
## linfl       -0.007490   0.006545  -1.145   0.2558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1734 on 80 degrees of freedom
## Multiple R-squared:  0.1177, Adjusted R-squared:  0.06259 
## F-statistic: 2.135 on 5 and 80 DF,  p-value: 0.06963

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagep+leqis+lntis+linfl+DFY, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagep+leqis+lntis+linfl+DFY, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagep[i+1]+oos.lmcoef[3]*ds$leqis[i+1]+oos.lmcoef[4]*ds$lntis[i+1]+oos.lmcoef[5]*ds$linfl[i+1]+oos.lmcoef[6]*ds$DFY[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1955, 0.16, paste0("Model Selection\nMS"), cex=0.85, pos=4)

text( 1980, -0.15, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )

MS 2

library(leaps)
ds <- within(dsraw, {
    spret <-  (sp500index+sp500d12)/lagseries(sp500index)-1
    logeqp <-  1.0 + spret - tbill/100.0
    TMS <- lagseries(ltyld10)-lagseries(tbill)
    DFY <- lagseries(aaa)-lagseries(baa)
    DFR <- lagseries(corprate)-lagseries(ltrate)
    lagdp <- lagseries(sp500d12)/lagseries(sp500index)
    lagep <- log(lagseries(sp500e12/sp500index))
    dy <- lagseries(sp500d12)/lagseries(lagseries(sp500index))
    de <- log(lagseries(sp500d12)/lagseries(sp500e12))
    lsvar <- lagseries(svar)
    ltbill <- lagseries(tbill)
    lbkmk <- lagseries(bkmk)
    lntis <- lagseries(ntis)
    leqis <- lagseries(eqis)
    lik <- lagseries(ik)
    l_ltyld10 <- lagseries(ltyld10)
    l_ltrate <- lagseries(ltrate)
    linfl <- lagseries(infl)
})
ds <- subset(ds, ds$yyyy>=1935)
#print(summary(ds))
predictors_df <- ds[, !names(ds) %in% "logeqp" ]
estimators_df <- ds["logeqp"]

res.sum = summary(regsubsets(logeqp ~ TMS+DFY+DFR+lagdp+lagep+dy+de+lsvar+ltbill+lbkmk+lntis+leqis+lik+l_ltyld10+l_ltrate+linfl, method="seqrep", data=ds, nvmax=16))

## Warning in leaps.setup(x, y, wt = wt, nbest = nbest, nvmax = nvmax, force.in =
## force.in, : 1 linear dependencies found

## Reordering variables and trying again:

best.fits = data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP = which.min(res.sum$cp),
  BIC = which.min(res.sum$bic)
)
res.sum

## Subset selection object
## Call: regsubsets.formula(logeqp ~ TMS + DFY + DFR + lagdp + lagep + 
##     dy + de + lsvar + ltbill + lbkmk + lntis + leqis + lik + 
##     l_ltyld10 + l_ltrate + linfl, method = "seqrep", data = ds, 
##     nvmax = 16)
## 16 Variables  (and intercept)
##           Forced in Forced out
## TMS           FALSE      FALSE
## DFY           FALSE      FALSE
## DFR           FALSE      FALSE
## lagdp         FALSE      FALSE
## lagep         FALSE      FALSE
## dy            FALSE      FALSE
## de            FALSE      FALSE
## lsvar         FALSE      FALSE
## ltbill        FALSE      FALSE
## lbkmk         FALSE      FALSE
## lntis         FALSE      FALSE
## leqis         FALSE      FALSE
## lik           FALSE      FALSE
## l_ltrate      FALSE      FALSE
## linfl         FALSE      FALSE
## l_ltyld10     FALSE      FALSE
## 1 subsets of each size up to 15
## Selection Algorithm: 'sequential replacement'
##           TMS DFY DFR lagdp lagep dy  de  lsvar ltbill lbkmk lntis leqis lik
## 1  ( 1 )  " " " " " " " "   " "   " " " " " "   " "    " "   " "   " "   "*"
## 2  ( 1 )  " " " " " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 3  ( 1 )  " " " " " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 4  ( 1 )  " " "*" " " "*"   " "   " " " " " "   " "    " "   " "   "*"   " "
## 5  ( 1 )  " " "*" " " " "   "*"   " " " " " "   " "    " "   "*"   "*"   " "
## 6  ( 1 )  "*" "*" "*" "*"   "*"   "*" " " " "   " "    " "   " "   " "   " "
## 7  ( 1 )  " " "*" " " "*"   "*"   "*" " " " "   " "    " "   "*"   "*"   " "
## 8  ( 1 )  " " "*" " " "*"   "*"   "*" " " " "   " "    "*"   "*"   "*"   " "
## 9  ( 1 )  " " "*" "*" "*"   "*"   "*" " " " "   " "    "*"   "*"   "*"   " "
## 10  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" " "   " "    "*"   "*"   "*"   " "
## 11  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" " "   " "    "*"   "*"   "*"   " "
## 12  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   " "    "*"   "*"   "*"   " "
## 13  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   " "    "*"   "*"   "*"   " "
## 14  ( 1 ) " " "*" "*" "*"   "*"   "*" "*" "*"   "*"    "*"   "*"   "*"   " "
## 15  ( 1 ) "*" "*" "*" "*"   "*"   "*" "*" "*"   "*"    "*"   "*"   "*"   "*"
##           l_ltyld10 l_ltrate linfl
## 1  ( 1 )  " "       " "      " "  
## 2  ( 1 )  " "       " "      " "  
## 3  ( 1 )  " "       " "      "*"  
## 4  ( 1 )  " "       " "      "*"  
## 5  ( 1 )  " "       " "      "*"  
## 6  ( 1 )  " "       " "      " "  
## 7  ( 1 )  " "       " "      "*"  
## 8  ( 1 )  " "       " "      "*"  
## 9  ( 1 )  " "       " "      "*"  
## 10  ( 1 ) " "       " "      "*"  
## 11  ( 1 ) "*"       " "      "*"  
## 12  ( 1 ) "*"       " "      "*"  
## 13  ( 1 ) "*"       "*"      "*"  
## 14  ( 1 ) "*"       "*"      "*"  
## 15  ( 1 ) " "       "*"      "*"

best.fits

is.xyresid <- residuals(lm( logeqp ~ DFY+lagdp+leqis+linfl, data=ds ) )
is.meanresid <- ds$logeqp - mean(ds$logeqp)

isreg <- summary( lm( logeqp ~ DFY+lagdp+leqis+linfl, data=ds ) )
print(isreg)

## 
## Call:
## lm(formula = logeqp ~ DFY + lagdp + leqis + linfl, data = ds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5000 -0.1101  0.0286  0.1205  0.3365 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.023396   0.059714  17.138  < 2e-16 ***
## DFY         -0.026642   0.034984  -0.762  0.44855    
## lagdp        3.864531   1.365873   2.829  0.00588 ** 
## leqis       -0.004815   0.002548  -1.890  0.06239 .  
## linfl       -0.004899   0.006103  -0.803  0.42450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1714 on 81 degrees of freedom
## Multiple R-squared:  0.1272, Adjusted R-squared:  0.08409 
## F-statistic: 2.951 on 4 and 81 DF,  p-value: 0.0249

tstat <- round(coef(isreg)[2,3],2)
rsq <- round(isreg$adj.r.squared, 3)*100
lyr <- tail(ds$yyyy,1)

firstyear <- 20
oos.xyresid <- rep(NA, nrow(ds))
oos.meanresid <- rep(NA, nrow(ds))
oos.sterr <- rep(NA, nrow(ds))
for (i in firstyear:nrow(ds)) {
    oos.meanpred <- mean( ds$logeqp[1:i] )
    oos.meanresid[i+1] <- ds$logeqp[i+1]-oos.meanpred

    oos.lmcoef <- coef(lm( logeqp ~ lagdp+leqis+linfl+DFY, data=ds[1:i,]))
    oosreg <- summary( lm( logeqp ~ lagdp+leqis+linfl+DFY, data=ds[1:i,]) )
    oos.sterr[i+1] <- oosreg$sigma
    oos.pred <- oos.lmcoef[1]+oos.lmcoef[2]*ds$lagdp[i+1]+oos.lmcoef[3]*ds$leqis[i+1]+oos.lmcoef[4]*ds$linfl[i+1]+oos.lmcoef[5]*ds$DFY[i+1]
    oos.xyresid[i+1] <- ds$logeqp[i+1]-oos.pred
}
RMSt = DM.test(oos.meanresid[21:(length(oos.meanresid)-1)],oos.xyresid[21:(length(oos.xyresid)-1)],ds$logeqp[21:length(ds$logeqp)] ,h=1 )
resid_bar = mean(oos.xyresid[21:(length(oos.xyresid)-1)])
se_oos = se_oos = oos.sterr[21:(length(oos.sterr)-1)]
confidenceterm = RMSt$statistic*(se_oos/sqrt(length(oos.xyresid[21:(length(oos.xyresid)-1)])))

plotwork <- data.frame(yyyy=ds$yyyy, is.meanresid=is.meanresid, is.xyresid=is.xyresid,
                   oos.meanresid=oos.meanresid[1:nrow(ds)], oos.xyresid=oos.xyresid[1:nrow(ds)] )
rm(ds)  ## done

plotwork[firstyear,] <- c( plotwork$yyyy[firstyear-1]+1, 0, 0, 0, 0 )

plotwork <- plotwork[complete.cases(plotwork),]
#remove the above or comment it to show the full IS graph
rownames(plotwork) <- NULL

plotwork <- within(plotwork, {
    is.improvement <- cumsum( plotwork$is.meanresid**2 ) - cumsum( is.xyresid**2 )
    oos.improvement <- cumsum( oos.meanresid**2 ) - cumsum( oos.xyresid**2 )
})

annotateloc <- 35
with(plotwork, {
    plot( yyyy, is.improvement, ylim=c(-0.5,0.5), type="n", xlab="Year", ylab="Cumulative SSE Difference" )
    rect(1973, -2, 1975, 1, col="red"); text( 1977, -0.15, "Oil Shock", cex=0.75, srt=90, col="red")
    lines( yyyy, oos.improvement, lwd=8, col="white" )
    lines( yyyy, oos.improvement, lwd=3, col="blue" )
    lines( yyyy, is.improvement, lwd=5, col="white" )
    lines( yyyy, is.improvement, lty=1 )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]+confidenceterm, lty = 3, lwd=2, col="orange" )
    lines( yyyy[1:length(yyyy)-1], oos.improvement[1:length(oos.improvement)-1]-confidenceterm, lty = 3, lwd=2, col="orange" )
    text( yyyy[annotateloc], oos.improvement[annotateloc]-0.01, pos=1, "OOS", col="blue" )
    text( yyyy[annotateloc], is.improvement[annotateloc]+0.01, pos=3, "IS", col="black" )

    lines( c(yyyy[ which(plotwork$yyyy == 1965)], 2100), rep(oos.improvement[ which(plotwork$yyyy == 1965)],2), lty=2 )

})

hline(0, lty=2)

text( 1955, 0.16, paste0("Model Selection\nMS"), cex=0.85, pos=4)

text( 1980, -0.15, paste0("\nIS T-stat: ", tstat, "\nIS Adj R^2: ", rsq, "%"),  cex=0.85, pos=4 )