Lets get the data and define the local level model
# annual flow of the Nile from 1871-1970
data(Nile)
plot(Nile)
T<-length(Nile)
# Make the dummy variable that is 1 post 1889 when the Aswan dam was built.
year<-seq(1871,1970)
dummy<-ifelse(year>1898,1,0)
y<-datasets::Nile
y<-cbind(y,dummy)
Then we run the same analysis as the regular Nile dataset.
# define the state-space local level model
y.LLM <- SSModel(y ~ SSMtrend(1, Q=list(NA))+dummy, data=y, H=NA )
y.LLM
# check state variable
y.LLM$a
# check system matrices
y.LLM$Z
y.LLM$T
y.LLM$R
y.LLM$H
y.LLM$Q
# maximum likelihood estimation of paramaters of Q and H (i.e. variance of the two inovations)
pars <- log( rep(var(Nile, na.rm=TRUE), 2) )
y.fit <- fitSSM( model=y.LLM, inits=pars, method='BFGS' )
y.fit
# run Kalman Filter and Smoother with estimated parameters
y.out <- KFS(y.fit$model)
# construct 90% confidence intervals for filtered state - shorter way, using predict function with missing n.ahead option
y.KF.CI <- predict(y.fit$model, interval="confidence", level=0.9, filtered=TRUE)
# construct 90% confidence intervals for smoothed state
y.KS.CI <- predict(y.fit$model, interval="confidence", level=0.9)
# replace the filtered state for first period by NA
y.KF.CI[1,] <- NA
par(mfrow=c(2,2), cex=0.6)
# plot filtered state
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend = c("data","filtered state","90% confidence interval"),
col = c("black","blue","blue"), lty = c(1,1,2), lwd = c(1,1,1),bty="n", cex=0.9 )
plot( ts( c(y.out$v[-1]), start=1871), col="blue", lwd=2, xlab="", ylab="", main="forecast error")
abline(h=0, lty=3)
plot( ts( c(y.out$P)[-1], start=1871), col="blue", lwd=2, xlab="", ylab="", main="variance of state")
plot( ts( c(y.out$F)[-1], start=1871), col="blue", lwd=2, xlab="", ylab="", main="variance of forecast error")
par(mfrow=c(1,2), mar=c(3,3,2,1), cex=0.8)
# filtered
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","filtered","90% confidence interval"), col=c(1,4,4), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
# smoothed
plot.ts( cbind(y, y.KS.CI, Nile), plot.type="single",
col=c(1,2,2,2,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","smoothed","90% confidence interval"), col=c(1,2,2), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
par(mfrow=c(1,2), mar=c(3,3,2,1), cex=0.8)
# filtered
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","filtered","90% confidence interval"), col=c(1,4,4), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
# smoothed
plot.ts( cbind(y, y.KS.CI, Nile), plot.type="single",
col=c(1,2,2,2,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","smoothed","90% confidence interval"), col=c(1,2,2), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
When we run the normal Nile dataset, it looks similar to the one with the dam dummy, except it is scaled differently. Something went wrong, but I am not sure what it is. It may be the type of matrix I made when I added a dummy column. Either way, in the dummy chart, you can see that there is a thin black line that represents the dummy variable, while the latter charts do not have that.
# annual flow of the Nile from 1871-1970
data(Nile)
plot(Nile)
T<-length(Nile)
# Make the dummy variable that is 1 post 1889 when the Aswan dam was built.
year<-seq(1871,1970)
dummy<-ifelse(year>1889,1,0)
y<-datasets::Nile
y<-cbind(y,dummy)
y[21:50] <- NA
y[71:80] <- NA
Then we run the same analysis as the regular Nile dataset.
# define the state-space local level model
y.LLM <- SSModel(y ~ SSMtrend(1, Q=list(NA))+dummy, data=y, H=NA )
y.LLM
# check state variable
y.LLM$a
# check system matrices
y.LLM$Z
y.LLM$T
y.LLM$R
y.LLM$H
y.LLM$Q
# maximum likelihood estimation of paramaters of Q and H (i.e. variance of the two inovations)
pars <- log( rep(var(Nile, na.rm=TRUE), 2) )
y.fit <- fitSSM( model=y.LLM, inits=pars, method='BFGS' )
y.fit
# run Kalman Filter and Smoother with estimated parameters
y.out <- KFS(y.fit$model)
# construct 90% confidence intervals for filtered state - shorter way, using predict function with missing n.ahead option
y.KF.CI <- predict(y.fit$model, interval="confidence", level=0.9, filtered=TRUE)
# construct 90% confidence intervals for smoothed state
y.KS.CI <- predict(y.fit$model, interval="confidence", level=0.9)
# replace the filtered state for first period by NA
y.KF.CI[1,] <- NA
par(mfrow=c(2,2), cex=0.6)
# plot filtered state
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend = c("data","filtered state","90% confidence interval"),
col = c("black","blue","blue"), lty = c(1,1,2), lwd = c(1,1,1),bty="n", cex=0.9 )
plot( ts( c(y.out$v[-1]), start=1871), col="blue", lwd=2, xlab="", ylab="", main="forecast error")
abline(h=0, lty=3)
plot( ts( c(y.out$P)[-1], start=1871), col="blue", lwd=2, xlab="", ylab="", main="variance of state")
plot( ts( c(y.out$F)[-1], start=1871), col="blue", lwd=2, xlab="", ylab="", main="variance of forecast error")
par(mfrow=c(1,2), mar=c(3,3,2,1), cex=0.8)
# filtered
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","filtered","90% confidence interval"), col=c(1,4,4), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
# smoothed
plot.ts( cbind(y, y.KS.CI, Nile), plot.type="single",
col=c(1,2,2,2,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","smoothed","90% confidence interval"), col=c(1,2,2), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
par(mfrow=c(1,2), mar=c(3,3,2,1), cex=0.8)
# filtered
plot.ts( cbind(y, y.KF.CI, Nile), plot.type="single",
col=c(1,4,4,4,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","filtered","90% confidence interval"), col=c(1,4,4), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
# smoothed
plot.ts( cbind(y, y.KS.CI, Nile), plot.type="single",
col=c(1,2,2,2,1), lwd=c(2,2,2,2,1), lty=c(1,1,2,2,3), xlab="", ylab="", main="")
abline(v=1898)
legend("topright", legend=c("data","smoothed","90% confidence interval"), col=c(1,2,2), lty=c(1,1,2), lwd=2, bty="n", cex=0.9 )
When the missing values are added in, we can see that the filtered data goes flat and the dummy variable line is very volatile. We can also see that the confidence interval is very smoothed out for both, and the one with the dummy variable has a weird patter in its 90% percentile confidence interval.
While the dummies screwed up the class variable of my dataset, we can see that filtered and smoothed series are smoother when there is missing data vs. when there is no missing data.