Emily Gill - CVEN6833 - Homework #3

December 16, 2013

Question 8: SSA & Prediction

rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("ssab.txt")
source("bestARIMA.r")
library(stats)

Read in the monthly Lees Ferry streamflow and remove the seasonal cycle (remove the mean and divide by the standard deviation) :: NONSEASONAL

LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")

years <- LFmon[, 1]
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))  # convert to Million Acre Feet (MaF)
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", 
    "Oct", "Nov", "Dec")
LFann <- apply(LFmon, 1, sum)  # creates annual sums (across row)
LFmean <- apply(LFmon, 2, mean)  # monthly mean (across col)
LFsd <- apply(LFmon, 2, sd)  # monthly sd (across col)

X <- t((t(LFmon) - LFmean)/LFsd)  # same as scale(X)
years <- 1906:2006

Test out various lag window (M) values to find the noise floor by visual inspection.

Ms <- c(20, 25, 30)  # window for SSA
par(mfrow = c(3, 2))
for (i in 1:length(Ms)) {
    M <- Ms[i]
    SSAout <- ssab(LFann, M)
    plot(SSAout$lambdas, type = "l", xlab = "Modes/PCSs", ylab = "Fraction of Variance Explained", 
        main = "EVS")
    points(1:M, SSAout$lambdas[1:M], col = "red")
    plot(cumsum(SSAout$lambdas), type = "l")  # shows to use the first 4
    points(cumsum(SSAout$lambdas))
    title(main = "Cumulative Variance Explained")
}

plot of chunk unnamed-chunk-3

M <- 20  # choosing an M of 20
SSAout <- ssab(LFann, M)

The first ten reconstructed components (RPCs) explain almost 70% of the variance, so we will keep those. Therefore, K=10. We can plot these leading RPCs:

K <- 10
par(mfrow = c(2, 5))
for (i in 1:K) {
    plot(SSAout$Rpc[, i], type = "l", main = paste("RC", i, sep = " "))
}

plot of chunk unnamed-chunk-4

This plot shows the 10 reconstructed components (which is a result of choosing K=10). We can see from this that various frequencies of oscillation are captured as well as some trends.


# plot the reconstructed time series based on the K and M choices
par(mfrow = c(1, 1))
recon <- apply(SSAout$Rpc[, 1:K], 1, sum)
plot(years, LFann, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow", 
    ylim = range(LFann, recon))
lines(years, recon, col = "red", lwd = 2)

plot of chunk unnamed-chunk-5

This plot shows the affect of the window size chosen. The reconstructed time series follows the historical pretty well with a window size of 20 and a K size of 10. You can see the affect of changing these numbers by looking at the resulting plot of this.

# show that the sum of all the reconstructed components is equal to
# historical
xrecover <- apply(SSAout$Rpc, 1, sum)
plot(LFann, type = "l", main = "sum of reconstructed components")
lines(xrecover, col = "red")

plot of chunk unnamed-chunk-6

This plot shows that when we sum the reconstructed components, it sums to the historical data, which is good.

Now we can use this to predict. Fit an AR model to the leading modes and use them to predict 1 year ahead.

LFsub <- as.matrix(LFann[1:83])
M <- 20
K <- 5
forecast <- matrix(0, nrow = 1, ncol = M)

for (j in 1:18) {
    ssaout <- ssab(LFsub, M)
    RC <- ssaout$Rpc

    for (i in 1:K) {
        zz <- ar(RC[, i])
        pred <- predict(zz)
        forecast[i] <- pred$pred[1]
    }

    for (g in i + 1:M - K) {
        forecast[g] <- rnorm(1, mean = mean(ssaout$Rpc[, g]), sd = sd(ssaout$Rpc[, 
            g]))
    }

    # create new matrix including last prediction to do ssa, predict next year
    LFsub <- rbind(LFsub, sum(forecast))
}

plot(LFsub, type = "l", col = "red", main = "forecast year by year using SSA")
lines(LFann)
axis(1, years)

plot of chunk unnamed-chunk-7

This plot provides the forecast for post-1998. The forecast follows the historical values relatively well. We compute R-squared and RMSE values for the predicted versus observed…

Compute the R2 and RMSE for the predicted points

RMSE <- sum((LFann[81:101] - LFsub[81:101])^2)/21
RMSE
## [1] 63.39
R2 <- cor(LFann[81:101], LFsub[81:101])^2
R2
## [1] 0.007235