# HW 3 Q 8 JLCY, 19 Dec 2013

############################################################ 

# clear variables
rm(list = ls())

Qnumber = 12

source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\loaddata.r")
## Package `sm', version 2.2-5: type help(sm) for summary information

# source('http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/ssa-b.txt')
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\ssa-b.r")

#### This code does the SSA and produces the RCs, plots the fraction Eigen
#### values (similar to the Eigen spectrum of PCa) the first part of problem 7

## For forecasting, fit AR models to the RCs Suppose RC is the reconstructed
## component through time t and you want a one step ahead forecast rc.ar =
## ar(RC) xp = predict(rc.ar,n.ahead=1) xp$pred gives the prediction xp$se
## gives the prediction standard error

# Repeat this for all the RCs. As before, fit a Normal distribution to the
# noise RCs

## The mean forecast is the sum of all the component forecasts The standard
## error of this is = sqrt(serc1^2 + serc2^2 + ... + serck^2) Assuming normal
## distribution you can compute the threshold probabilities for RPSS

############################################################ 

# select best M

par(ask = TRUE)

### SSA on Annual total volume
M = 20  # lag window
ssaout = ssab(Annual, M)

# ssaout$lambdas = Fraction Eigen Value

# Eigen spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained", 
    main = "M = 20")
points(1:20, ssaout$lambdas[1:20], col = "red")

plot of chunk unnamed-chunk-1


############################################################ 

M = 25  # lag window
ssaout = ssab(Annual, M)

# Eigen spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained", 
    main = "M = 25")
points(1:20, ssaout$lambdas[1:20], col = "red")

plot of chunk unnamed-chunk-1


############################################################ 

M = 30  # lag window
ssaout = ssab(Annual, M)

# Eigen Spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained", 
    main = "M = 30")
points(1:20, ssaout$lambdas[1:20], col = "red")

plot of chunk unnamed-chunk-1


############################################################ 

# M = 20: first 5 modes explain > 50% of variance, better than others choose
# M = 20

M = 20  # lag window
ssaout = ssab(Annual, M)

# ssaout$Rpc = reconstructed components RCs

# Temporal plot of leading RCs
plot(1906:2006, ssaout$Rpc[, 1], type = "l", main = "RC1 Temporal Mode of Variability")

plot of chunk unnamed-chunk-1

plot(1906:2006, ssaout$Rpc[, 2], type = "l", main = "RC2 Temporal Mode of Variability")

plot of chunk unnamed-chunk-1

plot(1906:2006, ssaout$Rpc[, 3], type = "l", main = "RC3 Temporal Mode of Variability")

plot of chunk unnamed-chunk-1

plot(1906:2006, ssaout$Rpc[, 4], type = "l", main = "RC4 Temporal Mode of Variability")

plot of chunk unnamed-chunk-1

plot(1906:2006, ssaout$Rpc[, 5], type = "l", main = "RC5 Temporal Mode of Variability")

plot of chunk unnamed-chunk-1


############################################################ 

## Add the first K reconstructed components
K = 5

Recon = apply(ssaout$Rpc[, 1:K], 1, sum)

# Plot sum of leading RCs & historical time series

plot(year, Annual, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow", 
    main = "First 5 RCs (Red) & Obs time series (Black)", ylim = range(Annual, 
        Recon))
lines(year, Recon, col = "red", lwd = 2)

plot of chunk unnamed-chunk-1


############################################################ 

## as a check if you sum *all* the reconstructed components you should get
## the original data back

xrecover = apply(ssaout$Rpc, 1, sum)

plot(year, Annual, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow", 
    main = "All RCs (Red) & Obs time series (Black)", ylim = range(Annual, Recon))
lines(year, xrecover, col = "red", lwd = 2)

plot of chunk unnamed-chunk-1


## this equals to Annual

############################################################ 

library(stats)

# predicting years ahead
fyr = 1

# SSA from 1906 to t year & predict t+1 year

SSA_AR_fn = function(t_year) {

    # data
    input = Annual[1:t_year]

    # SSA
    ssaout_t = ssab(input, M)

    sum_RC = 0

    for (i in 1:K) {
        # K = 5
        arbest = ar(ssaout_t$Rpc[i, ])
        p = order(arbest$aic)[1]
        zz = arima(ssaout_t$Rpc[i, ], order = c(p, 0, 0), include.mean = TRUE, 
            method = "ML")

        rc.ar = zz

        # predict
        xp = predict(zz, n.ahead = fyr, se.fit = T)

        sum_RC = sum_RC + xp$pred

        # back standardize xp_pred = xp$pred * sd(input) + mean(input)
    }

    result = list(pred = sum_RC)

    return(result)
}

############################################################ 

# SSA from 1906 to 1998 (93th value) --> predcit 1998 so on till 2006

forecast = c(1:9)
j = 1

for (i in 1:9) {
    pred_tyear = SSA_AR_fn(t_year = (i + 92))
    forecast[i] = pred_tyear$pred
}

############################################################ 

# plot against observed value & calculate statistics

library(graphics)

plot(Annual[93:101], forecast, xlab = "Obs flow", main = "Obs & Forecast flow 1998 to 2006")

plot of chunk unnamed-chunk-1


# plot with 1:1 line
plot(Annual[93:101], forecast, xlab = "Obs flow", ylim = range(0, forecast), 
    xlim = range(0, Annual[93:101]), main = "Obs & Forecast flow 1998 to 2006")
abline(0, 1)

plot of chunk unnamed-chunk-1


R2 = (cor(Annual[93:101], forecast))^2
RMSE = sqrt((sum((forecast - Annual[93:101])^2))/9)

cat("R^2 for predicting 1998-2006 = ", R2, "\n", "\n")
## R^2 for predicting 1998-2006 =  0.09789 
## 
cat("RMSE for predicting 1998-2006 = ", RMSE, "\n", "\n")
## RMSE for predicting 1998-2006 =  9.031 
## 

############################################################ 

# SSA from 1906 to 2006 (101th value) --> predcit 2013

# 2006 to 2013, 8 years ahead
fyr = 8

pred_tyear_2013 = SSA_AR_fn(t_year = 101)

plot(2006:2013, pred_tyear_2013$pred, ylab = "forecast flow", main = "Predict 2006 to 2013 using AR(1906-2006)")

plot of chunk unnamed-chunk-1


############################################################