# HW 3 Q 8 JLCY, 19 Dec 2013
############################################################
# clear variables
rm(list = ls())
Qnumber = 12
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\loaddata.r")
## Package `sm', version 2.2-5: type help(sm) for summary information
# source('http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/ssa-b.txt')
source("C:\\Users\\jfklee90\\Desktop\\CVEN 6833\\HW 3\\ssa-b.r")
#### This code does the SSA and produces the RCs, plots the fraction Eigen
#### values (similar to the Eigen spectrum of PCa) the first part of problem 7
## For forecasting, fit AR models to the RCs Suppose RC is the reconstructed
## component through time t and you want a one step ahead forecast rc.ar =
## ar(RC) xp = predict(rc.ar,n.ahead=1) xp$pred gives the prediction xp$se
## gives the prediction standard error
# Repeat this for all the RCs. As before, fit a Normal distribution to the
# noise RCs
## The mean forecast is the sum of all the component forecasts The standard
## error of this is = sqrt(serc1^2 + serc2^2 + ... + serck^2) Assuming normal
## distribution you can compute the threshold probabilities for RPSS
############################################################
# select best M
par(ask = TRUE)
### SSA on Annual total volume
M = 20 # lag window
ssaout = ssab(Annual, M)
# ssaout$lambdas = Fraction Eigen Value
# Eigen spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "M = 20")
points(1:20, ssaout$lambdas[1:20], col = "red")
############################################################
M = 25 # lag window
ssaout = ssab(Annual, M)
# Eigen spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "M = 25")
points(1:20, ssaout$lambdas[1:20], col = "red")
############################################################
M = 30 # lag window
ssaout = ssab(Annual, M)
# Eigen Spectrum
plot(1:20, ssaout$lambdas[1:20], type = "l", xlab = "Modes", ylab = "Frac. Var. explained",
main = "M = 30")
points(1:20, ssaout$lambdas[1:20], col = "red")
############################################################
# M = 20: first 5 modes explain > 50% of variance, better than others choose
# M = 20
M = 20 # lag window
ssaout = ssab(Annual, M)
# ssaout$Rpc = reconstructed components RCs
# Temporal plot of leading RCs
plot(1906:2006, ssaout$Rpc[, 1], type = "l", main = "RC1 Temporal Mode of Variability")
plot(1906:2006, ssaout$Rpc[, 2], type = "l", main = "RC2 Temporal Mode of Variability")
plot(1906:2006, ssaout$Rpc[, 3], type = "l", main = "RC3 Temporal Mode of Variability")
plot(1906:2006, ssaout$Rpc[, 4], type = "l", main = "RC4 Temporal Mode of Variability")
plot(1906:2006, ssaout$Rpc[, 5], type = "l", main = "RC5 Temporal Mode of Variability")
############################################################
## Add the first K reconstructed components
K = 5
Recon = apply(ssaout$Rpc[, 1:K], 1, sum)
# Plot sum of leading RCs & historical time series
plot(year, Annual, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow",
main = "First 5 RCs (Red) & Obs time series (Black)", ylim = range(Annual,
Recon))
lines(year, Recon, col = "red", lwd = 2)
############################################################
## as a check if you sum *all* the reconstructed components you should get
## the original data back
xrecover = apply(ssaout$Rpc, 1, sum)
plot(year, Annual, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow",
main = "All RCs (Red) & Obs time series (Black)", ylim = range(Annual, Recon))
lines(year, xrecover, col = "red", lwd = 2)
## this equals to Annual
############################################################
library(stats)
# predicting years ahead
fyr = 1
# SSA from 1906 to t year & predict t+1 year
SSA_AR_fn = function(t_year) {
# data
input = Annual[1:t_year]
# SSA
ssaout_t = ssab(input, M)
sum_RC = 0
for (i in 1:K) {
# K = 5
arbest = ar(ssaout_t$Rpc[i, ])
p = order(arbest$aic)[1]
zz = arima(ssaout_t$Rpc[i, ], order = c(p, 0, 0), include.mean = TRUE,
method = "ML")
rc.ar = zz
# predict
xp = predict(zz, n.ahead = fyr, se.fit = T)
sum_RC = sum_RC + xp$pred
# back standardize xp_pred = xp$pred * sd(input) + mean(input)
}
result = list(pred = sum_RC)
return(result)
}
############################################################
# SSA from 1906 to 1998 (93th value) --> predcit 1998 so on till 2006
forecast = c(1:9)
j = 1
for (i in 1:9) {
pred_tyear = SSA_AR_fn(t_year = (i + 92))
forecast[i] = pred_tyear$pred
}
############################################################
# plot against observed value & calculate statistics
library(graphics)
plot(Annual[93:101], forecast, xlab = "Obs flow", main = "Obs & Forecast flow 1998 to 2006")
# plot with 1:1 line
plot(Annual[93:101], forecast, xlab = "Obs flow", ylim = range(0, forecast),
xlim = range(0, Annual[93:101]), main = "Obs & Forecast flow 1998 to 2006")
abline(0, 1)
R2 = (cor(Annual[93:101], forecast))^2
RMSE = sqrt((sum((forecast - Annual[93:101])^2))/9)
cat("R^2 for predicting 1998-2006 = ", R2, "\n", "\n")
## R^2 for predicting 1998-2006 = 0.09789
##
cat("RMSE for predicting 1998-2006 = ", RMSE, "\n", "\n")
## RMSE for predicting 1998-2006 = 9.031
##
############################################################
# SSA from 1906 to 2006 (101th value) --> predcit 2013
# 2006 to 2013, 8 years ahead
fyr = 8
pred_tyear_2013 = SSA_AR_fn(t_year = 101)
plot(2006:2013, pred_tyear_2013$pred, ylab = "forecast flow", main = "Predict 2006 to 2013 using AR(1906-2006)")
############################################################