# function to compute skew and interquartile range together
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew-iqr.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew.r")
# functions to produce boxplots with whiskers at 5th and 95th percentile
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot-stats.r")
library(sm)
library(MASS)
CART.pred = function(X, Y) # X is data outside fo specific epoch. Must be a dataframe
{
LF.pred = tree(X[, 2] ~ X[, 3] + X[, 4] + X[, 5], data = X)
X = as.data.frame(Y)
LFpred = predict(LF.pred, newdata = X)
plot(Y[, 2], LFpred)
lines(Y[, 2], Y[, 2], col = "red")
points(mean(LFpred), mean(LFpred), type = "p", bg = "green", pch = 21, col = "black")
rmse = sqrt(mean((Y[, 2] - LFpred)^2))
SST = sum((Y[, 2] - mean(Y[, 2]))^2)
SSR = sum((Y[, 2] - LFpred)^2)
rsq = 1 - (SSR/SST)
myreturn = list(rmse = rmse, rsq = rsq)
}
fun = function(p, q, X) {
modelaic = matrix(0, nrow = length(p), ncol = length(q))
for (i in p[1]:p[length(p)]) {
for (j in q[1]:q[length(q)]) {
zz = arima0(X, order = c(i, 0, j))
modelaic[i, j] = zz$aic #this gives the AIC
colnames(modelaic) = paste("q=", q[1]:q[length(q)], sep = "")
}
rownames(modelaic) = paste("p=", p[1]:p[length(p)], sep = "")
}
inds = which(modelaic == min(modelaic), arr.ind = TRUE)
myresult = list(lowest.aic = modelaic[order(modelaic)[1]], p = inds[1],
q = inds[2])
}
### Data: Lee's Ferry: Used Question 1, Question 2, Question 3, Question
### 4, Question 5
LFannual = read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/LF_1906-2005.txt",
F)
LF.annual = LFannual[, 2]/1e+06
year = LFannual[, 1]
AMO = read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/AMO_1856-2011.txt",
F)
AMO = AMO[51:150, 2] #get 1906- 2005
PDO = read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/PDO_1900-2011.txt",
F)
PDO = PDO[7:106, 2]
ENSO = read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/nino3_1905-2007.txt",
F)
ENSO = ENSO[2:101, 2]
library(tree)
LFdata = cbind(year, LF.annual, ENSO, PDO, AMO)
names(LFdata) <- c("year", "LF.annual", "ENSO", "PDO", "AMO")
LFdata = as.data.frame(LFdata)
# Fit regression tree
ztree <- tree(LFdata[, 2] ~ LFdata[, 3] + LFdata[, 4] + LFdata[, 5], LFdata)
plot(ztree)
text(ztree, cex = 0.75)
par(mfrow = c(3, 1))
# first epoch: 1906-1921
LFdata.1 = as.data.frame(LFdata[1:16, ])
LFdata.not1 = as.data.frame(LFdata[17:100, ])
epoch.1 = CART.pred(LFdata.not1, LFdata.1)
# second epoch: 1975-1990
LFdata.2 = LFdata[70:85, ]
LFdata.not2 = rbind(LFdata[1:69, ], LFdata[86:100, ])
epoch.2 = CART.pred(LFdata.not2, LFdata.2)
# third epoch: 1990-2005
LFdata.3 = LFdata[1:84, ]
LFdata.not3 = LFdata[85:100, ]
epoch.3 = CART.pred(LFdata.not3, LFdata.3)
performance.table = matrix(c(epoch.1$rmse, epoch.1$rsq, epoch.2$rmse, epoch.2$rsq,
epoch.3$rmse, epoch.3$rsq), ncol = 2, byrow = T)
rownames(performance.table) = c("epoch1", "epoch2", "epoch3")
colnames(performance.table) = c("rmse", "rsq")
as.table(performance.table)
## rmse rsq
## epoch1 4.7955 -0.6207
## epoch2 5.4047 0.1131
## epoch3 5.3122 -0.4772
monthdata = matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/Leesferry-mon-data.txt"),
ncol = 13, byrow = T)
WYmonflow = monthdata[, 2:13]
WYmonflow = WYmonflow/10^6 # convert to MAF
yrs = monthdata[, 1]
nyrs = length(yrs)
nyrs1 = nyrs - 1
### Scale the data..
fmean = apply(WYmonflow, 2, mean) #monthly mean
fsdev = apply(WYmonflow, 2, sd) #monthly standard deviation
# get the data into one long array...
X = t((t(WYmonflow) - fmean)/fsdev)
X = array(t(X)) #standardized anomalies..
## Fit various ARMA models
bestmodel = fun(1:3, 1:3, X)
p = bestmodel$p
q = bestmodel$q
pq = matrix(c(p, q), ncol = 2, byrow = T)
colnames(pq) = c("p", "q")
rownames(pq) = "[1]"
as.table(pq)
## p q
## [1] 3 1
zz = arima0(X, order = c(p, 0, q))
################### Simulate
nsim = 250
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/arma.sim.txt")
fu = arma.sim(nsim, "mean", zz, X, nyrs, p, q, 12)
LF.sim.mean = fu$LF.mean
fu = arma.sim(nsim, "sd", zz, X, nyrs, p, q, 12)
LF.sim.sd = fu$LF.sd
fu = arma.sim(nsim, "skew", zz, X, nyrs, p, q, 12)
LF.sim.skew = fu$LF.skew
fu = arma.sim(nsim, "cor", zz, X, nyrs, p, q, 12)
LF.sim.cor = fu$LF.cor
fu = arma.sim(nsim, "max", zz, X, nyrs, p, q, 12)
LF.sim.max = fu$LF.max
fu = arma.sim(nsim, "min", zz, X, nyrs, p, q, 12)
LF.sim.min = fu$LF.min
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/arma.sim.month.txt")
May = WYmonflow[, 5]
fu = arma.sim.month(nsim, May, zz, X, p, q, nyrs)
May.sim = fu$Monthpdf
xeval = fu$xeval
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/obs.stats.txt")
fu = obs.stats("mean", WYmonflow, nyrs)
Obs.mean = fu$obs.mean
fu = obs.stats("cor", WYmonflow, nyrs)
Obs.cor = fu$obs.cor
fu = obs.stats("sd", WYmonflow, nyrs)
Obs.sd = fu$obs.sd
fu = obs.stats("skew", WYmonflow, nyrs)
Obs.skew = fu$obs.skew
fu = obs.stats("max", WYmonflow, nyrs)
Obs.max = fu$obs.max
fu = obs.stats("min", WYmonflow, nyrs)
Obs.min = fu$obs.min
########### boxplot the stats ###############
months = month.abb
sim.list = rbind(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
LF.sim.min)
o = list(1:250, 251:500, 501:750, 751:1000, 1001:1250, 1251:1500)
obs.list = rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles = c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")
par(mfrow = c(3, 2))
for (i in 1:length(o)) {
xmeans = rbind(obs.list[i, ], sim.list[o[i][[1]], ]) #the first row is the means of the original data
xmeans1 = sim.list[o[i][[1]], ]
zz = boxplot(split(xmeans1, col(xmeans1)), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
axis(1, at = 1:12, labels = months)
points(z1, xmeans[1, ], pch = 16, col = "red")
lines(z1, xmeans[1, ], pch = 16, col = "red")
title(main = titles[i])
}
# boxplots of the May PDF....
zz = sm.density(May, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(May.sim), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
cex = 1.25)
evaluate = c(1, 20, 40, 60, 80, 100)
index = 1:6
z2 = 1:6
n1 = 1:6
for (i in 1:length(index)) {
z2[index[i]] = z1[evaluate[i]]
n1[index[i]] = xeval[evaluate[i]]
}
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
nsim = 250
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/seasonalAR1.sim.txt")
fu = seasonalAR1.sim(nsim, "mean", WYmonflow, nyrs)
LF.sim.mean = fu$LF.mean
fu = seasonalAR1.sim(nsim, "sd", WYmonflow, nyrs)
LF.sim.sd = fu$LF.sd
fu = seasonalAR1.sim(nsim, "skew", WYmonflow, nyrs)
LF.sim.skew = fu$LF.skew
fu = seasonalAR1.sim(nsim, "cor", WYmonflow, nyrs)
LF.sim.cor = fu$LF.cor
fu = seasonalAR1.sim(nsim, "max", WYmonflow, nyrs)
LF.sim.max = fu$LF.max
fu = seasonalAR1.sim(nsim, "min", WYmonflow, nyrs)
LF.sim.min = fu$LF.min
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/seasonalAR1.sim.month.txt")
May = WYmonflow[, 5]
fu = seasonalAR1.sim.month(nsim, May, WYmonflow, nyrs)
May.sim = fu$Monthpdf
xeval = fu$xeval
########### boxplot the stats ###############
months = month.abb
sim.list = rbind(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
LF.sim.min)
o = list(1:250, 251:500, 501:750, 751:1000, 1001:1250, 1251:1500)
obs.list = rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles = c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")
par(mfrow = c(3, 2))
for (i in 1:length(o)) {
xmeans = rbind(obs.list[i, ], sim.list[o[i][[1]], ]) #the first row is the means of the original data
xmeans1 = sim.list[o[i][[1]], ]
zz = boxplot(split(xmeans1, col(xmeans1)), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
axis(1, at = 1:12, labels = months)
points(z1, xmeans[1, ], pch = 16, col = "red")
lines(z1, xmeans[1, ], pch = 16, col = "red")
title(main = titles[i])
}
# boxplots of the May PDF....
zz = sm.density(May, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(May.sim), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
cex = 1.25)
evaluate = c(1, 20, 40, 60, 80, 100)
index = 1:6
z2 = 1:6
n1 = 1:6
for (i in 1:length(index)) {
z2[index[i]] = z1[evaluate[i]]
n1[index[i]] = xeval[evaluate[i]]
}
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
# you have to string it to create a long vector
x = array(t(WYmonflow))
N = length(x)
nsim = 100
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/knn1.sim.txt")
fu = knn1.sim(nsim, "mean", WYmonflow, x, nyrs, N)
LF.sim.mean = fu$LF.mean
fu = knn1.sim(nsim, "sd", WYmonflow, x, nyrs, N)
LF.sim.sd = fu$LF.sd
fu = knn1.sim(nsim, "skew", WYmonflow, x, nyrs, N)
LF.sim.skew = fu$LF.skew
fu = knn1.sim(nsim, "cor", WYmonflow, x, nyrs, N)
LF.sim.cor = fu$LF.cor
fu = knn1.sim(nsim, "max", WYmonflow, x, nyrs, N)
LF.sim.max = fu$LF.max
fu = knn1.sim(nsim, "min", WYmonflow, x, nyrs, N)
LF.sim.min = fu$LF.min
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/knn1.sim.month.txt")
May = WYmonflow[, 5]
fu = knn1.sim.month(nsim, May, WYmonflow, nyrs, N)
May.sim = fu$Monthpdf
xeval = fu$xeval
########### boxplot the stats ###############
months = month.abb
sim.list = rbind(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
LF.sim.min)
o = list(1:101, 102:202, 203:303, 304:404, 405:505, 506:606)
obs.list = rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles = c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")
par(mfrow = c(3, 2))
for (i in 1:length(o)) {
xmeans = rbind(obs.list[i, ], sim.list[o[i][[1]], ]) #the first row is the means of the original data
xmeans1 = sim.list[o[i][[1]], ]
zz = boxplot(split(xmeans1, col(xmeans1)), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
axis(1, at = 1:12, labels = months)
points(z1, xmeans[1, ], pch = 16, col = "red")
lines(z1, xmeans[1, ], pch = 16, col = "red")
title(main = titles[i])
}
# boxplots of the May PDF....
zz = sm.density(May, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(May.sim), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
cex = 1.25)
evaluate = c(1, 20, 40, 60, 80, 100)
index = 1:6
z2 = 1:6
n1 = 1:6
for (i in 1:length(index)) {
z2[index[i]] = z1[evaluate[i]]
n1[index[i]] = xeval[evaluate[i]]
}
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
(i) Perform PCA on the monthly streamflow data. Seem how many PCs to retain based on the Eigen spectrum, retain as many as you need to capture most of the variance, say ~75-80%
monthdata = matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/Leesferry-mon-data.txt"),
ncol = 13, byrow = T)
nyrs = length(monthdata[, 1])
WYmonflow = monthdata[, 2:13]
WYmonflow = WYmonflow/10^6
fmean = apply(WYmonflow, 2, mean) #monthly mean
fsdev = apply(WYmonflow, 2, sd)
# scale data
WYmonflow1 = t((t(WYmonflow) - fmean)/fsdev)
zs = var(WYmonflow1) #variance matrix
zsvd = svd(zs) #Eigen decomposition..
pcsLF = t(t(zsvd$u) %*% t(WYmonflow1))
evect = zsvd$u # EigenVector
lambdas = (zsvd$d/sum(zsvd$d))
plot(1:12, lambdas[1:12], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:12, lambdas[1:12], col = "red")
# keep first 4 PCs
(ii) Fit a best ARMA model for each retained PC and a best fit Normal distribution for the others (i.e., the noise PCs)
## Fit various ARMA models
pq = matrix(0, nrow = 4, ncol = 2)
for (i in 1:4) {
bestmodel = fun(1:3, 1:3, pcsLF[, i])
p = bestmodel$p
q = bestmodel$q
pq[i, 1] = p
pq[i, 2] = q
}
colnames(pq) = c("p", "q")
as.table(pq)
## p q
## A 1 1
## B 1 1
## C 1 1
## D 3 1
(iii) Simulate each PC from their respective model and invert back to the flow space using Eigen Vector matrix. Repeat to generate ensembles Boxplot the statistics listed in 2. and compare with the results from the previous methods
################### Simulate
nsim = 100
nyrs1 = nyrs - 1
May = WYmonflow[, 5]
xeval = seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals = length(xeval)
zz = sm.density(May, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
armean = matrix(0, nsim, 12)
arstdev = matrix(0, nsim, 12)
arcor = matrix(0, nsim, 12)
arskw = matrix(0, nsim, 12)
armax = matrix(0, nsim, 12)
armin = matrix(0, nsim, 12)
simpdf = matrix(0, nrow = nsim, ncol = nevals)
for (k in 1:nsim) {
p = pq[1, 1]
q = pq[1, 2]
zz = arima0(pcsLF[, 1], order = c(p, 0, q))
xsim1 = arima.sim(n = nyrs, list(ar = c(zz$coef[1:p]), ma = c(zz$coef[(p +
1):(p + q)])), sd = sqrt(zz$sigma2)) + mean(pcsLF)
p = pq[2, 1]
q = pq[2, 2]
zz = arima0(pcsLF[, 2], order = c(p, 0, q))
xsim2 = arima.sim(n = nyrs, list(ar = c(zz$coef[1:p]), ma = c(zz$coef[(p +
1):(p + q)])), sd = sqrt(zz$sigma2)) + mean(pcsLF)
p = pq[3, 1]
q = pq[3, 2]
zz = arima0(pcsLF[, 3], order = c(p, 0, q))
xsim3 = arima.sim(n = nyrs, list(ar = c(zz$coef[1:p]), ma = c(zz$coef[(p +
1):(p + q)])), sd = sqrt(zz$sigma2)) + mean(pcsLF)
p = pq[4, 1]
q = pq[4, 2]
zz = arima0(pcsLF[, 4], order = c(p, 0, q))
xsim4 = arima.sim(n = nyrs, list(ar = c(zz$coef[1:p]), ma = c(zz$coef[(p +
1):(p + q)])), sd = sqrt(zz$sigma2)) + mean(pcsLF)
# Simulate the remaining pcs with a normal distribution
pred = matrix(0, nrow = nyrs, ncol = 8)
j1 = c(5, 6, 7, 8, 9, 10, 11, 12)
for (j in 1:8) {
PC = pcsLF[, j1[j]]
pred[, j] = rnorm(nyrs, mean = mean(PC), sd = sd(PC))
}
# combine all simulated pcs
PC.sim = matrix(cbind(xsim1, xsim2, xsim3, xsim4), nrow = 95)
pc.sim = cbind(PC.sim, pred)
# Put statistics back
LF.sim = pc.sim %*% t(evect)
# Back standardize
LFsim = t((t(LF.sim) * fsdev) + fmean)
for (j in 1:12) {
armean[k, j] = mean(LFsim[, j])
armax[k, j] = max(LFsim[, j])
armin[k, j] = min(LFsim[, j])
arstdev[k, j] = sd(LFsim[, j])
arskw[k, j] = skew(LFsim[, j])
}
# correlation between one month to another..
for (j in 2:12) {
j1 = j - 1
arcor[k, j] = cor(LFsim[, j], LFsim[, j1])
}
arcor[k, 1] = cor(LFsim[1:nyrs1, 12], LFsim[2:nyrs, 1])
###### Simulate May May.sim=LFsim[,5]
###### xeval=seq(min(May)-0.25*sd(May),max(May)+0.25*sd(May),length=length(May))
###### nevals=length(xeval)
# simpdf[k,]=sm.density(May.sim,eval.points=xeval,display='none')$estimate
}
########### boxplot the stats ###############
months = month.abb
sim.list = rbind(armean, arstdev, arskw, arcor, armax, armin)
o = list(1:100, 101:200, 201:300, 301:400, 401:500, 501:600)
obs.list = rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles = c("Mean", "Standard Deviation", "Skew", "Lag 1 Cor", "Max", "Min")
par(mfrow = c(3, 2))
for (i in 1:length(o)) {
xmeans = rbind(obs.list[i, ], sim.list[o[i][[1]], ]) #the first row is the means of the original data
xmeans1 = sim.list[o[i][[1]], ]
zz = boxplot(split(xmeans1, col(xmeans1)), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
axis(1, at = 1:12, labels = months)
points(z1, xmeans[1, ], pch = 16, col = "red")
lines(z1, xmeans[1, ], pch = 16, col = "red")
title(main = titles[i])
}
#### PDF of may
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(simpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(simpdf, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
cex = 1.25)
evaluate = c(1, 20, 40, 60, 80, 100)
index = 1:6
z2 = 1:6
n1 = 1:6
for (i in 1:length(index)) {
z2[index[i]] = z1[evaluate[i]]
n1[index[i]] = xeval[evaluate[i]]
}
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
library(tseries)
library(MASS)
source("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/rawspec.R")
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/smoothspec.txt")
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/spec.txt")
lf = matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/Leesferry-mon-data.txt"),
ncol = 13, byrow = T)
lf = lf[, 2:13]
lf = lf/10^6 #convert to MAF
## Select the May monthly flow..
y = lf[, 5]
nsim = 100
deltaT = 1
t = 1:length(y)
f = spec1(y, nsim)
specsimrawar = f$raw.spec.ar
specsimrawars = f$smooth.spec.ar
specsimrawsur = f$raw.spec.sur
specsimrawsurs = f$smooth.spec.sur
pgram = f$raw.spec.orig
pgrams = f$smooth.spec.orig
plotfreqs = f$plotfreqs
######################### Plot the raw spectrum from AR simulations
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawar,
pgram), col = "red", lwd = 2)
for (i in 1:nsim) {
lines(plotfreqs, specsimrawar[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
############## plot the raw spectrum from surrogate data
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsur,
pgram), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsur[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
############## Plot the smooth spectrum of the AR simulations
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawars,
pgrams), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawars[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
############## Plot the smooth spectrum of the surrogate data
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsurs,
pgrams), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsurs[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
############## Calculate mean, sd, skew, cor, max, min from AR model
N = length(specsimrawar[, 1])
armean = matrix(0, nrow = N, ncol = 1)
armax = matrix(0, nrow = N, ncol = 1)
armin = matrix(0, nrow = N, ncol = 1)
arcor = matrix(0, nrow = N, ncol = 1)
arskw = matrix(0, nrow = N, ncol = 1)
arstdev = matrix(0, nrow = N, ncol = 1)
for (j in 1:N) {
armean[j] = mean(specsimrawar[j, ])
armax[j] = max(specsimrawar[j, ])
armin[j] = min(specsimrawar[j, ])
arstdev[j] = sd(specsimrawar[j, ])
arskw[j] = skew(specsimrawar[j, ])
}
# correlation between one year to another..
for (j in 2:N) {
j1 = j - 1
arcor[j] = cor(specsimrawar[j, ], specsimrawar[j1, ])
}
arcor[1] = cor(specsimrawar[1:(N - 1), 2], specsimrawar[2:N, 2])
# Observed stats
obs.mean = as.matrix(mean(pgram))
obs.skew = as.matrix(skew(pgram))
obs.sd = as.matrix(sd(pgram))
obs.min = as.matrix(min(pgram))
obs.max = as.matrix(max(pgram))
obs.cor = as.matrix(cor(pgram[1:(N - 1)], pgram[2:N]))
########### boxplot the stats ###############
simulated = c("armean", "arstdev", "arskw", "arcor", "armax", "armin")
observ = c("obs.mean", "obs.sd", "obs.skew", "obs.cor", "obs.max", "obs.min")
titles = c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")
par(mfrow = c(1, 2))
for (i in 1:length(observ)) {
xmeans = rbind(get(observ[i]), get(simulated[i]))
xmeans1 = get(simulated[i])
zz = boxplot(xmeans1, plot = F)
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
points(z1, xmeans[1], pch = 16, col = "red")
lines(z1, xmeans[1], pch = 16, col = "red")
title(main = titles[i])
}
#### Plot the PDF of May
simpdf = matrix(0, nrow = 48, ncol = nsim)
# xeval=seq(min(y)-0.25*sd(y),max(y)+0.25*sd(y),length=95)
for (i in 1:100) {
xeval = seq(min(specsimrawar[, i]) - 0.25 * sd(specsimrawar[, i]), max(specsimrawar[,
i]) + 0.25 * sd(specsimrawar[, i]), length = N)
zz = sm.density(specsimrawar[, i], eval.points = xeval, display = "none")
simpdf[, i] = zz$estimate
}
xeval = seq(min(pgram) - 0.25 * sd(pgram), max(pgram) + 0.25 * sd(pgram), length = N)
zz = sm.density(pgram, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(simpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(simpdf, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
cex = 1.25)
evaluate = c(1, 20, 40, 60, 80, 100)
index = 1:6
z2 = 1:6
n1 = 1:6
for (i in 1:length(index)) {
z2[index[i]] = z1[evaluate[i]]
n1[index[i]] = xeval[evaluate[i]]
}
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
# Put data into binary.
LFann1 <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/LF_1906-2005.txt")
LFann = LFann1/(10^6)
ENSOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/nino3_1905-2007.txt")
PDOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/PDO_1900-2011.txt")
AMOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/AMO_1856-2011.txt")
# Compile data from matching years
years <- seq(1906, 2005, 1)
LFann = LFann[, 2]
N = length(LFann)
yth = median(LFann)
LFann[LFann < yth] = 0
LFann[LFann >= yth] = 1
ydes1 = LFann[-1]
ydeslag = LFann[-N]
ENSOann1 = ENSOann[match(years, ENSOann[, 1]), 2]
PDOann1 = PDOann[match(years, PDOann[, 1]), 2]
AMOann1 = AMOann[match(years, AMOann[, 1]), 2]
ENSOann1 = ENSOann1[-1]
PDOann1 = PDOann1[-1]
AMOann1 = AMOann1[-1]
X = as.data.frame(cbind(ydeslag, ENSOann1, PDOann1, AMOann1))
names(X) <- c("LF1", "ENSO", "PDO", "AMO")
X = cbind(ydeslag, PDOann1)
glmmarkov = glm(ydes1 ~ X, family = binomial())
sim = predict(glmmarkov)
Nl = length(sim)
statesim = 1:Nl
for (i in 1:Nl) {
pk = sim[i]
statesim[i] <- ifelse(runif(1) < pk, 1, 0)
}
statesim = as.matrix(statesim)
##### get states from original data
historical = LFann1[, 2]/(10^6)
LFann2 = LFann1[, 2]/(10^6)
N = length(LFann2)
yth = median(LFann2)
LFann2[LFann2 < yth] = 0
LFann2[LFann2 >= yth] = 1
pairs.obs = cbind(LFann2[1:(N - 1)], LFann2[2:N])
p00.obs = length(which(pairs.obs[, 1] == 0 & pairs.obs[, 2] == 0))
p10.obs = length(which(pairs.obs[, 1] == 1 & pairs.obs[, 2] == 0))
p01.obs = length(which(pairs.obs[, 1] == 0 & pairs.obs[, 2] == 1))
p11.obs = length(which(pairs.obs[, 1] == 1 & pairs.obs[, 2] == 1))
trans.obs = matrix(0, nrow = 2, ncol = 2)
rownames = c("0", "1")
colnames = c("0", "1")
trans.obs[1, 1] = p00.obs/(N - 1)
trans.obs[1, 2] = p01.obs/(N - 1)
trans.obs[2, 1] = p10.obs/(N - 1)
trans.obs[2, 2] = p11.obs/(N - 1)
trans.obs
## [,1] [,2]
## [1,] 0.2626 0.2424
## [2,] 0.2424 0.2525
##### get pairs from simulated data
n = length(statesim)
pairs.sim = cbind(statesim[1:(n - 1)], statesim[2:n])
colnames = c("t-1", "t")
p00 = length(which(pairs.sim[, 1] == 0 & pairs.sim[, 2] == 0))
p10 = length(which(pairs.sim[, 1] == 1 & pairs.sim[, 2] == 0))
p01 = length(which(pairs.sim[, 1] == 0 & pairs.sim[, 2] == 1))
p11 = length(which(pairs.sim[, 1] == 1 & pairs.sim[, 2] == 1))
# Transition Prob Matirx
trans.sim1 = matrix(0, nrow = 2, ncol = 2)
rownames = c("0", "1")
colnames = c("0", "1")
trans.sim1[1, 1] = p00/(n - 1)
trans.sim1[1, 2] = p01/(n - 1)
trans.sim1[2, 1] = p10/(n - 1)
trans.sim1[2, 2] = p11/(n - 1)
trans.sim1
## [,1] [,2]
## [1,] 0.83673 0.07143
## [2,] 0.07143 0.02041
# the prediction of the glm seems to return alot mor 0s than the original
# data; the transition matrix is very different from the historical. Use
# the function simulate and see if it returns better results.
sim.fnc = simulate(glmmarkov, 1)
n = length(sim.fnc[, 1])
pairs.sim = cbind(sim.fnc[1:(n - 1), ], sim.fnc[2:n, ])
colnames = c("t-1", "t")
p00 = length(which(pairs.sim[, 1] == 0 & pairs.sim[, 2] == 0))
p10 = length(which(pairs.sim[, 1] == 1 & pairs.sim[, 2] == 0))
p01 = length(which(pairs.sim[, 1] == 0 & pairs.sim[, 2] == 1))
p11 = length(which(pairs.sim[, 1] == 1 & pairs.sim[, 2] == 1))
# Transition Prob Matirx
trans.sim2 = matrix(0, nrow = 2, ncol = 2)
rownames = c("0", "1")
colnames = c("0", "1")
trans.sim2[1, 1] = p00/(n - 1)
trans.sim2[1, 2] = p01/(n - 1)
trans.sim2[2, 1] = p10/(n - 1)
trans.sim2[2, 2] = p11/(n - 1)
trans.sim2
## [,1] [,2]
## [1,] 0.2857 0.2143
## [2,] 0.2143 0.2857
# this is much closer to the transition matrix of the historical data
############## Fist flow ##############
nsim = 500
N = length(sim.fnc[, 1])
simflow = matrix(0, ncol = nsim, nrow = N)
## Since p00.obs, p01.obs, p10.obs and p11.obs are close, the weight
## metric is the same
N1 = p00.obs
K = round(sqrt(N1))
W = 1:K
W = 1/W
W = W/sum(W)
W = cumsum(W)
#### Simulate flow
for (i in 1:nsim) {
for (j in 2:N) {
# what state is flow in
j1 = j - 1
val1 = sim.fnc[j1, ]
# associate that value with a flow. Randomly chose that flow
if (val1 == 0) {
indX = which(LFann2 == 0)
if (j == 2) {
indX = sample(indX, 1)
}
Hist0 = historical[indX]
}
if (val1 == 1) {
indX = which(LFann2 == 1)
if (j == 2) {
index = sample(indX, 1)
}
Hist1 = historical[indX]
}
# look at second value in simulated sequence
val2 = sim.fnc[j, ]
if (val1 == 0 && val2 == 0) {
# associate flow values for all 00 sequences
all00 = which(pairs.obs[, 1] == 0 & pairs.obs[, 2] == 0)
Hist00 = historical[all00]
# calculate distance
xdist = order(abs(Hist0 - Hist00))
# select one of the nearest neighbor with the weight function above.
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
# Next flow
simflow[j, i] = historical[index]
}
if (val1 == 0 && val2 == 1) {
# associate flow values for all 01 sequences
all01 = which(pairs.obs[, 1] == 0 & pairs.obs[, 2] == 1)
Hist01 = historical[all01]
# calculate distance
xdist = order(abs(Hist0 - Hist01))
# select one of the nearest neighbor with the weight function above.
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
# Next flow
simflow[j, i] = historical[index]
}
if (val1 == 1 && val2 == 0) {
# associate flow values for all 01 sequences
all10 = which(pairs.obs[, 1] == 1 & pairs.obs[, 2] == 0)
Hist10 = historical[all10]
# calculate distance
xdist = order(abs(Hist1 - Hist10))
# select one of the nearest neighbor with the weight function above.
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
# Next flow
simflow[j, i] = historical[index]
}
if (val1 == 1 && val2 == 1) {
# associate flow values for all 01 sequences
all11 = which(pairs.obs[, 1] == 1 & pairs.obs[, 2] == 1)
Hist11 = historical[all11]
# calculate distance
xdist = order(abs(Hist1 - Hist11))
# select one of the nearest neighbor with the weight function above.
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
# Next flow
simflow[j, i] = historical[index]
}
}
simflow[1, i] = sample(historical, 1)
}
armean = matrix(0, nrow = N, ncol = 1)
armax = matrix(0, nrow = N, ncol = 1)
armin = matrix(0, nrow = N, ncol = 1)
arcor = matrix(0, nrow = N, ncol = 1)
arskw = matrix(0, nrow = N, ncol = 1)
arstdev = matrix(0, nrow = N, ncol = 1)
for (j in 1:N) {
armean[j] = mean(simflow[j, ])
armax[j] = max(simflow[j, ])
armin[j] = min(simflow[j, ])
arstdev[j] = sd(simflow[j, ])
arskw[j] = skew(simflow[j, ])
}
# correlation between one year to another..
for (j in 2:N) {
j1 = j - 1
arcor[j] = cor(simflow[j, ], simflow[j1, ])
}
arcor[1] = cor(simflow[1:(N - 1), 2], simflow[2:N, 2])
# Observed stats
obs.mean = as.matrix(mean(historical))
obs.skew = as.matrix(skew(historical))
obs.sd = as.matrix(sd(historical))
obs.min = as.matrix(min(historical))
obs.max = as.matrix(max(historical))
obs.cor = as.matrix(cor(historical[1:(N - 1)], historical[2:N]))
########### boxplot the stats ###############
simulated = c("armean", "arstdev", "arskw", "arcor", "armax", "armin")
observ = c("obs.mean", "obs.sd", "obs.skew", "obs.cor", "obs.max", "obs.min")
titles = c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")
par(mfrow = c(1, 2))
for (i in 1:length(observ)) {
xmeans = rbind(get(observ[i]), get(simulated[i]))
xmeans1 = get(simulated[i])
zz = boxplot(xmeans1, plot = F)
z1 = bxp(zz, ylim = range(xmeans), cex = 1)
points(z1, xmeans[1], pch = 16, col = "red")
lines(z1, xmeans[1], pch = 16, col = "red")
title(main = titles[i])
}
## simulate from spectrum
nsim = 100
deltaT = 1
t = 1:length(historical)
fu = spec1(historical, nsim)
specsimrawsur = fu$raw.spec.sur
specsimrawsurs = fu$smooth.spec.sur
pgram = fu$raw.spec.orig
pgrams = fu$smooth.spec.orig
plotfreqs = fu$plotfreqs
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsur,
pgram), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsur[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsurs,
pgrams), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsurs[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
source("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session4/ssa-b.txt")
lf = matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session4/LeesFerry-monflows-1906-2006.txt"),
ncol = 13, byrow = T)
yr = lf[, 1]
lf = lf[, 2:13]
lf = lf * 0.0004690502
lfann = apply(lf, 1, sum)
lfmean = colMeans(lf)
lfsd = apply(lf, 2, sd)
lfscale = t((t(lf) - lfmean)/lfsd)
### SSA on Annual total volume
M = c(20, 25, 30) # try different M's
titles1 = c("M=20", "M=25", "M=30")
index = c()
par(mfrow = c(3, 2))
for (i in 1:length(M)) {
ssaout = ssab(lfann, M[i])
# Eigen Spectrum
lambdas = ssaout$lambdas #Fraction Eigen Value
plot(1:M[i], lambdas[1:M[i]], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:M[i], lambdas[1:M[i]], col = "red")
title(main = titles1[i])
plot(1:M[i], cumsum(lambdas))
title(main = titles1[i])
index[i] = list(which(cumsum(lambdas) > 0.6))
}
index
## [[1]]
## [1] 9 10 11 12 13 14 15 16 17 18 19 20
##
## [[2]]
## [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
##
## [[3]]
## [1] 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# Use M=20 as the first 9 modes explain over 60% of the variance
ssaout = ssab(lfann, 25)
# Plot the leading RCs, each against the timeseries
plot(yr, lfann, type = "l", col = "black", ylim = range(ssaout$Rpc[, 1], ssaout$Rpc[,
2], ssaout$Rpc[, 3], ssaout$Rpc[, 4]))
lines(yr, ssaout$Rpc[, 1], col = "red", lwd = 2)
lines(yr, ssaout$Rpc[, 2], col = "purple", lwd = 2)
lines(yr, ssaout$Rpc[, 3], col = "green", lwd = 2)
lines(yr, ssaout$Rpc[, 4], col = "blue", lwd = 2)
legend("bottomleft", legend = c("Original Data", "RC1", "RC2", "RC3", "RC4"),
lty = 1, lwd = 1, col = c("black", "red", "purple", "green", "blue"))
# Add the first K reconstructed components
K = 10
par(mfrow = c(1, 1))
Recon.LF = apply(ssaout$Rpc[, 1:K], 1, sum)
plot(yr, lfann, type = "l", xlab = "Year", ylab = "Lees Ferry Annual Flow",
ylim = range(lfann, Recon.LF))
lines(yr, Recon.LF, col = "red", lwd = 2)
xrecover = apply(ssaout$Rpc, 1, sum)
plot(xrecover, lfann, main = "summed Reconstructed Components vs Annual Lee's Ferry Data")
# This plot proves that the reconstructed components are exactly eqaul to
# the data set.
# Find best M
M = c(10, 15, 20, 25, 30, 35, 40) # try different M's
titles1 = c("M=10", "M=15", "M=20", "M=25", "M=30", "M=35", "M=40")
index = c()
par(mfrow = c(3, 2))
for (i in 1:length(M)) {
ssaout = ssab(lfann[1:92], M[i]) #fit model 1906-1997
# Eigen Spectrum
lambdas = ssaout$lambdas #Fraction Eigen Value
plot(1:M[i], lambdas[1:M[i]], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:M[i], lambdas[1:M[i]], col = "red")
title(main = titles1[i])
plot(1:M[i], cumsum(lambdas))
title(main = titles1[i])
index[i] = list(which(cumsum(lambdas) > 0.6))
}
index
## [[1]]
## [1] 5 6 7 8 9 10
##
## [[2]]
## [1] 7 8 9 10 11 12 13 14 15
##
## [[3]]
## [1] 9 10 11 12 13 14 15 16 17 18 19 20
##
## [[4]]
## [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
##
## [[5]]
## [1] 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
##
## [[6]]
## [1] 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
## [24] 35
##
## [[7]]
## [1] 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
## [24] 35 36 37 38 39 40
pred.yr = c(1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006)
pred = 1:K
pred.norm = 1:length((K + 1):M)
forcast = matrix(0, nrow = length(pred.yr), ncol = 1)
RC.noise = (K + 1):M
for (j in 1:length(pred.yr)) {
if (j == 1) {
M = 15
ssaout = ssab(lfann[1:92], M)
} else {
ssaout = ssab(c(lfann[1:92], forcast[1:(j - 1)]), M)
}
lambdas = ssaout$lambdas #Fraction Eigen Value
K = which(cumsum(lambdas) > 0.6)[1]
for (i in 1:K) {
rc.ar = ar(ssaout$Rpc[, i])
xp = predict(rc.ar, n.ahead = 1)
pred[i] = xp$pred
}
for (i in 1:length(RC.noise)) {
pred.norm[i] = rnorm(1, mean(ssaout$Rpc[, RC.noise[i]]), sd(ssaout$Rpc[,
RC.noise[i]]))
}
forcast[j] = sum(c(pred, pred.norm))
}
rownames(forcast) = pred.yr
forcast
## [,1]
## 1998 9140
## 1999 8387
## 2000 7855
## 2001 6548
## 2002 5545
## 2003 4740
## 2004 2462
## 2005 2404
## 2006 2838
# calculate Stats: RMSE and rsquared
data = lfann[93:101]
par(mfrow = c(1, 1))
plot(data, forcast)
lines(data, data, col = "red")
rmse = sqrt(mean((data - forcast)^2))
SST = sum((data - mean(data))^2)
SSR = sum((data - forcast)^2)
rsq = 1 - (SST/SSR)
rsq
## [1] 0.6344
rmse
## [1] 2685