rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
# source('bestARIMA.r') source('skew.s.s')
source("stats.r")
library(MASS)
Read in the monthly Lees Ferry streamflow
LFann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/LF_1906-2005.txt")
ENSOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/nino3_1905-2007.txt")
PDOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/PDO_1900-2011.txt")
AMOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/AMO_1856-2011.txt")
years <- seq(1906, 2005, 1)
nyrs <- length(years)
nyrs1 <- nyrs - 1
nyrs2 <- nyrs - 2
data <- as.data.frame(cbind(LFann[, 1], LFann[, 2]/(10^6), ENSOann[match(years,
ENSOann[, 1]), 2], PDOann[match(years, PDOann[, 1]), 2], AMOann[match(years,
AMOann[, 1]), 2]))
names(data) <- c("year", "LF", "ENSO", "PDO", "AMO")
LFmed <- median(data$LF)
ones <- which(data$LF > LFmed)
bin <- rep(0, nyrs)
bin[ones] <- 1
fulldata <- as.data.frame(cbind(bin[2:nyrs], bin[1:(nyrs - 1)], data$ENSO[2:nyrs],
data$AMO[2:nyrs], data$PDO[2:nyrs]))
names(fulldata) <- c("flow", "prevf", "enso", "amo", "pdo")
Fit a best GLM for the state series of previous annual streamflow, ENSO, AMO, PDO
zz <- glm(flow ~ ., data = fulldata, family = "binomial")
# zz <- stepAIC(zz) # PDO is the lowest AIC
zz <- glm(flow ~ prevf + pdo, data = fulldata, family = "binomial")
Write a TPM
TPM <- matrix(0, 2, 2)
for (i in 1:nyrs1) {
if (bin[i] == 0) {
if (bin[i + 1] == 0) {
TPM[1, 1] = TPM[1, 1] + 1
} else {
TPM[1, 2] = TPM[1, 2] + 1
}
}
if (bin[i] == 1) {
if (bin[i + 1] == 0) {
TPM[2, 1] = TPM[2, 1] + 1
} else {
TPM[2, 2] = TPM[2, 2] + 1
}
}
}
pTPM <- cbind((TPM[1, ]/sum(TPM[1, ])), (TPM[2, ]/sum(TPM[2, ])))
Create pairs of flows for each set of transition states
binlag1 <- cbind(bin[1:nyrs1], bin[2:nyrs])
flowlag1 <- cbind(data$LF[1:nyrs1], data$LF[2:nyrs])
f0s <- data$LF[which(bin == 0)] # all flows that were 0
f1s <- data$LF[which(bin == 1)] # all flows that were 1
i00 <- which(binlag1[, 1] == 0 & binlag1[, 2] == 0)
f00 <- flowlag1[i00, ] # all flow pairs that were 00
i01 <- which(binlag1[, 1] == 0 & binlag1[, 2] == 1)
f01 <- flowlag1[i01, ] # all flow pairs that were 01
i10 <- which(binlag1[, 1] == 1 & binlag1[, 2] == 0)
f10 <- flowlag1[i10, ] # all flow pairs that were 10
i11 <- which(binlag1[, 1] == 1 & binlag1[, 2] == 1)
f11 <- flowlag1[i11, ] # all flow pairs that were 11
Create 250 binary simulations from the GLM and randomly assign flow values to the first of each simulation using historical data.
nsim <- 250
statesim <- matrix(0, nyrs1, nsim)
flowsim <- statesim
for (j in 1:nsim) {
# simulate 99 values from the GLM fit
sim = predict(zz)
# obtain binary values for simulations
for (i in 1:nyrs1) {
pk = sim[i]
statesim[i, j] <- ifelse(runif(1) < pk, 1, 0)
}
# randomly sample historical data of 1s and 0s to assign first flow values
if (statesim[1, j] == 0) {
flowsim[1, j] <- sample(f0s, 1)
} else {
flowsim[1, j] <- sample(f1s, 1)
}
}
Now that the simulations have the first flow values, we can conditionally simulate flow using KNN.
# Since all four states have almost the same number of pairs, the weight
# vector will be the same so we initiate this outside of the loop.
K <- round(sqrt(length(i00)))
W = 1/(1:K)
W = W/sum(W)
W = cumsum(W)
# Begin conditional simulation with KNN
for (j in 1:nsim) {
# set the current flow to the first of each simulation
current <- flowsim[1, j]
for (i in 1:nyrs2) {
if (statesim[i, j] == 0 && statesim[i + 1, j] == 0) {
xdist <- order(abs(flowsim[i, j] - f00[, 1]))
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
flowsim[i + 1, j] = f00[index, 2]
current <- flowsim[i + 1, j]
}
if (statesim[i, j] == 0 & statesim[i + 1, j] == 1) {
xdist <- order(abs(flowsim[i, j] - f01[, 1]))
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
flowsim[i + 1, j] = f01[index, 2]
current <- flowsim[i + 1, j]
}
if (statesim[i, j] == 1 & statesim[i + 1, j] == 0) {
xdist <- order(abs(flowsim[i, j] - f10[, 1]))
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
flowsim[i + 1, j] = f10[index, 2]
current <- flowsim[i + 1, j]
}
if (statesim[i, j] == 1 & statesim[i + 1, j] == 1) {
xdist <- order(abs(flowsim[i, j] - f11[, 1]))
xx = runif(1, 0, 1)
xy = c(xx, W)
xx = rank(xy)
index = xdist[xx[1]]
flowsim[i + 1, j] = f11[index, 2]
current <- flowsim[i + 1, j]
}
}
}
Compute statistics and boxplot
LFdata <- matrix(data$LF, 100, 1)
obs.stats <- stats(LFdata, corr = FALSE, multicorr = FALSE)
sim.stats <- stats(flowsim, corr = FALSE, multicorr = FALSE)
par(mfrow = c(2, 3))
boxplot(sim.stats$mean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(sim.stats$max, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(sim.stats$min, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(sim.stats$skew, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(sim.stats$stdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Stationary Markov Chain", outer = TRUE)