Emily Gill - CVEN6833 - Homework #3

December 16, 2013

Question 7: Simulation using Markov Chain GLM

rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
# source('bestARIMA.r') source('skew.s.s')
source("stats.r")
library(MASS)

Read in the monthly Lees Ferry streamflow

LFann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/LF_1906-2005.txt")
ENSOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/nino3_1905-2007.txt")
PDOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/PDO_1900-2011.txt")
AMOann <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/AMO_1856-2011.txt")

years <- seq(1906, 2005, 1)
nyrs <- length(years)
nyrs1 <- nyrs - 1
nyrs2 <- nyrs - 2
data <- as.data.frame(cbind(LFann[, 1], LFann[, 2]/(10^6), ENSOann[match(years, 
    ENSOann[, 1]), 2], PDOann[match(years, PDOann[, 1]), 2], AMOann[match(years, 
    AMOann[, 1]), 2]))
names(data) <- c("year", "LF", "ENSO", "PDO", "AMO")

LFmed <- median(data$LF)
ones <- which(data$LF > LFmed)
bin <- rep(0, nyrs)
bin[ones] <- 1

fulldata <- as.data.frame(cbind(bin[2:nyrs], bin[1:(nyrs - 1)], data$ENSO[2:nyrs], 
    data$AMO[2:nyrs], data$PDO[2:nyrs]))
names(fulldata) <- c("flow", "prevf", "enso", "amo", "pdo")

Fit a best GLM for the state series of previous annual streamflow, ENSO, AMO, PDO

zz <- glm(flow ~ ., data = fulldata, family = "binomial")
# zz <- stepAIC(zz) # PDO is the lowest AIC
zz <- glm(flow ~ prevf + pdo, data = fulldata, family = "binomial")

Write a TPM

TPM <- matrix(0, 2, 2)
for (i in 1:nyrs1) {
    if (bin[i] == 0) {
        if (bin[i + 1] == 0) {
            TPM[1, 1] = TPM[1, 1] + 1
        } else {
            TPM[1, 2] = TPM[1, 2] + 1
        }
    }

    if (bin[i] == 1) {
        if (bin[i + 1] == 0) {
            TPM[2, 1] = TPM[2, 1] + 1
        } else {
            TPM[2, 2] = TPM[2, 2] + 1
        }
    }

}

pTPM <- cbind((TPM[1, ]/sum(TPM[1, ])), (TPM[2, ]/sum(TPM[2, ])))

Create pairs of flows for each set of transition states

binlag1 <- cbind(bin[1:nyrs1], bin[2:nyrs])
flowlag1 <- cbind(data$LF[1:nyrs1], data$LF[2:nyrs])

f0s <- data$LF[which(bin == 0)]  # all flows that were 0
f1s <- data$LF[which(bin == 1)]  # all flows that were 1

i00 <- which(binlag1[, 1] == 0 & binlag1[, 2] == 0)
f00 <- flowlag1[i00, ]  # all flow pairs that were 00
i01 <- which(binlag1[, 1] == 0 & binlag1[, 2] == 1)
f01 <- flowlag1[i01, ]  # all flow pairs that were 01
i10 <- which(binlag1[, 1] == 1 & binlag1[, 2] == 0)
f10 <- flowlag1[i10, ]  # all flow pairs that were 10
i11 <- which(binlag1[, 1] == 1 & binlag1[, 2] == 1)
f11 <- flowlag1[i11, ]  # all flow pairs that were 11

Create 250 binary simulations from the GLM and randomly assign flow values to the first of each simulation using historical data.

nsim <- 250
statesim <- matrix(0, nyrs1, nsim)
flowsim <- statesim
for (j in 1:nsim) {
    # simulate 99 values from the GLM fit
    sim = predict(zz)

    # obtain binary values for simulations
    for (i in 1:nyrs1) {
        pk = sim[i]
        statesim[i, j] <- ifelse(runif(1) < pk, 1, 0)
    }

    # randomly sample historical data of 1s and 0s to assign first flow values
    if (statesim[1, j] == 0) {
        flowsim[1, j] <- sample(f0s, 1)
    } else {
        flowsim[1, j] <- sample(f1s, 1)
    }

}

Now that the simulations have the first flow values, we can conditionally simulate flow using KNN.

# Since all four states have almost the same number of pairs, the weight
# vector will be the same so we initiate this outside of the loop.
K <- round(sqrt(length(i00)))
W = 1/(1:K)
W = W/sum(W)
W = cumsum(W)

# Begin conditional simulation with KNN
for (j in 1:nsim) {
    # set the current flow to the first of each simulation
    current <- flowsim[1, j]

    for (i in 1:nyrs2) {

        if (statesim[i, j] == 0 && statesim[i + 1, j] == 0) {
            xdist <- order(abs(flowsim[i, j] - f00[, 1]))
            xx = runif(1, 0, 1)
            xy = c(xx, W)
            xx = rank(xy)
            index = xdist[xx[1]]

            flowsim[i + 1, j] = f00[index, 2]
            current <- flowsim[i + 1, j]
        }

        if (statesim[i, j] == 0 & statesim[i + 1, j] == 1) {
            xdist <- order(abs(flowsim[i, j] - f01[, 1]))
            xx = runif(1, 0, 1)
            xy = c(xx, W)
            xx = rank(xy)
            index = xdist[xx[1]]

            flowsim[i + 1, j] = f01[index, 2]
            current <- flowsim[i + 1, j]
        }

        if (statesim[i, j] == 1 & statesim[i + 1, j] == 0) {
            xdist <- order(abs(flowsim[i, j] - f10[, 1]))
            xx = runif(1, 0, 1)
            xy = c(xx, W)
            xx = rank(xy)
            index = xdist[xx[1]]

            flowsim[i + 1, j] = f10[index, 2]
            current <- flowsim[i + 1, j]
        }

        if (statesim[i, j] == 1 & statesim[i + 1, j] == 1) {
            xdist <- order(abs(flowsim[i, j] - f11[, 1]))
            xx = runif(1, 0, 1)
            xy = c(xx, W)
            xx = rank(xy)
            index = xdist[xx[1]]

            flowsim[i + 1, j] = f11[index, 2]
            current <- flowsim[i + 1, j]
        }
    }
}

Compute statistics and boxplot

LFdata <- matrix(data$LF, 100, 1)
obs.stats <- stats(LFdata, corr = FALSE, multicorr = FALSE)
sim.stats <- stats(flowsim, corr = FALSE, multicorr = FALSE)

par(mfrow = c(2, 3))
boxplot(sim.stats$mean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(sim.stats$max, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(sim.stats$min, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(sim.stats$skew, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(sim.stats$stdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Stationary Markov Chain", outer = TRUE)

plot of chunk unnamed-chunk-8