Emily Gill - CVEN6833 - Homework #3

December 16, 2013

Question 3: ARMA and Nonparametric Time Series Simulation ::: SEASONAL ARMA. Fits a Nn-stationary lag-1 AR model.

rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("bestARIMA.r")
source("skew.r")
source("stats.r")
source("plotMay.r")
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information

Read in the monthly Lees Ferry streamflow and remove the seasonal cycle (remove the mean and divide by the standard deviation) :: SEASONAL

LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")

years <- LFmon[, 1]
nyrs <- length(years)
nyrs1 <- nyrs - 1
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", 
    "Oct", "Nov", "Dec")

Get the coefficients of the Thomas Fiering Model to implement the non-stationary model

coef1 <- 1:12
coef2 <- 1:12

for (j in 1:12) {
    j1 <- j - 1
    if (j == 1) 
        j1 = 12
    if (j == 1) 
        coef1[j] <- (cor(LFmon[2:nyrs, j], LFmon[1:nyrs1, j1]))
    if (j > 1) 
        coef1[j] <- cor(LFmon[1:nyrs, j], LFmon[1:nyrs, j1])
    coef2[j] <- sqrt((var(LFmon[, j])) * (1 - coef1[j] * coef1[j]))
    coef1[j] <- coef1[j] * sqrt(var(LFmon[, j])/var(LFmon[, j1]))

}

Set up for 250 simulations. Initialize matrices to store statistics of simulations.

nsim <- 250  # number of simulations 
nyrs <- length(LFmon[, 1])  # number of years of historical data
nyrs1 <- nyrs - 1  # number of years minus one

# initialize the matrices that store the statistics
armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arcor2 <- matrix(0, nsim, 12)
arcor3 <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)

# define points to evaluate the May pdf over
May <- LFmon[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)

# store the May simulations and create an array for the PDF
maysim <- 1:nyrs
simpdf <- matrix(0, nrow = nsim, ncol = nevals)

Perform simulation

for (k in 1:nsim) {
    nmons <- nyrs * 12  # number of values to be generated

    xsim <- 1:nmons
    i <- round(runif(1, 1, nyrs))  # random number from uniform dist to start sim 
    xsim[1] <- LFmon[i, 1]  # first sim is flow from random i
    xprev <- xsim[1]

    # loop through remaining months using Thomas Fiering coeficients
    for (i in 2:nmons) {
        j <- i%%12
        if (j == 0) 
            j <- 12
        j1 <- j - 1
        if (j == 1) 
            j1 <- 12
        x1 <- xprev - mean(LFmon[, j1])
        x2 <- coef2[j] * rnorm(1, 0, 1)

        xsim[i] <- mean(LFmon[, j]) + x1 * coef1[j] + x2
        xprev <- xsim[i]
    }

    simdismon <- matrix(xsim, nrow = 12)  # put the array into a matrix 
    simdismon <- t(simdismon)  # transpose

    # evaluate May simulated PDF at each time step
    maysim <- simdismon[, 5]
    simpdf[k, ] <- sm.density(maysim, eval.points = xeval, display = "none")$estimate

    # compute stats on simulated timeseries
    ar.stats <- stats(simdismon, corr = TRUE, multicorr = TRUE)
    armean[k, ] <- ar.stats$mean
    armax[k, ] <- ar.stats$max
    armin[k, ] <- ar.stats$min
    arstdev[k, ] <- ar.stats$stdev
    arskw[k, ] <- ar.stats$skew
    arcor[k, ] <- ar.stats$cor
    arcor2[k, ] <- ar.stats$mcor2
    arcor3[k, ] <- ar.stats$mcor3
}

Compute statistics from historical data and boxplot versus simulated

obs.stats <- stats(LFmon, corr = TRUE, multicorr = TRUE)
mos <- as.character(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", 
    "Sep", "Oct", "Nov", "Dec"))
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
lines(obs.stats$mean, col = "red")
title(main = "Mean")
axis(1, at = 1:12, labels = mos)
boxplot(armax, xaxt = "n")
lines(obs.stats$max, col = "red")
title(main = "Max")
axis(1, at = 1:12, labels = mos)
boxplot(armin, xaxt = "n")
lines(obs.stats$min, col = "red")
title(main = "Min")
axis(1, at = 1:12, labels = mos)
boxplot(arskw, xaxt = "n")
lines(obs.stats$skew, col = "red")
title(main = "Skew")
axis(1, at = 1:12, labels = mos)
boxplot(arstdev, xaxt = "n")
lines(obs.stats$stdev, col = "red")
title(main = "SD")
axis(1, at = 1:12, labels = mos)
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Statistics for Nonstationary Lag-1 ARMA model", outer = TRUE)

plot of chunk unnamed-chunk-6

Comments on Statistical Plots:

Using the nonstationary (seasonal) lag-1 ARMA gives us good fit on the Lag-1 Correlation plot (compare this to the lack of one in the stationary nonseasonal ARMA). This is because we are basically using 12 different models taking into account the change from month to month. As expected, mean, maximum, minimum and SD are also good fits. The skew is off since this is still assuming normal distribution.

Boxplot the May PDF

plotMay(May, simpdf)  # used own function to consolidate May PDf plotting commands
par(oma = c(2, 2, 3, 2))
title("May PDF from Simulation", outer = TRUE)

plot of chunk unnamed-chunk-7

Comments on May PDF

While this method improves our lag-1 correlation plot (above), it still misses the bimodality in the May streamflow PDF because it assumes normality. Until we use a nonparametric method, we won't capture the bimodality.

Compare multiple lag correlation results

par(mfrow = c(1, 3))
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor2, xaxt = "n")
lines(obs.stats$mcor2, col = "red")
title(main = "Lag-2 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor3, xaxt = "n")
lines(obs.stats$mcor3, col = "red")
title(main = "Lag-3 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Comparison of multiple lags", outer = TRUE)

plot of chunk unnamed-chunk-8

Comments on lag correlations

This method does capture multiple correlations. This is because we are simulating all 12 at the same time. Therefore, correlation between most months at lags of 1, 2, and 3 months out are captured.