Emily Gill - CVEN6833 - Homework #3

December 16, 2013

Question 5: Multivariate Simulation

rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("bestARIMA.r")
source("skew.r")
source("stats.r")
source("plotMay.r")
library(sm)

## Package `sm', version 2.2-5: type help(sm) for summary information

Read in the monthly Lees Ferry streamflow

LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")

years <- LFmon[, 1]
nyrs <- length(years)
nyrs1 <- nyrs - 1
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", 
    "Oct", "Nov", "Dec")

LFmean <- apply(LFmon, 2, mean)  # monthly mean
LFsd <- apply(LFmon, 2, sd)  # monthly sd
X <- t((t(LFmon) - LFmean)/LFsd)
# for (i in 1:12){ LFmon[,i]=(LFmon[,i]-LFmean[i])/LFsd[i] }

Perform a PCA

zs <- var(X)
zsvd <- svd(zs)
pcs <- t(t(zsvd$u) %*% t(X))
lambdas <- (zsvd$d/sum(zsvd$d))

par(mfrow = c(1, 1))
plot(1:12, lambdas[1:12], type = "l", xlab = "Modes/PCSs", ylab = "Fraction of Variance Explained", 
    main = "Eigen Value Spectrum")
points(1:12, lambdas[1:12], col = "red")

plot of chunk unnamed-chunk-3

plot(cumsum(lambdas))  # shows to use the first 4
title(main = "Cumulative Variance Explained")

plot of chunk unnamed-chunk-3

Comments on Cumulative Variance Plot

In plotting the cumulative variance explained, we see that the first four explain approximately 80% iof the variance. Therefore, we will fit an ARMA to each of those.

Fit a best ARMA to each PC

ps <- c(1, 2, 3, 4)
qs <- c(1, 2, 3, 4)
z1 <- bestARIMA(pcs[, 1], ps, qs)  # ARMA fit for PC1

## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1

z1

## $aics
##      AIC p q
## 1  447.5 1 1
## 2  449.2 1 2
## 3  451.2 1 3
## 4  452.7 1 4
## 5  449.3 2 1
## 6  450.5 2 2
## 7  451.8 2 3
## 8  454.3 2 4
## 9  450.5 3 1
## 10 452.1 3 2
## 11 455.2 3 3
## 12 456.2 3 4
## 13 452.1 4 1
## 14 454.1 4 2
## 15 449.3 4 3
## 16 451.6 4 4
## 
## $bestp
## [1] 1
## 
## $bestq
## [1] 1
## 
## $bestmod
## 
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
## 
## Coefficients:
##         ar1     ma1  intercept
##       0.533  -0.162     -0.013
## s.e.  0.094   0.208      0.377
## 
## sigma^2 estimated as 4.54:  log likelihood = -219.8,  aic = 447.5

zz1 <- z1$bestmod  # p=1 q=1

z2 <- bestARIMA(pcs[, 2], ps, qs)  # ARMA fit for PC2
z2

## $aics
##      AIC p q
## 1  348.9 1 1
## 2  351.2 1 2
## 3  352.4 1 3
## 4  354.6 1 4
## 5  350.2 2 1
## 6  352.4 2 2
## 7  351.6 2 3
## 8  353.0 2 4
## 9  352.1 3 1
## 10 351.6 3 2
## 11 353.4 3 3
## 12 355.4 3 4
## 13 353.7 4 1
## 14 353.3 4 2
## 15 355.3 4 3
## 16 356.0 4 4
## 
## $bestp
## [1] 1
## 
## $bestq
## [1] 1
## 
## $bestmod
## 
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
## 
## Coefficients:
##          ar1    ma1  intercept
##       -0.984  0.934     -0.001
## s.e.   0.002  0.014      0.127
## 
## sigma^2 estimated as 1.71:  log likelihood = -170.5,  aic = 348.9

zz2 <- z2$bestmod  # p=1 q=1

z3 <- bestARIMA(pcs[, 3], ps, qs)  # ARMA fit for PC3

## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1

z3

## $aics
##      AIC p q
## 1  315.9 1 1
## 2  321.6 1 2
## 3  323.5 1 3
## 4  321.8 1 4
## 5  320.9 2 1
## 6  314.3 2 2
## 7  316.3 2 3
## 8  326.0 2 4
## 9  322.2 3 1
## 10 316.3 3 2
## 11 318.5 3 3
## 12 318.2 3 4
## 13 315.5 4 1
## 14 317.4 4 2
## 15 320.1 4 3
## 16 321.1 4 4
## 
## $bestp
## [1] 2
## 
## $bestq
## [1] 2
## 
## $bestmod
## 
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
## 
## Coefficients:
##         ar1    ar2     ma1     ma2  intercept
##       0.040  0.939  -0.046  -0.797      0.038
## s.e.  0.057  0.055   0.054   0.251      0.498
## 
## sigma^2 estimated as 1.15:  log likelihood = -151.2,  aic = 314.3

zz3 <- z3$bestmod  # p=2 q=2

z4 <- bestARIMA(pcs[, 4], ps, qs)  # ARMA fit for PC4

## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1

z4

## $aics
##      AIC p q
## 1  257.7 1 1
## 2  258.3 1 2
## 3  259.2 1 3
## 4  259.0 1 4
## 5  258.7 2 1
## 6  260.2 2 2
## 7  257.4 2 3
## 8  258.9 2 4
## 9  257.1 3 1
## 10 259.1 3 2
## 11 260.9 3 3
## 12 262.9 3 4
## 13 257.5 4 1
## 14 258.1 4 2
## 15 262.5 4 3
## 16 264.6 4 4
## 
## $bestp
## [1] 3
## 
## $bestq
## [1] 1
## 
## $bestmod
## 
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
## 
## Coefficients:
##          ar1    ar2    ar3    ma1  intercept
##       -0.095  0.191  0.313  0.217      0.019
## s.e.   0.101  0.108  0.107  0.347      0.163
## 
## sigma^2 estimated as 0.66:  log likelihood = -122.5,  aic = 257.1

zz4 <- z4$bestmod  # p=3 q=1

Initialize vectors to store statistics

nsim <- 250
armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arcor2 <- matrix(0, nsim, 12)
arcor3 <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)

# set up points to evaluate May PDFs over
May <- LFmon[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)

# store the May simulations and create an array for the PDF
maysim <- 1:nyrs
simpdf <- matrix(0, nrow = nsim, ncol = nevals)

Simulate each PC from it's respective model and take the normal distribution

for (k in 1:nsim) {

    # Sim PCs from ARIMA model
    PC1sim <- arima.sim(n = nyrs, list(ar = c(zz1$coef[1:z1$bestp]), ma = c(zz1$coef[((z1$bestp) + 
        1):(z1$bestp + z1$bestq)])), sd = sqrt(zz1$sigma2)) + mean(pcs[, 1])
    PC2sim <- arima.sim(n = nyrs, list(ar = c(zz2$coef[1:z2$bestp]), ma = c(zz2$coef[((z2$bestp) + 
        1):(z2$bestp + z2$bestq)])), sd = sqrt(zz2$sigma2)) + mean(pcs[, 2])
    PC3sim <- arima.sim(n = nyrs, list(ar = c(zz3$coef[1:z3$bestp]), ma = c(zz3$coef[((z3$bestp) + 
        1):(z3$bestp + z3$bestq)])), sd = sqrt(zz3$sigma2)) + mean(pcs[, 3])
    PC4sim <- arima.sim(n = nyrs, list(ar = c(zz4$coef[1:z4$bestp]), ma = c(zz4$coef[((z4$bestp) + 
        1):(z4$bestp + z4$bestq)])), sd = sqrt(zz4$sigma2)) + mean(pcs[, 4])

    # Sim remaining from normal distribution
    zzs <- matrix(0, 101, 8)
    for (i in 1:8) {
        zzs[, i] <- rnorm(101, mean = mean(pcs[, i + 4]), sd = sd(pcs[, i + 
            4]))
    }

    simPCs <- cbind(PC1sim, PC2sim, PC3sim, PC4sim, zzs)
    simstflow <- simPCs %*% t(zsvd$u)

    # Unscale the PCs
    simflow <- t(t(simstflow) * LFsd + LFmean)

    # PDF for May
    maysim = simflow[, 5]
    simpdf[k, ] = sm.density(maysim, eval.points = xeval, display = "none")$estimate

    # compute stats on simulated timeseries
    ar.stats <- stats(simflow, corr = TRUE, multicorr = TRUE)
    armean[k, ] <- ar.stats$mean
    armax[k, ] <- ar.stats$max
    armin[k, ] <- ar.stats$min
    arstdev[k, ] <- ar.stats$stdev
    arskw[k, ] <- ar.stats$skew
    arcor[k, ] <- ar.stats$cor
    arcor2[k, ] <- ar.stats$mcor2
    arcor3[k, ] <- ar.stats$mcor3

}

Compute statistics from historical data and boxplot versus simulated

obs.stats <- stats(LFmon, corr = TRUE, multicorr = TRUE)
mos <- as.character(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", 
    "Sep", "Oct", "Nov", "Dec"))
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
lines(obs.stats$mean, col = "red")
title(main = "Mean")
axis(1, at = 1:12, labels = mos)
boxplot(armax, xaxt = "n")
lines(obs.stats$max, col = "red")
title(main = "Max")
axis(1, at = 1:12, labels = mos)
boxplot(armin, xaxt = "n")
lines(obs.stats$min, col = "red")
title(main = "Min")
axis(1, at = 1:12, labels = mos)
boxplot(arskw, xaxt = "n")
lines(obs.stats$skew, col = "red")
title(main = "Skew")
axis(1, at = 1:12, labels = mos)
boxplot(arstdev, xaxt = "n")
lines(obs.stats$stdev, col = "red")
title(main = "SD")
axis(1, at = 1:12, labels = mos)
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Statistics for Multivariate Simulation on PCs", outer = TRUE)

plot of chunk unnamed-chunk-7

Comments on Statistics Plots:

Using an ARMA multivariate simulation using PCs of the timeseries we can notice a couple things from the statistical plots comparing simulatd and observed timeseries:

The mean, max and SD are captured nicely by the simulations. The min is generally captured.

Additionally, lag-1 correlation is captured nicely since this is a seasonal model on the PCs.

Skew is not captured, as expected since this is still assuming normality.

Boxplot the May PDF

plotMay(May, simpdf)  # used own function to consolidate May PDf plotting commands
par(oma = c(2, 2, 3, 2))
title("May PDF from Simulation", outer = TRUE)

plot of chunk unnamed-chunk-8

Comments on May PDF

As expected, since we are still using ARMA, we are assuming a normal distribution. Therefore we are unable to capture the bimodality in May streamflow.

Compare multiple lag correlation results

par(mfrow = c(1, 3))
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor2, xaxt = "n")
lines(obs.stats$mcor2, col = "red")
title(main = "Lag-2 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor3, xaxt = "n")
lines(obs.stats$mcor3, col = "red")
title(main = "Lag-3 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Comparison of multiple lags", outer = TRUE)

plot of chunk unnamed-chunk-9

Comments on lag correlations

This method does capture multiple correlations. This is because we are simulating all 12 at the same time. Therefore, correlation between months at most lags are captured.