Emily Gill - CVEN6833 - Homework #3

December 16, 2013

Question 6: Spectral Simulation of the Fourier Spectrum by WARMA

rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
library(tseries)
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
source("rawspec.r")
source("smoothspec.r")
source("bestARIMA.r")
source("stats.r")
source("plotMay.r")

Read in the monthly Lees Ferry streamflow and define May flow values

LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")

years <- LFmon[, 1]
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))  # convert to Million Acre Feet (MaF)
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", 
    "Oct", "Nov", "Dec")

y <- LFmon[, 5]
deltaT <- 1
t <- 1:length(y)

Find the number of frequencies

N <- length(y)
freq <- 2 * pi * (1:round((N/2)))/N
plotfreqs <- freq/(2 * pi * (deltaT))
nfreq <- length(freq)

Fit a best AR model to the data

ps <- c(1, 2, 3, 4)
q <- 0
zz <- bestARIMA((y - mean(y)), ps, q)
p <- zz$bestp
armodel <- arima((y - mean(y)), order = c(p, 0, 0), include.mean = TRUE, method = "ML")

Initialize vectors to store statistics

nsim <- 250
armean <- matrix(0, nsim, 1)
surmean <- matrix(0, nsim, 1)
arstdev <- matrix(0, nsim, 1)
surstdev <- matrix(0, nsim, 1)
arskw <- matrix(0, nsim, 1)
surskw <- matrix(0, nsim, 1)
armax <- matrix(0, nsim, 1)
surmax <- matrix(0, nsim, 1)
armin <- matrix(0, nsim, 1)
surmin <- matrix(0, nsim, 1)

# set up points to evaluate May PDFs over
May <- y
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)

# store the May simulations and create an array for the PDF
simpdf.ar <- matrix(0, nrow = nsim, ncol = nevals)
simpdf.sur <- matrix(0, nrow = nsim, ncol = nevals)

# matrix to hold the raw spectrum
specsimrawar = matrix(0, ncol = nsim, nrow = nfreq)
specsimrawsur = matrix(0, ncol = nsim, nrow = nfreq)

# matrix to hold the smoothed spectrum
specsimrawars = matrix(0, ncol = nsim, nrow = nfreq)
specsimrawsurs = matrix(0, ncol = nsim, nrow = nfreq)

Simluate from the fitted AR-1 model

for (i in 1:nsim) {
    # simulate from the AR1 model
    ysim = arima.sim(n = N, list(ar = c(armodel$coef[1:p])), sd = sqrt(armodel$sigma2)) + 
        mean(y)

    # simluate from the spectrum
    ysims = surrogate(y, fft = TRUE, amplitude = TRUE)

    # compute pdfs of simulations
    simpdf.ar[i, ] = sm.density(ysim, eval.points = xeval, display = "none")$estimate
    simpdf.sur[i, ] = sm.density(ysims, eval.points = xeval, display = "none")$estimate

    # compute stats on simulated ARMA
    ysimmat <- matrix(ysim, length(ysim), 1)
    ar.stats <- stats(ysimmat, corr = FALSE, multicorr = FALSE)
    armean[i, ] <- ar.stats$mean
    armax[i, ] <- ar.stats$max
    armin[i, ] <- ar.stats$min
    arstdev[i, ] <- ar.stats$stdev
    arskw[i, ] <- ar.stats$skew

    # compute stats on simulated ARMA
    ysimsmat <- matrix(ysims, length(ysims), 1)
    sur.stats <- stats(ysimsmat, corr = FALSE, multicorr = FALSE)
    surmean[i, ] <- sur.stats$mean
    surmax[i, ] <- sur.stats$max
    surmin[i, ] <- sur.stats$min
    surstdev[i, ] <- sur.stats$stdev
    surskw[i, ] <- sur.stats$skew

    # compute the raw spectrum of the simulations
    specsimrawar[, i] = rawspec(ysim, deltaT)
    specsimrawsur[, i] = rawspec(ysims, deltaT)

    # compute the smooth spectrum of the simulations
    specsimrawars[, i] = smoothspec(ysim, deltaT)
    specsimrawsurs[, i] = smoothspec(ysims, deltaT)
}

pgram <- rawspec(y, deltaT)  # raw spectrum of original data
pgrams <- smoothspec(y, deltaT)  # smooth spectrum of original data
# plot the raw spectrum from the AR simulations
par(mfrow = c(2, 2))
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawar, 
    pgram), col = "red", lwd = 2)

for (i in 1:nsim) {
    lines(plotfreqs, specsimrawar[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
title(main = "Raw Spectrum - AR Simulation")

# plot the raw spectrum from the surrogate data
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsur, 
    pgram), col = "red", lwd = 3, type = "l")

for (i in 1:nsim) {
    lines(plotfreqs, specsimrawsur[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
title(main = "Raw Spectrum - Surrogate Data")

# plot the smooth spectrum of the AR simulation
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawars, 
    pgrams), col = "red", lwd = 3, type = "l")

for (i in 1:nsim) {
    lines(plotfreqs, specsimrawars[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
title(main = "Smooth Spectrum - AR Simulation")

# plot the smooth spectrum of the surrogate data
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsurs, 
    pgrams), col = "red", lwd = 3, type = "l")

for (i in 1:nsim) {
    lines(plotfreqs, specsimrawsurs[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
title(main = "Smooth Spectrum - Surrogate Data")

plot of chunk unnamed-chunk-7

Comments on the Fourier Simulations

Comparing simulations from the Fourier Spectrum using ARMA versus surrogate simulation, we see that the surrogate data does a much better job. This makes sense because in using this surrogate function we basically generate a sample following a bootstrap approach and implement phase randomization. In this way, we are not assuming anything about the underlying distribution in the way that ARMA does.

Compute statistics and boxplot for best ARMA in Fourier

obs.flow <- matrix(y, length(y), 1)
obs.stats <- stats(obs.flow, corr = FALSE, multicorr = FALSE)

par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(armax, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(armin, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(arskw, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(arstdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Fourier Spectrum Simulation using ARMA", outer = TRUE)

plot of chunk unnamed-chunk-8

Comments on statistics from ARMA in Fourier

Here, the mean and sd are captured the best. The maximum, minimum and skew are decently captured, but slightly off. This is due to the underlying issues of using ARMA on a dataset that is non-normal.

Compute statistics and boxplot for surrogate and Fourier

par(mfrow = c(2, 3))
boxplot(surmean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(surmax, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(surmin, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(surskw, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(surstdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Fourier Spectrum Simulation using surrogate", outer = TRUE)

plot of chunk unnamed-chunk-9

Comments on statistics from surrogate data in Fourier

The surrogate data, since it is bootstrapped, captures all of the statistics perfectly. In fact, each surrogate dataset has exactly the same statistics as the original dataset.

Plot the May PDF based on ARMA

plotMay(y, simpdf.ar)
par(oma = c(2, 2, 3, 2))
title("May PDF from ARMA Simulation", outer = TRUE)

plot of chunk unnamed-chunk-10

Comments on best ARMA May PDF

Here, we see the same problem as in the other questions that used ARMA. Since ARMA assumes a normal distribution, we will not be about to capture the bimodality of May streamflow.

Plot the May PDF based on surrogate data

plotMay(y, simpdf.sur)
par(oma = c(2, 2, 3, 2))
title("May PDF from Surrogate Simulation", outer = TRUE)

plot of chunk unnamed-chunk-11

Comments on surrogate data May PDF

Here, because the surrogate data is selected by a bootstrapping method, we retain the underlying distribution of the original data. Therefore, we capture the bimodality of May.