rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
library(tseries)
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
source("rawspec.r")
source("smoothspec.r")
source("bestARIMA.r")
source("stats.r")
source("plotMay.r")
Read in the monthly Lees Ferry streamflow and define May flow values
LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")
years <- LFmon[, 1]
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6)) # convert to Million Acre Feet (MaF)
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec")
y <- LFmon[, 5]
deltaT <- 1
t <- 1:length(y)
Find the number of frequencies
N <- length(y)
freq <- 2 * pi * (1:round((N/2)))/N
plotfreqs <- freq/(2 * pi * (deltaT))
nfreq <- length(freq)
Fit a best AR model to the data
ps <- c(1, 2, 3, 4)
q <- 0
zz <- bestARIMA((y - mean(y)), ps, q)
p <- zz$bestp
armodel <- arima((y - mean(y)), order = c(p, 0, 0), include.mean = TRUE, method = "ML")
Initialize vectors to store statistics
nsim <- 250
armean <- matrix(0, nsim, 1)
surmean <- matrix(0, nsim, 1)
arstdev <- matrix(0, nsim, 1)
surstdev <- matrix(0, nsim, 1)
arskw <- matrix(0, nsim, 1)
surskw <- matrix(0, nsim, 1)
armax <- matrix(0, nsim, 1)
surmax <- matrix(0, nsim, 1)
armin <- matrix(0, nsim, 1)
surmin <- matrix(0, nsim, 1)
# set up points to evaluate May PDFs over
May <- y
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)
# store the May simulations and create an array for the PDF
simpdf.ar <- matrix(0, nrow = nsim, ncol = nevals)
simpdf.sur <- matrix(0, nrow = nsim, ncol = nevals)
# matrix to hold the raw spectrum
specsimrawar = matrix(0, ncol = nsim, nrow = nfreq)
specsimrawsur = matrix(0, ncol = nsim, nrow = nfreq)
# matrix to hold the smoothed spectrum
specsimrawars = matrix(0, ncol = nsim, nrow = nfreq)
specsimrawsurs = matrix(0, ncol = nsim, nrow = nfreq)
Simluate from the fitted AR-1 model
for (i in 1:nsim) {
# simulate from the AR1 model
ysim = arima.sim(n = N, list(ar = c(armodel$coef[1:p])), sd = sqrt(armodel$sigma2)) +
mean(y)
# simluate from the spectrum
ysims = surrogate(y, fft = TRUE, amplitude = TRUE)
# compute pdfs of simulations
simpdf.ar[i, ] = sm.density(ysim, eval.points = xeval, display = "none")$estimate
simpdf.sur[i, ] = sm.density(ysims, eval.points = xeval, display = "none")$estimate
# compute stats on simulated ARMA
ysimmat <- matrix(ysim, length(ysim), 1)
ar.stats <- stats(ysimmat, corr = FALSE, multicorr = FALSE)
armean[i, ] <- ar.stats$mean
armax[i, ] <- ar.stats$max
armin[i, ] <- ar.stats$min
arstdev[i, ] <- ar.stats$stdev
arskw[i, ] <- ar.stats$skew
# compute stats on simulated ARMA
ysimsmat <- matrix(ysims, length(ysims), 1)
sur.stats <- stats(ysimsmat, corr = FALSE, multicorr = FALSE)
surmean[i, ] <- sur.stats$mean
surmax[i, ] <- sur.stats$max
surmin[i, ] <- sur.stats$min
surstdev[i, ] <- sur.stats$stdev
surskw[i, ] <- sur.stats$skew
# compute the raw spectrum of the simulations
specsimrawar[, i] = rawspec(ysim, deltaT)
specsimrawsur[, i] = rawspec(ysims, deltaT)
# compute the smooth spectrum of the simulations
specsimrawars[, i] = smoothspec(ysim, deltaT)
specsimrawsurs[, i] = smoothspec(ysims, deltaT)
}
pgram <- rawspec(y, deltaT) # raw spectrum of original data
pgrams <- smoothspec(y, deltaT) # smooth spectrum of original data
# plot the raw spectrum from the AR simulations
par(mfrow = c(2, 2))
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawar,
pgram), col = "red", lwd = 2)
for (i in 1:nsim) {
lines(plotfreqs, specsimrawar[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
title(main = "Raw Spectrum - AR Simulation")
# plot the raw spectrum from the surrogate data
plot(plotfreqs, pgram, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsur,
pgram), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsur[, i], col = "grey")
}
lines(plotfreqs, pgram, col = "red", lwd = 3)
title(main = "Raw Spectrum - Surrogate Data")
# plot the smooth spectrum of the AR simulation
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawars,
pgrams), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawars[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
title(main = "Smooth Spectrum - AR Simulation")
# plot the smooth spectrum of the surrogate data
plot(plotfreqs, pgrams, xlab = "Freq. cy/yr", ylab = "Spectrum", ylim = range(specsimrawsurs,
pgrams), col = "red", lwd = 3, type = "l")
for (i in 1:nsim) {
lines(plotfreqs, specsimrawsurs[, i], col = "grey")
}
lines(plotfreqs, pgrams, col = "red", lwd = 3)
title(main = "Smooth Spectrum - Surrogate Data")
Comments on the Fourier Simulations
Comparing simulations from the Fourier Spectrum using ARMA versus surrogate simulation, we see that the surrogate data does a much better job. This makes sense because in using this surrogate function we basically generate a sample following a bootstrap approach and implement phase randomization. In this way, we are not assuming anything about the underlying distribution in the way that ARMA does.
Compute statistics and boxplot for best ARMA in Fourier
obs.flow <- matrix(y, length(y), 1)
obs.stats <- stats(obs.flow, corr = FALSE, multicorr = FALSE)
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(armax, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(armin, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(arskw, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(arstdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Fourier Spectrum Simulation using ARMA", outer = TRUE)
Comments on statistics from ARMA in Fourier
Here, the mean and sd are captured the best. The maximum, minimum and skew are decently captured, but slightly off. This is due to the underlying issues of using ARMA on a dataset that is non-normal.
Compute statistics and boxplot for surrogate and Fourier
par(mfrow = c(2, 3))
boxplot(surmean, xaxt = "n")
points(obs.stats$mean, col = "red")
title(main = "Mean")
boxplot(surmax, xaxt = "n")
points(obs.stats$max, col = "red")
title(main = "Max")
boxplot(surmin, xaxt = "n")
points(obs.stats$min, col = "red")
title(main = "Min")
boxplot(surskw, xaxt = "n")
points(obs.stats$skew, col = "red")
title(main = "Skew")
boxplot(surstdev, xaxt = "n")
points(obs.stats$stdev, col = "red")
title(main = "SD")
par(oma = c(2, 2, 3, 2))
title("Statistics for Fourier Spectrum Simulation using surrogate", outer = TRUE)
Comments on statistics from surrogate data in Fourier
The surrogate data, since it is bootstrapped, captures all of the statistics perfectly. In fact, each surrogate dataset has exactly the same statistics as the original dataset.
Plot the May PDF based on ARMA
plotMay(y, simpdf.ar)
par(oma = c(2, 2, 3, 2))
title("May PDF from ARMA Simulation", outer = TRUE)
Comments on best ARMA May PDF
Here, we see the same problem as in the other questions that used ARMA. Since ARMA assumes a normal distribution, we will not be about to capture the bimodality of May streamflow.
Plot the May PDF based on surrogate data
plotMay(y, simpdf.sur)
par(oma = c(2, 2, 3, 2))
title("May PDF from Surrogate Simulation", outer = TRUE)
Comments on surrogate data May PDF
Here, because the surrogate data is selected by a bootstrapping method, we retain the underlying distribution of the original data. Therefore, we capture the bimodality of May.