rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("bestARIMA.r")
source("skew.r")
source("stats.r")
source("plotMay.r")
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
Read in the monthly Lees Ferry streamflow and remove the seasonal cycle (remove the mean and divide by the standard deviation) :: SEASONAL
LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")
years <- LFmon[, 1]
nyrs <- length(years)
nyrs1 <- nyrs - 1
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec")
Get the coefficients of the Thomas Fiering Model to implement the non-stationary model
coef1 <- 1:12
coef2 <- 1:12
for (j in 1:12) {
j1 <- j - 1
if (j == 1)
j1 = 12
if (j == 1)
coef1[j] <- (cor(LFmon[2:nyrs, j], LFmon[1:nyrs1, j1]))
if (j > 1)
coef1[j] <- cor(LFmon[1:nyrs, j], LFmon[1:nyrs, j1])
coef2[j] <- sqrt((var(LFmon[, j])) * (1 - coef1[j] * coef1[j]))
coef1[j] <- coef1[j] * sqrt(var(LFmon[, j])/var(LFmon[, j1]))
}
Set up for 250 simulations. Initialize matrices to store statistics of simulations.
nsim <- 250 # number of simulations
nyrs <- length(LFmon[, 1]) # number of years of historical data
nyrs1 <- nyrs - 1 # number of years minus one
# initialize the matrices that store the statistics
armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arcor2 <- matrix(0, nsim, 12)
arcor3 <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)
# define points to evaluate the May pdf over
May <- LFmon[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)
# store the May simulations and create an array for the PDF
maysim <- 1:nyrs
simpdf <- matrix(0, nrow = nsim, ncol = nevals)
Perform simulation
for (k in 1:nsim) {
nmons <- nyrs * 12 # number of values to be generated
xsim <- 1:nmons
i <- round(runif(1, 1, nyrs)) # random number from uniform dist to start sim
xsim[1] <- LFmon[i, 1] # first sim is flow from random i
xprev <- xsim[1]
# loop through remaining months using Thomas Fiering coeficients
for (i in 2:nmons) {
j <- i%%12
if (j == 0)
j <- 12
j1 <- j - 1
if (j == 1)
j1 <- 12
x1 <- xprev - mean(LFmon[, j1])
x2 <- coef2[j] * rnorm(1, 0, 1)
xsim[i] <- mean(LFmon[, j]) + x1 * coef1[j] + x2
xprev <- xsim[i]
}
simdismon <- matrix(xsim, nrow = 12) # put the array into a matrix
simdismon <- t(simdismon) # transpose
# evaluate May simulated PDF at each time step
maysim <- simdismon[, 5]
simpdf[k, ] <- sm.density(maysim, eval.points = xeval, display = "none")$estimate
# compute stats on simulated timeseries
ar.stats <- stats(simdismon, corr = TRUE, multicorr = TRUE)
armean[k, ] <- ar.stats$mean
armax[k, ] <- ar.stats$max
armin[k, ] <- ar.stats$min
arstdev[k, ] <- ar.stats$stdev
arskw[k, ] <- ar.stats$skew
arcor[k, ] <- ar.stats$cor
arcor2[k, ] <- ar.stats$mcor2
arcor3[k, ] <- ar.stats$mcor3
}
Compute statistics from historical data and boxplot versus simulated
obs.stats <- stats(LFmon, corr = TRUE, multicorr = TRUE)
mos <- as.character(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec"))
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
lines(obs.stats$mean, col = "red")
title(main = "Mean")
axis(1, at = 1:12, labels = mos)
boxplot(armax, xaxt = "n")
lines(obs.stats$max, col = "red")
title(main = "Max")
axis(1, at = 1:12, labels = mos)
boxplot(armin, xaxt = "n")
lines(obs.stats$min, col = "red")
title(main = "Min")
axis(1, at = 1:12, labels = mos)
boxplot(arskw, xaxt = "n")
lines(obs.stats$skew, col = "red")
title(main = "Skew")
axis(1, at = 1:12, labels = mos)
boxplot(arstdev, xaxt = "n")
lines(obs.stats$stdev, col = "red")
title(main = "SD")
axis(1, at = 1:12, labels = mos)
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Statistics for Nonstationary Lag-1 ARMA model", outer = TRUE)
Comments on Statistical Plots:
Using the nonstationary (seasonal) lag-1 ARMA gives us good fit on the Lag-1 Correlation plot (compare this to the lack of one in the stationary nonseasonal ARMA). This is because we are basically using 12 different models taking into account the change from month to month. As expected, mean, maximum, minimum and SD are also good fits. The skew is off since this is still assuming normal distribution.
Boxplot the May PDF
plotMay(May, simpdf) # used own function to consolidate May PDf plotting commands
par(oma = c(2, 2, 3, 2))
title("May PDF from Simulation", outer = TRUE)
Comments on May PDF
While this method improves our lag-1 correlation plot (above), it still misses the bimodality in the May streamflow PDF because it assumes normality. Until we use a nonparametric method, we won't capture the bimodality.
Compare multiple lag correlation results
par(mfrow = c(1, 3))
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor2, xaxt = "n")
lines(obs.stats$mcor2, col = "red")
title(main = "Lag-2 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor3, xaxt = "n")
lines(obs.stats$mcor3, col = "red")
title(main = "Lag-3 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Comparison of multiple lags", outer = TRUE)
Comments on lag correlations
This method does capture multiple correlations. This is because we are simulating all 12 at the same time. Therefore, correlation between most months at lags of 1, 2, and 3 months out are captured.