rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("bestARIMA.r")
source("skew.r")
source("stats.r")
source("plotMay.r")
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
Read in the monthly Lees Ferry streamflow
LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")
years <- LFmon[, 1]
nyrs <- length(years)
nyrs1 <- nyrs - 1
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6))
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec")
LFmean <- apply(LFmon, 2, mean) # monthly mean
LFsd <- apply(LFmon, 2, sd) # monthly sd
X <- t((t(LFmon) - LFmean)/LFsd)
# for (i in 1:12){ LFmon[,i]=(LFmon[,i]-LFmean[i])/LFsd[i] }
Perform a PCA
zs <- var(X)
zsvd <- svd(zs)
pcs <- t(t(zsvd$u) %*% t(X))
lambdas <- (zsvd$d/sum(zsvd$d))
par(mfrow = c(1, 1))
plot(1:12, lambdas[1:12], type = "l", xlab = "Modes/PCSs", ylab = "Fraction of Variance Explained",
main = "Eigen Value Spectrum")
points(1:12, lambdas[1:12], col = "red")
plot(cumsum(lambdas)) # shows to use the first 4
title(main = "Cumulative Variance Explained")
Comments on Cumulative Variance Plot
In plotting the cumulative variance explained, we see that the first four explain approximately 80% iof the variance. Therefore, we will fit an ARMA to each of those.
Fit a best ARMA to each PC
ps <- c(1, 2, 3, 4)
qs <- c(1, 2, 3, 4)
z1 <- bestARIMA(pcs[, 1], ps, qs) # ARMA fit for PC1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
z1
## $aics
## AIC p q
## 1 447.5 1 1
## 2 449.2 1 2
## 3 451.2 1 3
## 4 452.7 1 4
## 5 449.3 2 1
## 6 450.5 2 2
## 7 451.8 2 3
## 8 454.3 2 4
## 9 450.5 3 1
## 10 452.1 3 2
## 11 455.2 3 3
## 12 456.2 3 4
## 13 452.1 4 1
## 14 454.1 4 2
## 15 449.3 4 3
## 16 451.6 4 4
##
## $bestp
## [1] 1
##
## $bestq
## [1] 1
##
## $bestmod
##
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
##
## Coefficients:
## ar1 ma1 intercept
## 0.533 -0.162 -0.013
## s.e. 0.094 0.208 0.377
##
## sigma^2 estimated as 4.54: log likelihood = -219.8, aic = 447.5
zz1 <- z1$bestmod # p=1 q=1
z2 <- bestARIMA(pcs[, 2], ps, qs) # ARMA fit for PC2
z2
## $aics
## AIC p q
## 1 348.9 1 1
## 2 351.2 1 2
## 3 352.4 1 3
## 4 354.6 1 4
## 5 350.2 2 1
## 6 352.4 2 2
## 7 351.6 2 3
## 8 353.0 2 4
## 9 352.1 3 1
## 10 351.6 3 2
## 11 353.4 3 3
## 12 355.4 3 4
## 13 353.7 4 1
## 14 353.3 4 2
## 15 355.3 4 3
## 16 356.0 4 4
##
## $bestp
## [1] 1
##
## $bestq
## [1] 1
##
## $bestmod
##
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
##
## Coefficients:
## ar1 ma1 intercept
## -0.984 0.934 -0.001
## s.e. 0.002 0.014 0.127
##
## sigma^2 estimated as 1.71: log likelihood = -170.5, aic = 348.9
zz2 <- z2$bestmod # p=1 q=1
z3 <- bestARIMA(pcs[, 3], ps, qs) # ARMA fit for PC3
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
z3
## $aics
## AIC p q
## 1 315.9 1 1
## 2 321.6 1 2
## 3 323.5 1 3
## 4 321.8 1 4
## 5 320.9 2 1
## 6 314.3 2 2
## 7 316.3 2 3
## 8 326.0 2 4
## 9 322.2 3 1
## 10 316.3 3 2
## 11 318.5 3 3
## 12 318.2 3 4
## 13 315.5 4 1
## 14 317.4 4 2
## 15 320.1 4 3
## 16 321.1 4 4
##
## $bestp
## [1] 2
##
## $bestq
## [1] 2
##
## $bestmod
##
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
##
## Coefficients:
## ar1 ar2 ma1 ma2 intercept
## 0.040 0.939 -0.046 -0.797 0.038
## s.e. 0.057 0.055 0.054 0.251 0.498
##
## sigma^2 estimated as 1.15: log likelihood = -151.2, aic = 314.3
zz3 <- z3$bestmod # p=2 q=2
z4 <- bestARIMA(pcs[, 4], ps, qs) # ARMA fit for PC4
## Warning: possible convergence problem: optim gave code = 1
## Warning: possible convergence problem: optim gave code = 1
z4
## $aics
## AIC p q
## 1 257.7 1 1
## 2 258.3 1 2
## 3 259.2 1 3
## 4 259.0 1 4
## 5 258.7 2 1
## 6 260.2 2 2
## 7 257.4 2 3
## 8 258.9 2 4
## 9 257.1 3 1
## 10 259.1 3 2
## 11 260.9 3 3
## 12 262.9 3 4
## 13 257.5 4 1
## 14 258.1 4 2
## 15 262.5 4 3
## 16 264.6 4 4
##
## $bestp
## [1] 3
##
## $bestq
## [1] 1
##
## $bestmod
##
## Call:
## arima0(x = array, order = c(bestp, 0, bestq))
##
## Coefficients:
## ar1 ar2 ar3 ma1 intercept
## -0.095 0.191 0.313 0.217 0.019
## s.e. 0.101 0.108 0.107 0.347 0.163
##
## sigma^2 estimated as 0.66: log likelihood = -122.5, aic = 257.1
zz4 <- z4$bestmod # p=3 q=1
Initialize vectors to store statistics
nsim <- 250
armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arcor2 <- matrix(0, nsim, 12)
arcor3 <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)
# set up points to evaluate May PDFs over
May <- LFmon[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)
# store the May simulations and create an array for the PDF
maysim <- 1:nyrs
simpdf <- matrix(0, nrow = nsim, ncol = nevals)
Simulate each PC from it's respective model and take the normal distribution
for (k in 1:nsim) {
# Sim PCs from ARIMA model
PC1sim <- arima.sim(n = nyrs, list(ar = c(zz1$coef[1:z1$bestp]), ma = c(zz1$coef[((z1$bestp) +
1):(z1$bestp + z1$bestq)])), sd = sqrt(zz1$sigma2)) + mean(pcs[, 1])
PC2sim <- arima.sim(n = nyrs, list(ar = c(zz2$coef[1:z2$bestp]), ma = c(zz2$coef[((z2$bestp) +
1):(z2$bestp + z2$bestq)])), sd = sqrt(zz2$sigma2)) + mean(pcs[, 2])
PC3sim <- arima.sim(n = nyrs, list(ar = c(zz3$coef[1:z3$bestp]), ma = c(zz3$coef[((z3$bestp) +
1):(z3$bestp + z3$bestq)])), sd = sqrt(zz3$sigma2)) + mean(pcs[, 3])
PC4sim <- arima.sim(n = nyrs, list(ar = c(zz4$coef[1:z4$bestp]), ma = c(zz4$coef[((z4$bestp) +
1):(z4$bestp + z4$bestq)])), sd = sqrt(zz4$sigma2)) + mean(pcs[, 4])
# Sim remaining from normal distribution
zzs <- matrix(0, 101, 8)
for (i in 1:8) {
zzs[, i] <- rnorm(101, mean = mean(pcs[, i + 4]), sd = sd(pcs[, i +
4]))
}
simPCs <- cbind(PC1sim, PC2sim, PC3sim, PC4sim, zzs)
simstflow <- simPCs %*% t(zsvd$u)
# Unscale the PCs
simflow <- t(t(simstflow) * LFsd + LFmean)
# PDF for May
maysim = simflow[, 5]
simpdf[k, ] = sm.density(maysim, eval.points = xeval, display = "none")$estimate
# compute stats on simulated timeseries
ar.stats <- stats(simflow, corr = TRUE, multicorr = TRUE)
armean[k, ] <- ar.stats$mean
armax[k, ] <- ar.stats$max
armin[k, ] <- ar.stats$min
arstdev[k, ] <- ar.stats$stdev
arskw[k, ] <- ar.stats$skew
arcor[k, ] <- ar.stats$cor
arcor2[k, ] <- ar.stats$mcor2
arcor3[k, ] <- ar.stats$mcor3
}
Compute statistics from historical data and boxplot versus simulated
obs.stats <- stats(LFmon, corr = TRUE, multicorr = TRUE)
mos <- as.character(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec"))
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
lines(obs.stats$mean, col = "red")
title(main = "Mean")
axis(1, at = 1:12, labels = mos)
boxplot(armax, xaxt = "n")
lines(obs.stats$max, col = "red")
title(main = "Max")
axis(1, at = 1:12, labels = mos)
boxplot(armin, xaxt = "n")
lines(obs.stats$min, col = "red")
title(main = "Min")
axis(1, at = 1:12, labels = mos)
boxplot(arskw, xaxt = "n")
lines(obs.stats$skew, col = "red")
title(main = "Skew")
axis(1, at = 1:12, labels = mos)
boxplot(arstdev, xaxt = "n")
lines(obs.stats$stdev, col = "red")
title(main = "SD")
axis(1, at = 1:12, labels = mos)
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Statistics for Multivariate Simulation on PCs", outer = TRUE)
Comments on Statistics Plots:
Using an ARMA multivariate simulation using PCs of the timeseries we can notice a couple things from the statistical plots comparing simulatd and observed timeseries:
- The mean, max and SD are captured nicely by the simulations. The min is generally captured.
- Additionally, lag-1 correlation is captured nicely since this is a seasonal model on the PCs.
- Skew is not captured, as expected since this is still assuming normality.
Boxplot the May PDF
plotMay(May, simpdf) # used own function to consolidate May PDf plotting commands
par(oma = c(2, 2, 3, 2))
title("May PDF from Simulation", outer = TRUE)
Comments on May PDF
As expected, since we are still using ARMA, we are assuming a normal distribution. Therefore we are unable to capture the bimodality in May streamflow.
Compare multiple lag correlation results
par(mfrow = c(1, 3))
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor2, xaxt = "n")
lines(obs.stats$mcor2, col = "red")
title(main = "Lag-2 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor3, xaxt = "n")
lines(obs.stats$mcor3, col = "red")
title(main = "Lag-3 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Comparison of multiple lags", outer = TRUE)
Comments on lag correlations
This method does capture multiple correlations. This is because we are simulating all 12 at the same time. Therefore, correlation between months at most lags are captured.