rm(list = ls())
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("bestARIMA.r")
source("skew.r")
source("stats.r")
source("plotMay.r")
library(sm)
## Package `sm', version 2.2-5: type help(sm) for summary information
Read in the monthly Lees Ferry streamflow and remove the seasonal cycle (remove the mean and divide by the standard deviation) :: NONSEASONAL
LFmon <- read.table("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/LeesFerry-monflows-1906-2006.txt")
years <- LFmon[, 1]
LFmon <- as.data.frame(LFmon[, 2:13]/(10^6)) # convert to Million Acre Feet (MaF)
names(LFmon) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
"Oct", "Nov", "Dec")
LFmean <- apply(LFmon, 2, mean) # monthly mean
LFsd <- apply(LFmon, 2, sd) # monthly sd
X <- t((t(LFmon) - LFmean)/LFsd) # same as scale(X)
X <- array(t(X)) # turn into an array to use for ARMA
Find the best ARIMA model. I created a function that evaluates AIC for various combinations of p and q. $bestmod is the model chosen with lowest AIC.
p <- c(1, 2, 3, 4) # run the code for a range of p values
q <- c(1, 2, 3, 4) # run the code for a range of q values
z <- bestARIMA(X, p, q) # home-made code compares AIC values
z$aics
## AIC p q
## 1 2755 1 1
## 2 2753 1 2
## 3 2750 1 3
## 4 2804 1 4
## 5 2749 2 1
## 6 2757 2 2
## 7 2751 2 3
## 8 2752 2 4
## 9 2749 3 1
## 10 2751 3 2
## 11 2752 3 3
## 12 2754 3 4
## 13 2751 4 1
## 14 2832 4 2
## 15 2753 4 3
## 16 2758 4 4
p <- z$bestp # selects p-val with lowest AIC
q <- z$bestq # selects q-val with lowest AIC
zz <- z$bestmod # zz is the best model - contains coefs
Set up for 250 simulations. Initialize matrices to store statistics of simulations.
nsim <- 250 # number of simulations
nyrs <- length(LFmon[, 1]) # number of years of historical data
nyrs1 <- nyrs - 1 # number of years minus one
# initialize the matrices that store the statistics
armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arcor2 <- matrix(0, nsim, 12)
arcor3 <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)
# define points to evaluate the May pdf over
May <- LFmon[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)
# store the May simulations and create an array for the PDF
maysim <- 1:nyrs
simpdf <- matrix(0, nrow = nsim, ncol = nevals)
Perform simulation
for (k in 1:nsim) {
nmons <- nyrs * 12 # number of values to be generated
xsim <- arima.sim(n = nmons, list(ar = c(zz$coef[1:p]), ma = c(zz$coef[(p +
1):(p + q)])), sd = sqrt(zz$sigma2)) + mean(X)
simdismon <- matrix(xsim, nrow = 12) # put the array into a matrix
simdismon <- simdismon * LFsd + LFmean # back standardize
simdismon <- t(simdismon) # transpose
# evaluate May simulated PDF at each time step
maysim <- simdismon[, 5]
simpdf[k, ] <- sm.density(maysim, eval.points = xeval, display = "none")$estimate
# compute stats on simulated timeseries
ar.stats <- stats(simdismon, corr = TRUE, multicorr = TRUE)
armean[k, ] <- ar.stats$mean
armax[k, ] <- ar.stats$max
armin[k, ] <- ar.stats$min
arstdev[k, ] <- ar.stats$stdev
arskw[k, ] <- ar.stats$skew
arcor[k, ] <- ar.stats$cor
arcor2[k, ] <- ar.stats$mcor2
arcor3[k, ] <- ar.stats$mcor3
}
Compute statistics from historical data and boxplot versus simulated
obs.stats <- stats(LFmon, corr = TRUE, multicorr = TRUE)
mos <- as.character(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec"))
par(mfrow = c(2, 3))
boxplot(armean, xaxt = "n")
lines(obs.stats$mean, col = "red")
title(main = "Mean")
axis(1, at = 1:12, labels = mos)
boxplot(armax, xaxt = "n")
lines(obs.stats$max, col = "red")
title(main = "Max")
axis(1, at = 1:12, labels = mos)
boxplot(armin, xaxt = "n")
lines(obs.stats$min, col = "red")
title(main = "Min")
axis(1, at = 1:12, labels = mos)
boxplot(arskw, xaxt = "n")
lines(obs.stats$skew, col = "red")
title(main = "Skew")
axis(1, at = 1:12, labels = mos)
boxplot(arstdev, xaxt = "n")
lines(obs.stats$stdev, col = "red")
title(main = "SD")
axis(1, at = 1:12, labels = mos)
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Statistics for ARMA model", outer = TRUE)
Comments on Statistics Plots:
Using an ARMA nonseasonal model we can notice a couple things from the statistical plots comparing simulatd and observed timeseries:
- The mean, max and SD are captured nicely by the simulations. The min is generally captured.
- Skew is not captured, as expected since this is a parametric method assuming normality
- Lag-1 correlation is not captured since this is a nonseasonal model.
Boxplot the May PDF
plotMay(May, simpdf) # used own function to consolidate May PDf plotting commands
par(oma = c(2, 2, 3, 2))
title("May PDF from Simulation", outer = TRUE)
Comments on May PDF:
The bimodality in the May PDF is not captured. This is expected since this code is utilizing ARMA (parametric) without any alterations to capture non-normality. We will need to add a non-parametric alteration (such as KNN) in order to capture the bimodality.
Compare multiple lag correlation results
par(mfrow = c(1, 3))
boxplot(arcor, xaxt = "n")
lines(obs.stats$cor, col = "red")
title(main = "Lag-1 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor2, xaxt = "n")
lines(obs.stats$mcor2, col = "red")
title(main = "Lag-2 Corr")
axis(1, at = 1:12, labels = mos)
boxplot(arcor3, xaxt = "n")
lines(obs.stats$mcor3, col = "red")
title(main = "Lag-3 Corr")
axis(1, at = 1:12, labels = mos)
par(oma = c(2, 2, 3, 2))
title("Comparison of multiple lags", outer = TRUE)
Comments on lag correlations
This method does not capture any of the lag correlations - not even the lag 1 - since it is a nonseasonal model. We will compare this with the remaining techniques of simulating a time series.