Rebecca Di Bari

CVEN 6833 Homework 3

# function to compute skew and interquartile range together
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew-iqr.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew.r")

# functions to produce boxplots with whiskers at 5th and 95th percentile
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot-stats.r")

library(sm)
library(MASS)

# Fit a regression tree on a training set and score it on a hold-out epoch.
#
# Arguments:
#   X  training data (the years outside the epoch); coerced to a data frame.
#      Column 2 is the response, columns 3-5 are the predictors.
#   Y  hold-out data (the epoch itself) with the same column layout as X.
#
# Side effect: plots observed vs. predicted response for the hold-out rows,
# a red 1:1 line, and a green point at (mean observed, mean predicted).
#
# Returns (invisibly) list(rmse, rsq) computed on the hold-out rows, where
# rsq = 1 - SSE/SST and can therefore be negative.
CART.pred <- function(X, Y)
{
    train <- as.data.frame(X)[, 2:5]
    test <- as.data.frame(Y)[, 2:5]
    # Columns are matched by position, so give the hold-out set the training
    # names; predict() can then look the predictors up in `newdata` directly.
    names(test) <- names(train)

    # response ~ all remaining columns of `train`
    tree.formula <- reformulate(".", response = as.name(names(train)[1]))
    LF.pred <- tree(tree.formula, data = train)
    LFpred <- predict(LF.pred, newdata = test)

    obs <- test[, 1]
    plot(obs, LFpred)
    lines(range(obs), range(obs), col = "red")
    # Mean observed vs. mean predicted. Using the predicted mean on both axes
    # would always land on the 1:1 line and show nothing.
    points(mean(obs), mean(LFpred), type = "p", bg = "green", pch = 21, col = "black")

    rmse <- sqrt(mean((obs - LFpred)^2))
    SST <- sum((obs - mean(obs))^2)
    SSR <- sum((obs - LFpred)^2)  # residual (error) sum of squares
    rsq <- 1 - (SSR/SST)

    invisible(list(rmse = rmse, rsq = rsq))
}

# Grid-search ARMA(p, q) orders by AIC using arima0().
#
# Arguments:
#   p  vector of candidate AR orders (need not start at 1 or be contiguous)
#   q  vector of candidate MA orders (same)
#   X  the series to fit
#
# Returns (invisibly) list(lowest.aic, p, q): the smallest AIC found and the
# AR/MA orders that produced it. Ties resolve to the first minimum in
# column-major order.
fun <- function(p, q, X) {
    modelaic <- matrix(NA_real_, nrow = length(p), ncol = length(q),
        dimnames = list(paste("p=", p, sep = ""), paste("q=", q, sep = "")))
    # Index the matrix by position and pass the order *value* to arima0();
    # indexing by the value itself fails unless the candidates are 1, 2, ...
    for (i in seq_along(p)) {
        for (j in seq_along(q)) {
            modelaic[i, j] <- arima0(X, order = c(p[i], 0, q[j]))$aic
        }
    }
    if (all(is.na(modelaic))) {
        stop("no ARMA model produced a finite AIC", call. = FALSE)
    }
    best <- arrayInd(which.min(modelaic), dim(modelaic))
    invisible(list(lowest.aic = modelaic[best], p = p[best[1, 1]],
        q = q[best[1, 2]]))
}

CART & Forecasting

1. Colorado River at Lees Ferry, AZ, is an important location on the river, through which 85% of the flow passes. It has been shown that the flow is modulated by large-scale climate features such as ENSO, PDO and AMO. You wish to predict the flow using CART; to this end, perform the following.

### Data: Lee's Ferry: Used Question 1, Question 2, Question 3, Question
### 4, Question 5
# Annual record: column 1 is kept as `year`, column 2 (scaled by 1e6) as the
# flow series.
LFannual <- read.table(
    "http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/LF_1906-2005.txt",
    header = FALSE
)
year <- LFannual[, 1]
LF.annual <- LFannual[, 2]/1e+06

# Climate indices: read each file and keep the 100 rows of column 2 that line
# up with the flow record (presumably one row per year -- confirm against
# the files).
AMO <- read.table(
    "http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/AMO_1856-2011.txt",
    header = FALSE
)[51:150, 2]  # get 1906-2005
PDO <- read.table(
    "http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/PDO_1900-2011.txt",
    header = FALSE
)[7:106, 2]
ENSO <- read.table(
    "http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/files-4HW3/nino3_1905-2007.txt",
    header = FALSE
)[2:101, 2]

(i) Fit a best regression tree for the entire period - using CV and Deviance and display the tree

library(tree)

# Build the modelling data frame directly. (Calling names<- on the matrix
# returned by cbind() does not set column names; data.frame() does.)
LFdata <- data.frame(year = year, LF.annual = LF.annual, ENSO = ENSO, PDO = PDO,
    AMO = AMO)

# Fit regression tree. Naming the columns in the formula, rather than
# indexing LFdata by position, makes the split labels drawn by text()
# read as ENSO / PDO / AMO.
ztree <- tree(LF.annual ~ ENSO + PDO + AMO, data = LFdata)

plot(ztree)
text(ztree, cex = 0.75)

plot of chunk unnamed-chunk-3

(ii) Evaluate the performance of the regression tree on three different epochs - 1906-1921, 1975-1990 and 1990-2006. Essentially, fit a best tree on the data outside of each epoch and predict the mean flow for that epoch. Plot the observed and predicted values; compute R^2 and RMSE.

# One observed-vs-predicted panel per epoch. For each epoch the tree is
# fitted on the rows OUTSIDE the epoch and scored on the rows inside it.
par(mfrow = c(3, 1))
# first epoch: 1906-1921 (rows 1-16)
LFdata.1 <- LFdata[1:16, ]
LFdata.not1 <- LFdata[17:100, ]
epoch.1 <- CART.pred(LFdata.not1, LFdata.1)

# second epoch: 1975-1990 (rows 70-85)
LFdata.2 <- LFdata[70:85, ]
LFdata.not2 <- rbind(LFdata[1:69, ], LFdata[86:100, ])
epoch.2 <- CART.pred(LFdata.not2, LFdata.2)

# third epoch: 1990-2005 (rows 85-100). The hold-out set is the epoch and the
# training set is everything before it; these two were previously swapped.
LFdata.3 <- LFdata[85:100, ]
LFdata.not3 <- LFdata[1:84, ]
epoch.3 <- CART.pred(LFdata.not3, LFdata.3)

plot of chunk unnamed-chunk-4


# Hold-out skill per epoch: one row per epoch, columns rmse and rsq.
performance.table <- rbind(
    epoch1 = c(rmse = epoch.1$rmse, rsq = epoch.1$rsq),
    epoch2 = c(rmse = epoch.2$rmse, rsq = epoch.2$rsq),
    epoch3 = c(rmse = epoch.3$rmse, rsq = epoch.3$rsq)
)
as.table(performance.table)

##           rmse     rsq
## epoch1  4.7955 -0.6207
## epoch2  5.4047  0.1131
## epoch3  5.3122 -0.4772

CART does not seem to be a great method for prediction here, as the agreement between predicted and observed flows is poor. CART is better suited to analyzing data and identifying relationships.

Question 2. Stochastic simulation of system variables (e.g., streamflow) is important for risk-based decision making. ARMA models are standard linear time series simulation methods. Fit an ARMA model and simulate the monthly Lees Ferry streamflow, the steps are as follows. Remove the seasonal cycle from the data - i.e., remove the monthly mean and divide by monthly standard deviation.

# Monthly record: 13 columns per row, the first is the year and the other
# twelve are the monthly flows.
monthdata <- matrix(
    scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/Leesferry-mon-data.txt"),
    byrow = TRUE, ncol = 13
)
yrs <- monthdata[, 1]
nyrs <- length(yrs)
nyrs1 <- nyrs - 1
WYmonflow <- monthdata[, 2:13]
WYmonflow <- WYmonflow/10^6  # convert to MAF

### Scale the data..
fmean <- apply(WYmonflow, 2, mean)  # monthly mean
fsdev <- apply(WYmonflow, 2, sd)  # monthly standard deviation

# Remove each month's mean and divide by its standard deviation (column-wise),
# then string the years end to end into one long series.
X <- sweep(sweep(WYmonflow, 2, fmean, "-"), 2, fsdev, "/")
X <- array(t(X))  # standardized anomalies..

(i) Fit a best ARMA model for the entire monthly time series.

## Fit various ARMA models
# Search AR and MA orders 1-3 and keep the pair that fun() reports as best.
bestmodel <- fun(1:3, 1:3, X)
p <- bestmodel$p
q <- bestmodel$q
pq <- matrix(c(p, q), nrow = 1, dimnames = list("[1]", c("p", "q")))
as.table(pq)

##     p q
## [1] 3 1

# Refit the selected model; this fit is what the simulation step uses.
zz <- arima0(X, order = c(p, 0, q))

(ii) Generate 250 simulations each of same length as the historical data.

Add the seasonal cycle back and create boxplots of the annual and monthly mean, variance, skew and lag-1 correlation, and PDFs of May and annual flows. Comment on what you observe and also on why some of the monthly statistics are not reproduced by the “best” model.

################### Simulate
nsim <- 250

# arma.sim() comes from a local file that is not part of this document. It is
# called once per statistic, in the order below, and the matching LF.<stat>
# element of each result is kept (presumably one row per simulation and one
# column per month -- confirm against arma.sim.txt).
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/arma.sim.txt")
LF.sim.mean <- arma.sim(nsim, "mean", zz, X, nyrs, p, q, 12)$LF.mean
LF.sim.sd <- arma.sim(nsim, "sd", zz, X, nyrs, p, q, 12)$LF.sd
LF.sim.skew <- arma.sim(nsim, "skew", zz, X, nyrs, p, q, 12)$LF.skew
LF.sim.cor <- arma.sim(nsim, "cor", zz, X, nyrs, p, q, 12)$LF.cor
LF.sim.max <- arma.sim(nsim, "max", zz, X, nyrs, p, q, 12)$LF.max
LF.sim.min <- arma.sim(nsim, "min", zz, X, nyrs, p, q, 12)$LF.min

# Simulated PDFs for the 5th monthly column, plus the grid they were
# evaluated on.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/arma.sim.month.txt")
May <- WYmonflow[, 5]
fu <- arma.sim.month(nsim, May, zz, X, p, q, nyrs)
May.sim <- fu$Monthpdf
xeval <- fu$xeval

# The same six statistics computed from the observed flows.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/obs.stats.txt")
Obs.mean <- obs.stats("mean", WYmonflow, nyrs)$obs.mean
Obs.cor <- obs.stats("cor", WYmonflow, nyrs)$obs.cor
Obs.sd <- obs.stats("sd", WYmonflow, nyrs)$obs.sd
Obs.skew <- obs.stats("skew", WYmonflow, nyrs)$obs.skew
Obs.max <- obs.stats("max", WYmonflow, nyrs)$obs.max
fu <- obs.stats("min", WYmonflow, nyrs)
Obs.min <- fu$obs.min

########### boxplot the stats ###############
months <- month.abb
# Keep one matrix per statistic instead of stacking them and slicing with
# hard-coded row offsets, so the plots stay correct if nsim changes.
sim.stats <- list(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
    LF.sim.min)
obs.list <- rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles <- c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")

par(mfrow = c(3, 2))
for (i in seq_along(sim.stats)) {
    sims <- sim.stats[[i]]  # simulated values, one column per month
    obs <- obs.list[i, ]  # observed value of the same statistic
    bstats <- boxplot(split(sims, col(sims)), plot = FALSE, cex = 1)
    bstats$names <- rep("", length(bstats$names))
    # y range covers both the simulations and the observed values
    z1 <- bxp(bstats, ylim = range(obs, sims), cex = 1)
    axis(1, at = 1:12, labels = months)
    points(z1, obs, pch = 16, col = "red")
    lines(z1, obs, pch = 16, col = "red")
    title(main = titles[i])
}

plot of chunk unnamed-chunk-7


# boxplots of the May PDF....
# Density of the observed series on the same grid as the simulated PDFs.
zz <- sm.density(May, eval.points = xeval, display = "none")
xdensityorig <- zz$estimate

par(mfrow = c(1, 1))
# One box per evaluation point, built from the simulated density values.
xs <- seq_along(xeval)
zz <- boxplot(split(t(May.sim), xs), plot = FALSE, cex = 1)
zz$names <- rep("", length(zz$names))
z1 <- bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
    cex = 1.25)

# Label the x axis at six of the evaluation points.
evaluate <- c(1, 20, 40, 60, 80, 100)
z2 <- z1[evaluate]
n1 <- xeval[evaluate]
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")

plot of chunk unnamed-chunk-7

ARMA assumes a stationary series and therefore cannot accurately reproduce the month-to-month correlation and skew. This is reflected in the results above. Further, the bimodality of the May flow PDF is not captured because this is a parametric method.

3) To improve on the above, fit a seasonal AR(1) model - i.e., nonstationary time series model. Fit a best GLM for each monthly flow with residual resampling and repeat 2. Comment on the performance in comparison.

nsim <- 250

# seasonalAR1.sim() comes from a local file that is not part of this document.
# It is called once per statistic, in the order below, and the matching
# LF.<stat> element of each result is kept.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/seasonalAR1.sim.txt")
LF.sim.mean <- seasonalAR1.sim(nsim, "mean", WYmonflow, nyrs)$LF.mean
LF.sim.sd <- seasonalAR1.sim(nsim, "sd", WYmonflow, nyrs)$LF.sd
LF.sim.skew <- seasonalAR1.sim(nsim, "skew", WYmonflow, nyrs)$LF.skew
LF.sim.cor <- seasonalAR1.sim(nsim, "cor", WYmonflow, nyrs)$LF.cor
LF.sim.max <- seasonalAR1.sim(nsim, "max", WYmonflow, nyrs)$LF.max
LF.sim.min <- seasonalAR1.sim(nsim, "min", WYmonflow, nyrs)$LF.min

# Simulated PDFs for the 5th monthly column, plus the grid they were
# evaluated on.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/seasonalAR1.sim.month.txt")
May <- WYmonflow[, 5]
fu <- seasonalAR1.sim.month(nsim, May, WYmonflow, nyrs)
May.sim <- fu$Monthpdf
xeval <- fu$xeval

########### boxplot the stats ###############
months <- month.abb
# Keep one matrix per statistic instead of stacking them and slicing with
# hard-coded row offsets, so the plots stay correct if nsim changes.
sim.stats <- list(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
    LF.sim.min)
obs.list <- rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles <- c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")

par(mfrow = c(3, 2))
for (i in seq_along(sim.stats)) {
    sims <- sim.stats[[i]]  # simulated values, one column per month
    obs <- obs.list[i, ]  # observed value of the same statistic
    bstats <- boxplot(split(sims, col(sims)), plot = FALSE, cex = 1)
    bstats$names <- rep("", length(bstats$names))
    # y range covers both the simulations and the observed values
    z1 <- bxp(bstats, ylim = range(obs, sims), cex = 1)
    axis(1, at = 1:12, labels = months)
    points(z1, obs, pch = 16, col = "red")
    lines(z1, obs, pch = 16, col = "red")
    title(main = titles[i])
}

plot of chunk unnamed-chunk-8


# boxplots of the May PDF....
# Density of the observed series on the same grid as the simulated PDFs.
zz <- sm.density(May, eval.points = xeval, display = "none")
xdensityorig <- zz$estimate

par(mfrow = c(1, 1))
# One box per evaluation point, built from the simulated density values.
xs <- seq_along(xeval)
zz <- boxplot(split(t(May.sim), xs), plot = FALSE, cex = 1)
zz$names <- rep("", length(zz$names))
z1 <- bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
    cex = 1.25)

# Label the x axis at six of the evaluation points.
evaluate <- c(1, 20, 40, 60, 80, 100)
z2 <- z1[evaluate]
n1 <- xeval[evaluate]
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")

plot of chunk unnamed-chunk-8

Correlation is matched because we are using a seasonal AR model, which is a nonstationary time series model. However, the May bimodality is not captured because this is a parametric method.

4) Fit a nonparametric seasonal lag-1 model and repeat 1. You can use either the K-nn bootstrap technique or LOCFIT/residual resampling (which will be a complement to the GLM/resampling approach) and repeat 1. What advantages/disadvantages do you see with this nonparametric approach?

# you have to string it to create a long vector
x <- array(t(WYmonflow))
N <- length(x)
nsim <- 100

# knn1.sim() comes from a local file that is not part of this document. It is
# called once per statistic, in the order below, and the matching LF.<stat>
# element of each result is kept.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/knn1.sim.txt")
LF.sim.mean <- knn1.sim(nsim, "mean", WYmonflow, x, nyrs, N)$LF.mean
LF.sim.sd <- knn1.sim(nsim, "sd", WYmonflow, x, nyrs, N)$LF.sd
LF.sim.skew <- knn1.sim(nsim, "skew", WYmonflow, x, nyrs, N)$LF.skew
LF.sim.cor <- knn1.sim(nsim, "cor", WYmonflow, x, nyrs, N)$LF.cor
LF.sim.max <- knn1.sim(nsim, "max", WYmonflow, x, nyrs, N)$LF.max
LF.sim.min <- knn1.sim(nsim, "min", WYmonflow, x, nyrs, N)$LF.min

# Simulated PDFs for the 5th monthly column, plus the grid they were
# evaluated on.
source("C:/Users/Rebex/Documents/R/AdvDataAnalysis/knn1.sim.month.txt")
May <- WYmonflow[, 5]
fu <- knn1.sim.month(nsim, May, WYmonflow, nyrs, N)
May.sim <- fu$Monthpdf
xeval <- fu$xeval

########### boxplot the stats ###############
months <- month.abb
# Keep one matrix per statistic instead of stacking them and slicing with
# hard-coded row offsets (which assumed 101 rows per statistic although
# nsim is 100); each matrix is used whole, whatever its row count.
sim.stats <- list(LF.sim.mean, LF.sim.sd, LF.sim.skew, LF.sim.cor, LF.sim.max,
    LF.sim.min)
obs.list <- rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles <- c("Mean", "Standard Deviation", "Skew", "Correlation", "Max", "Min")

par(mfrow = c(3, 2))
for (i in seq_along(sim.stats)) {
    sims <- sim.stats[[i]]  # simulated values, one column per month
    obs <- obs.list[i, ]  # observed value of the same statistic
    bstats <- boxplot(split(sims, col(sims)), plot = FALSE, cex = 1)
    bstats$names <- rep("", length(bstats$names))
    # y range covers both the simulations and the observed values
    z1 <- bxp(bstats, ylim = range(obs, sims), cex = 1)
    axis(1, at = 1:12, labels = months)
    points(z1, obs, pch = 16, col = "red")
    lines(z1, obs, pch = 16, col = "red")
    title(main = titles[i])
}

plot of chunk unnamed-chunk-9


# boxplots of the May PDF....
# Density of the observed series on the same grid as the simulated PDFs.
zz <- sm.density(May, eval.points = xeval, display = "none")
xdensityorig <- zz$estimate

par(mfrow = c(1, 1))
# One box per evaluation point, built from the simulated density values.
xs <- seq_along(xeval)
zz <- boxplot(split(t(May.sim), xs), plot = FALSE, cex = 1)
zz$names <- rep("", length(zz$names))
z1 <- bxp(zz, ylim = range(May.sim, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
    cex = 1.25)

# Label the x axis at six of the evaluation points.
evaluate <- c(1, 20, 40, 60, 80, 100)
z2 <- z1[evaluate]
n1 <- xeval[evaluate]
axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")

plot of chunk unnamed-chunk-9

The K-nn time series model provides the best results for all metrics. K-nn is a nonparametric approach, which allows it to capture the bimodality. It also makes no assumption about the underlying distribution of the data.

5) Another approach to simulate the monthly streamflow is using PCA.

(i) Perform PCA on the monthly streamflow data. See how many PCs to retain based on the eigen spectrum; retain as many as you need to capture most of the variance, say ~75-80%.

# Re-read the monthly record: first column is the year, the other twelve are
# the monthly flows.
monthdata <- matrix(
    scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session3/Leesferry-mon-data.txt"),
    byrow = TRUE, ncol = 13
)
nyrs <- nrow(monthdata)
WYmonflow <- monthdata[, 2:13]
WYmonflow <- WYmonflow/10^6
fmean <- apply(WYmonflow, 2, mean)  # monthly mean
fsdev <- apply(WYmonflow, 2, sd)

# scale data: subtract each column's mean and divide by its standard deviation
WYmonflow1 <- sweep(sweep(WYmonflow, 2, fmean, "-"), 2, fsdev, "/")

zs <- var(WYmonflow1)  # variance matrix
zsvd <- svd(zs)  # Eigen decomposition..
evect <- zsvd$u  # EigenVector
# Project the scaled flows onto the eigenvectors: one PC per column.
pcsLF <- t(t(zsvd$u) %*% t(WYmonflow1))
# Fraction of the total variance carried by each mode.
lambdas <- (zsvd$d/sum(zsvd$d))

plot(1:12, lambdas[1:12], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:12, lambdas[1:12], col = "red")

plot of chunk unnamed-chunk-10

# keep first 4 PCs

(ii) Fit a best ARMA model for each retained PC and a best fit Normal distribution for the others (i.e., the noise PCs)

## Fit various ARMA models
# One row per retained PC (the first four); columns hold the selected orders.
pq <- matrix(0, nrow = 4, ncol = 2, dimnames = list(NULL, c("p", "q")))
for (i in 1:4) {
    bestmodel <- fun(1:3, 1:3, pcsLF[, i])
    p <- bestmodel$p
    q <- bestmodel$q
    pq[i, ] <- c(p, q)
}
as.table(pq)

##   p q
## A 1 1
## B 1 1
## C 1 1
## D 3 1

(iii) Simulate each PC from its respective model and invert back to the flow space using the eigenvector matrix. Repeat to generate ensembles. Boxplot the statistics listed in 2 and compare with the results from the previous methods.

################### Simulate
nsim <- 100
nyrs1 <- nyrs - 1

# Observed density of the 5th monthly column on a fixed 100-point grid; the
# simulated densities below are evaluated on the same grid.
May <- WYmonflow[, 5]
xeval <- seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
nevals <- length(xeval)
zz <- sm.density(May, eval.points = xeval, display = "none")
xdensityorig <- zz$estimate

npc <- ncol(pcsLF)  # total number of PCs
nkeep <- nrow(pq)  # leading PCs that get an ARMA model
noise.pcs <- (nkeep + 1):npc  # remaining PCs, simulated as Normal noise

# Fit each retained PC's ARMA model once. The fits do not depend on the
# simulation index, so there is no need to refit inside the loop.
pc.models <- lapply(seq_len(nkeep), function(i) {
    p <- pq[i, 1]
    q <- pq[i, 2]
    fit <- arima0(pcsLF[, i], order = c(p, 0, q))
    # arima0 orders its coefficients as AR terms, then MA terms, then intercept
    list(ar = unname(fit$coef[seq_len(p)]), ma = unname(fit$coef[p + seq_len(q)]),
        sd = sqrt(fit$sigma2), mean = mean(pcsLF[, i]))
})

armean <- matrix(0, nsim, 12)
arstdev <- matrix(0, nsim, 12)
arcor <- matrix(0, nsim, 12)
arskw <- matrix(0, nsim, 12)
armax <- matrix(0, nsim, 12)
armin <- matrix(0, nsim, 12)
simpdf <- matrix(0, nrow = nsim, ncol = nevals)
for (k in 1:nsim) {
    pc.sim <- matrix(0, nrow = nyrs, ncol = npc)

    # Retained PCs: simulate from the fitted ARMA model and centre on that
    # PC's own mean (not the mean of the whole PC matrix).
    for (i in seq_len(nkeep)) {
        m <- pc.models[[i]]
        pc.sim[, i] <- arima.sim(n = nyrs, list(ar = m$ar, ma = m$ma), sd = m$sd) +
            m$mean
    }

    # Simulate the remaining pcs with a normal distribution
    for (j in noise.pcs) {
        PC <- pcsLF[, j]
        pc.sim[, j] <- rnorm(nyrs, mean = mean(PC), sd = sd(PC))
    }

    # Put statistics back
    LF.sim <- pc.sim %*% t(evect)

    # Back standardize
    LFsim <- t((t(LF.sim) * fsdev) + fmean)

    armean[k, ] <- apply(LFsim, 2, mean)
    armax[k, ] <- apply(LFsim, 2, max)
    armin[k, ] <- apply(LFsim, 2, min)
    arstdev[k, ] <- apply(LFsim, 2, sd)
    arskw[k, ] <- apply(LFsim, 2, skew)

    # correlation between one month to another..
    for (j in 2:12) {
        arcor[k, j] <- cor(LFsim[, j], LFsim[, j - 1])
    }
    # first column pairs the last column of one year with the first of the next
    arcor[k, 1] <- cor(LFsim[1:nyrs1, 12], LFsim[2:nyrs, 1])

    # Density of the simulated 5th column on the observed grid. Without this
    # row simpdf stays all zero and the PDF boxplot drawn from it is empty.
    simpdf[k, ] <- sm.density(LFsim[, 5], eval.points = xeval, display = "none")$estimate
}

########### boxplot the stats ###############
months <- month.abb
# Keep one matrix per statistic instead of stacking them and slicing with
# hard-coded row offsets, so the plots stay correct if nsim changes.
sim.stats <- list(armean, arstdev, arskw, arcor, armax, armin)
obs.list <- rbind(Obs.mean, Obs.sd, Obs.skew, Obs.cor, Obs.max, Obs.min)
titles <- c("Mean", "Standard Deviation", "Skew", "Lag 1 Cor", "Max", "Min")

par(mfrow = c(3, 2))
for (i in seq_along(sim.stats)) {
    sims <- sim.stats[[i]]  # simulated values, one column per month
    obs <- obs.list[i, ]  # observed value of the same statistic
    bstats <- boxplot(split(sims, col(sims)), plot = FALSE, cex = 1)
    bstats$names <- rep("", length(bstats$names))
    # y range covers both the simulations and the observed values
    z1 <- bxp(bstats, ylim = range(obs, sims), cex = 1)
    axis(1, at = 1:12, labels = months)
    points(z1, obs, pch = 16, col = "red")
    lines(z1, obs, pch = 16, col = "red")
    title(main = titles[i])
}

plot of chunk unnamed-chunk-12


#### PDF of may
par(mfrow = c(1, 1))
# One box per evaluation point, built from the rows of simpdf.
xs <- seq_along(xeval)
zz <- boxplot(split(t(simpdf), xs), plot = FALSE, cex = 1)
zz$names <- rep("", length(zz$names))
z1 <- bxp(zz, ylim = range(simpdf, xdensityorig), xlab = "May flow MAF", ylab = "PDF",
    cex = 1.25)

# Label the x axis at six of the evaluation points.
evaluate <- c(1, 20, 40, 60, 80, 100)
z2 <- z1[evaluate]
n1 <- xeval[evaluate]
axis(1, at = z2, labels = n1, cex = 1)
# Overlay the density of the observed series.
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")

plot of chunk unnamed-chunk-12