bestARIMA.r
It uses an inputs of p and q vectors, tests all ARIMA models for various combinations of p and q, and then chooses the best model as the one with the lowest AIC value. It outputs all of the information, as well as 'ar' and 'ma' coefficients of the best ARIMA to be called upon in the actual problem codes.
bestARIMA <- function(array, p, q) {
np <- length(p)
nq <- length(q)
aics <- matrix(1, (np * nq), 3)
it <- 0
for (i in 1:np) {
for (j in 1:nq) {
it <- it + 1
zz <- arima0(array, order = c(p[i], 0, q[j]))
aics[it, ] <- cbind(zz$aic, p[i], q[j])
}
}
aics <- as.data.frame(aics)
names(aics) <- c("AIC", "p", "q")
min <- which(aics == min(aics[, 1]))
bestp <- aics[min, 2]
bestq <- aics[min, 3]
bestmodel <- arima0(array, order = c(bestp, 0, bestq))
results <- list(aics = aics, bestp = bestp, bestq = bestq, bestmod = bestmodel)
return(results)
}
stats.r
It is used in most of the problems of this homework to store the statistics on both the simulated nad historical data. It was generalized to fit both and to fit in scenarios when the lag 1 correlation or multiple month (2, 3) correlations were not appropriate. In this, you can specify “corr=TRUE” for lag-1 correlations and “multicorr=TRUE” for lag-2 and lag-3 correlations.
stats <- function(data, corr, multicorr) {
setwd("~/Documents/University of Colorado/Fall 2013/CVEN6833/HW3")
source("skew.r")
nc <- length(data[1, ])
nyrs <- length(data[, 1])
nyrs1 <- nyrs - 1
mean <- 1:nc
stdev <- 1:nc
cor <- 1:nc
mcor2 <- 1:nc
mcor3 <- 1:nc
skw <- 1:nc
max <- 1:nc
min <- 1:nc
for (i in 1:nc) {
max[i] <- max(data[, i])
min[i] <- min(data[, i])
mean[i] <- mean(data[, i])
stdev[i] <- sd(data[, i])
skw[i] <- skew(data[, i])
}
results <- list(mean = mean, stdev = stdev, min = min, max = max, skew = skw)
# if-statement to perform lag-1 correlation or not
if (corr == "TRUE") {
for (i in 2:12) {
i1 <- i - 1
cor[i] <- cor(data[, i], data[, i1])
}
cor[1] <- cor(data[1:nyrs1, 12], data[2:nyrs, 1])
results <- list(mean = mean, stdev = stdev, min = min, max = max, skew = skw,
cor = cor)
}
# if-statement to perform lag-2 and lag-3 correlation or not
if (multicorr == "TRUE") {
for (i in 3:12) {
i2 <- i - 2
mcor2[i] <- cor(data[, i], data[, i2])
}
mcor2[1] <- cor(data[1:nyrs1, 11], data[2:nyrs, 1])
mcor2[2] <- cor(data[1:nyrs1, 10], data[2:nyrs, 2])
for (i in 4:12) {
i3 <- i - 3
mcor3[i] <- cor(data[, i], data[, i3])
}
mcor3[1] <- cor(data[1:nyrs1, 10], data[2:nyrs, 1])
mcor3[2] <- cor(data[1:nyrs1, 11], data[2:nyrs, 2])
mcor3[3] <- cor(data[1:nyrs1, 12], data[2:nyrs, 2])
results <- list(mean = mean, stdev = stdev, min = min, max = max, skew = skw,
cor = cor, mcor2 = mcor2, mcor3 = mcor3)
}
return(results)
}
plotMay.r
This is a simple function used to plot the May flow PDFs. This is mostly taken from Balaji's code online, but put into a function in order to tighten-up the problem codes.
plotMay <- function(May, simpdf) {
# re-define the evaluation points for the purpose of the function - these
# are the same as were used during simulation creation
xeval = seq(min(May) - 0.25 * sd(May), max(May) + 0.25 * sd(May), length = 100)
# calculate the historical May pdf
zz = sm.density(May, eval.points = xeval, display = "none")
xdensityorig = zz$estimate
# obtain boxplot of simulated PDFs splot into the number of evaluation
# points
par(mfrow = c(1, 1))
xs = 1:length(xeval)
zz = boxplot(split(t(simpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(simpdf, xdensityorig), xlab = "May flow MAF",
ylab = "PDF", cex = 1.25)
# organize for better axis plotting:
z2 = 1:6
n1 = 1:6
z2[1] = z1[1]
z2[2] = z1[20]
z2[3] = z1[40]
z2[4] = z1[60]
z2[5] = z1[80]
z2[6] = z1[100]
n1[1] = xeval[1]
n1[2] = xeval[20]
n1[3] = xeval[40]
n1[4] = xeval[60]
n1[5] = xeval[80]
n1[6] = xeval[100]
n1 = round(n1, dig = 0)
n1 = as.character(n1)
axis(1, at = z2, labels = n1, cex = 1)
# overlay the actual Lees Ferry May pdf in red
lines(z1, xdensityorig, lty = 2, lwd = 2, col = "red")
}
bestTREE.r
This function fits trees for all combinations of 3 predictors (i.e. seven total tree models) and spits out the deviance of each in order to select the tree with the lowest deviance.
bestTREE <- function(data) {
n <- length(data[1, ]) - 1
deviances <- 1:7
models <- c("full", "2,3", "3,4", "2,4", "2", "3", "4")
ztree <- tree(data[, 1] ~ data[, 2:4])
deviances[1] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 2:3])
deviances[2] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 3:4])
deviances[3] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 2] + data[, 4])
deviances[4] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 2])
deviances[5] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 3])
deviances[6] <- deviance(ztree)
ztree <- tree(data[, 1] ~ data[, 4])
deviances[7] <- deviance(ztree)
low <- which(deviances == min(deviances))
deviances <- cbind(deviances, models)
return(deviances)
}