# R Project for Civil and Environmental Engineering Applications

# CVEN 5454

# A. Michael Bauer SID: 810 235 807

# May 10th, 2013

library(knitr)
library(e1071)
## Loading required package: class

# Problem 1. Lee's Ferry Basic Statistics and Boxplot

lf = read.table("http://civil.colorado.edu/~balajir/r-session-files/Leesferry-mon-data.txt")
boxplot(lf[, 2:13], axes = F)
axis(2)
axis(1, at = 1:12, labels = month.abb)
box()
points(1:12, apply(lf[, 2:13], 2, mean), pch = 19, col = "red")

[Figure: boxplots of monthly Lee's Ferry flows with monthly means overlaid in red]


### Problem 2. Lee's Ferry Flow Data - Mean, Variance, Skew and Their
### Robust Counterparts

# Quartile (Bowley) coefficient of skewness - a robust counterpart to skew,
# built from the quartiles and the median
myqcskewness <- function(x) {
    x25 <- as.double(quantile(x, prob = 0.25))
    x75 <- as.double(quantile(x, prob = 0.75))
    qcsk <- (x25 + x75 - 2 * median(x))/(x75 - x25)
    qcsk
}
lfmean = apply(lf[, 2:13], 2, mean)
lfvar = apply(lf[, 2:13], 2, var)
lfskew = apply(lf[, 2:13], 2, skewness)
lfmed = apply(lf[, 2:13], 2, median)
lfIQR = apply(lf[, 2:13], 2, IQR)
lfqcskew = apply(lf[, 2:13], 2, myqcskewness)
plot(lfmean, type = "l", main = "Mean Flow - Lee's Ferry", xlab = "Month", 
    ylab = "Mean Flow Rate")

[Figure: monthly mean flow]

plot(lfvar, type = "l", main = "Flow Variance - Lee's Ferry", xlab = "Month", 
    ylab = "Flow Rate Variance", col = "red")

[Figure: monthly flow variance]

plot(lfskew, type = "l", main = "Skew of Flow - Lee's Ferry", xlab = "Month", 
    ylab = "Skew of Flow Rate", col = "blue")

[Figure: monthly flow skewness]

plot(lfmed, type = "l", main = "Median Flow - Lee's Ferry", xlab = "Month", 
    ylab = "Median Flow Rate", col = "green")

[Figure: monthly median flow]

plot(lfIQR, type = "l", main = "IQR of Flow - Lee's Ferry", xlab = "Month", 
    ylab = "IQR of Flow Rate", col = "green")

[Figure: monthly flow IQR]

plot(lfqcskew, type = "l", main = "QC Skew of Flow - Lee's Ferry", xlab = "Month", 
    ylab = "QC Skew of Flow Rate", col = "red")

[Figure: monthly quartile-coefficient skewness]


### Problem 3. Lee's Ferry Flow Data with Overlaid PDFs of Fitted Models

library(ggplot2)
library(MASS)
for (i in 2:13) {
    x = lf[, i]
    fitnormpdf = dnorm(sort(x), mean(x), sd(x))
    gammax = fitdistr(x/10000, "gamma")
    fitgammapdf = dgamma(sort(x/10000), shape = gammax$estimate[1], rate = gammax$estimate[2])/10000
    weibullx = fitdistr(x/10000, "weibull")
    fitweibullpdf = dweibull(sort(x/10000), shape = weibullx$estimate[1], 
        scale = weibullx$estimate[2])/10000

    fitlognorm = dlnorm(sort(x), mean(log(x)), sd(log(x)))
    hist(x, prob = T, main = month.name[(i - 1)])
    lines(sort(x), fitnormpdf, col = "blue")
    lines(sort(x), fitgammapdf, col = "red")
    lines(sort(x), fitweibullpdf, col = "green")
    lines(sort(x), fitlognorm, col = "black")
    legend("topright", col = c("blue", "red", "green", "black"), legend = c("Normal", 
        "Gamma", "Weibull", "Lognormal"), lty = 1, lwd = 1)
}
## Warning: NaNs produced (repeated; these warnings come from fitdistr
## evaluating the Gamma and Weibull likelihoods at infeasible parameter
## values during optimization and do not affect the final fits)

[Figures: one histogram per month with the fitted Normal (blue), Gamma (red),
Weibull (green) and Lognormal (black) PDFs overlaid]


# Problem 4. Highly Skewed Lee's Ferry Flow Data - Fitting Models

library(ADGofTest)
library(BSDA)
## Loading required package: lattice
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
## 
## Orange

i = 3
x = lf[, i]
fitnormpdf = dnorm(sort(x), mean(x), sd(x))
gammax = fitdistr(x/10000, "gamma")
fitgammapdf = dgamma(sort(x/10000), shape = gammax$estimate[1], rate = gammax$estimate[2])/10000
# Refit the Weibull on the February data (previously fitweibullpdf was left
# over from the December iteration of the loop above)
weibullx = fitdistr(x/10000, "weibull")
fitweibullpdf = dweibull(sort(x/10000), shape = weibullx$estimate[1], 
    scale = weibullx$estimate[2])/10000
fitlognorm = dlnorm(sort(x), mean(log(x)), sd(log(x)))
hist(x, prob = T, main = month.name[(i - 1)])
lines(sort(x), fitnormpdf, col = "blue")
lines(sort(x), fitgammapdf, col = "red")
lines(sort(x), fitweibullpdf, col = "green")
lines(sort(x), fitlognorm, col = "black")
legend("topright", col = c("blue", "red", "green", "black"), legend = c("Normal", 
    "Gamma", "Weibull", "Lognormal"), lty = 1, lwd = 1)

[Figure: February flow histogram with fitted Normal, Gamma, Weibull and Lognormal PDFs]


# Problem 4a. Goodness of Fit tests for Various Distributions on February
# flow data

# H0: Sample data are consistent with the model distribution H1: Sample
# data are *not* consistent with the model distribution Alpha = 0.05 =
# significance level

x = lf[, 3]

# 4a.i) K-S test for goodness of fit of Normal

zznorm = ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
zznorm$p.value
## [1] 0.1408

# Since p-value=0.14 > 0.05=alpha, we do not reject the null hypothesis.
# The sample data are consistent with the normal distribution at the 5%
# significance level.  This does not fully agree with the visual check of
# the normal distribution against the positively skewed histogram.

# 4a.ii) K-S test for goodness of fit of Lognormal
zzlnorm = ks.test(x, "plnorm", mean = mean(log(x)), sd = sd(log(x)))
zzlnorm$p.value
## [1] 0.6846

# Since p-value=0.68 > 0.05=alpha, we do not reject the null hypothesis.
# The sample data are consistent with the lognormal distribution at the 5%
# significance level.

# 4a.iii) K-S test for Gamma
zgamma = fitdistr(x/10000, dgamma, list(shape = 1, rate = 0.1), lower = 0.01)
zgamma
##     shape       rate  
##   19.29829    0.48937 
##  ( 2.77594) ( 0.07131)
zzgamma = ks.test(x/10000, "pgamma", shape = zgamma$estimate[1], scale = 1/zgamma$estimate[2])
zzgamma
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x/10000
## D = 0.0855, p-value = 0.4648
## alternative hypothesis: two-sided

# Since p-value=0.46 > 0.05=alpha, we do not reject the null hypothesis.
# The sample data are consistent with the Gamma distribution at the 5%
# significance level.

# 4a.iv) K-S test for Weibull
zweibull = fitdistr(x/10000, "weibull", list(shape = 1, scale = 0.1), lower = 0.001)
zzweib = ks.test(x/10000, "pweibull", shape = zweibull$estimate[1], scale = zweibull$estimate[2])
zzweib
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x/10000
## D = 0.1309, p-value = 0.07026
## alternative hypothesis: two-sided

# Since p-value=0.07 > 0.05=alpha, we do not reject the null hypothesis.
# The sample data are consistent with the Weibull distribution at the 5%
# significance level.  At the 10% level (alpha=0.1), however, we would
# reject, since p=0.07 < 0.1.
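
# For a compact side-by-side comparison, the four K-S p-values can be
# collected into one named vector; a minimal sketch, reusing the fitted
# objects zgamma and zweibull from above:

ks.pvals = c(normal = ks.test(x, "pnorm", mean = mean(x), sd = sd(x))$p.value, 
    lognormal = ks.test(x, "plnorm", meanlog = mean(log(x)), sdlog = sd(log(x)))$p.value, 
    gamma = ks.test(x/10000, "pgamma", shape = zgamma$estimate[1], scale = 1/zgamma$estimate[2])$p.value, 
    weibull = ks.test(x/10000, "pweibull", shape = zweibull$estimate[1], scale = zweibull$estimate[2])$p.value)
round(ks.pvals, 3)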

# 4a.v) A-D test for Normal Distribution
adnorm = ad.test(x, pnorm, mean = mean(x), sd = sd(x))
adnorm$p.value
##      AD 
## 0.06112

# Since p-value=0.06 > 0.05=alpha, we do not reject the null hypothesis at
# the 5% significance level: the sample data are consistent with the normal
# distribution.  At the 10% level (alpha=0.1), however, we would reject.

# 4a.vi) A-D test for Lognormal Distribution

adlnorm = ad.test(log(x), plnorm, mean = mean(x), sd = sd(x))
adlnorm$p.value
##        AD 
## 6.316e-06

# Since p-value=6.3e-06 << 0.05=alpha, we reject the null hypothesis: the
# log-transformed sample is not consistent with the fitted distribution.
# Note, however, that this call passes log(x) to plnorm together with the
# untransformed mean and sd, so the rejection partly reflects a mismatched
# parameterization.
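
# For comparison, here is the A-D call parameterized consistently with the
# lognormal fit used in the K-S test above; a sketch:

# A-D test of the raw data against the fitted lognormal (meanlog/sdlog
# estimated from the log-transformed sample)
adlnorm2 = ad.test(x, plnorm, meanlog = mean(log(x)), sdlog = sd(log(x)))
adlnorm2$p.value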

# 4a.vii) A-D test for Gamma Distribution
adgamma = ad.test(x, pgamma, shape = zgamma$estimate[1], scale = 1/zgamma$estimate[2])
adgamma$p.value
##        AD 
## 6.316e-06

# Since p-value=6.3e-06 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the Gamma distribution at any
# reasonable significance level.  (Note that x is on the raw scale here,
# while the parameters were fitted to x/10000, which contributes to the
# rejection.)

# 4a.viii) A-D test for Weibull Distribution
adweibull = ad.test(x, pweibull, shape = zweibull$estimate[1], scale = zweibull$estimate[2])
adweibull$p.value
##        AD 
## 6.316e-06

# Since p-value=6.3e-06 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the Weibull distribution.  (The same
# scale mismatch as in the Gamma test above applies here.)

# 4a.ix) Chi-Square test of Goodness of Fit for Normal Distribution
chisq.test(lf[, 3], p = fitnormpdf/sum(fitnormpdf))
## 
##  Chi-squared test for given probabilities
## 
## data:  lf[, 3]
## X-squared = 1.026e+09, df = 94, p-value < 2.2e-16

# Since p-value < 2.2e-16 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the normal distribution.

# 4a.x) Chi-Square test of Goodness of Fit for Lognormal Distribution
chisq.test(x, p = fitlognorm/sum(fitlognorm))
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 162080488, df = 94, p-value < 2.2e-16
# Since p-value < 2.2e-16 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the lognormal distribution.

# 4a.xi) Chi-Square test of Goodness of Fit for Gamma Distribution
chisq.test(x, p = fitgammapdf/sum(fitgammapdf))
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 2.82e+08, df = 94, p-value < 2.2e-16

# Since p-value < 2.2e-16 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the Gamma distribution.

# 4a.xii) Chi-Square test of Goodness of Fit for Weibull Distribution
chisq.test(x, p = fitweibullpdf/sum(fitweibullpdf))
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 165095082, df = 94, p-value < 2.2e-16

# Since p-value < 2.2e-16 << 0.05=alpha, we reject the null hypothesis.  The
# sample data are not consistent with the Weibull distribution.
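
# A caveat: chisq.test as used above treats each observation as its own
# category with density-derived weights, which is not the standard binned
# chi-square goodness-of-fit test, so these extreme statistics should be
# read cautiously.  A sketch of the binned version for the normal fit
# (k = 8 equal-probability bins is an arbitrary illustrative choice; the
# degrees of freedom are not adjusted for the two estimated parameters,
# so the p-value is approximate):

k = 8
breaks = qnorm(seq(0, 1, length = k + 1), mean = mean(x), sd = sd(x))  # -Inf..Inf
obs = table(cut(x, breaks))  # observed counts per bin
chisq.test(as.vector(obs), p = rep(1/k, k))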

# 4 b. Plots of Model Quantiles vs. Sample Quantiles

# 4b. i) QQ Plot of Normal Distribution versus Sample Distribution

qqplot(qnorm(ppoints(length(x)), mean = mean(x), sd = sd(x)), x, 
    main = "QQ Plot of Fitted Normal versus Empirical Quantiles", 
    xlab = "Fitted Normal Quantiles", ylab = "Sample Quantiles")

[Figure: QQ plot, fitted Normal vs sample quantiles]


# As some of the goodness-of-fit tests suggested, the sample data do not
# match the Normal distribution well.

# 4b. ii) QQ Plot of Lognormal Distribution versus Sample Distribution

qqplot(qlnorm(ppoints(length(x)), meanlog = mean(log(x)), sdlog = sd(log(x))), x, 
    main = "QQ Plot of Fitted Lognormal versus Empirical Quantiles", 
    xlab = "Fitted Lognormal Quantiles", ylab = "Sample Quantiles")

[Figure: QQ plot, fitted Lognormal vs sample quantiles]


# As some of the goodness-of-fit tests suggested (the A-D and chi-square
# tests, though not the K-S test), the sample data do not match the
# Lognormal distribution well.

# 4b. iii) QQ Plot of Gamma Distribution versus Sample Distribution

qqplot(qgamma(ppoints(length(x)), shape = zgamma$estimate[1], rate = zgamma$estimate[2]) * 
    10000, x, main = "QQ Plot of Fitted Gamma versus Empirical Quantiles", 
    xlab = "Fitted Gamma Quantiles", ylab = "Sample Quantiles")

[Figure: QQ plot, fitted Gamma vs sample quantiles]

qqplot(qweibull(ppoints(length(x)), shape = zweibull$estimate[1], scale = zweibull$estimate[2]) * 
    10000, x, main = "QQ Plot of Fitted Weibull versus Empirical Quantiles", 
    xlab = "Fitted Weibull Quantiles", ylab = "Sample Quantiles")

[Figure: QQ plot, fitted Weibull vs sample quantiles]


# The quantile plots also suggest rejecting the null hypothesis: the sample
# data do not match these distributions well.

# Problem 5. Nonparametric vs. Parametric PDF of May, August and December
# Flows at Lee's Ferry
library(sm)
## Package `sm', version 2.2-4.1 Copyright (C) 1997, 2000, 2005, 2007, 2008,
## A.W.Bowman & A.Azzalini Type help(sm) for summary information

may = lf[, 6]
aug = lf[, 9]
dec = lf[, 13]

# 5 a) Kernel-Smoothed (Nonparametric) PDFs of Monthly Flows
mayeval = seq(min(may) - sd(may), max(may) + sd(may), length = 100)
augeval = seq(min(aug) - sd(aug), max(aug) + sd(aug), length = 100)
deceval = seq(min(dec) - sd(dec), max(dec) + sd(dec), length = 100)
maypdf = sm.density(may, eval.points = mayeval, display = "none")$estimate
augpdf = sm.density(aug, eval.points = augeval, display = "none")$estimate
decpdf = sm.density(dec, eval.points = deceval, display = "none")$estimate
fitnormpdf.may = dnorm(sort(may), mean(may), sd(may))
hist(may, prob = T, main = "May Lee's Ferry Flow", xlab = "Flow Rate", ylab = "PDF")
legend("topright", legend = c("Normal", "Actual"), lty = c(2, 1), lwd = 1)
lines(mayeval, maypdf, lty = 1)
lines(sort(may), fitnormpdf.may, lty = 2)

[Figure: May flow histogram with kernel-smoothed and fitted Normal PDFs]

fitnormpdf.aug = dnorm(sort(aug), mean(aug), sd(aug))
hist(aug, prob = T, main = "August Lee's Ferry Flow", xlab = "Flow Rate", ylab = "PDF")
lines(augeval, augpdf, lty = 1)
lines(sort(aug), fitnormpdf.aug, lty = 2)
legend("topright", legend = c("Normal", "Actual"), lty = c(2, 1), lwd = 1)

[Figure: August flow histogram with kernel-smoothed and fitted Normal PDFs]

fitnormpdf.dec = dnorm(sort(dec), mean(dec), sd(dec))
hist(dec, prob = T, main = "December Lee's Ferry Flow", xlab = "Flow Rate", 
    ylab = "PDF")
lines(deceval, decpdf, lty = 1)
lines(sort(dec), fitnormpdf.dec, lty = 2)
legend("topright", legend = c("Normal", "Actual"), lty = c(2, 1), lwd = 1)

[Figure: December flow histogram with kernel-smoothed and fitted Normal PDFs]


# The kernel-smoothed PDFs can differ noticeably from the fitted Normal
# PDFs.  December most closely resembles a normal distribution; this may be
# due to cold temperatures and a lack of runoff events, unlike May, for
# example, where we see a bimodal distribution.
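
# The kernel estimates above use sm's default normal-reference bandwidth;
# the sketch below shows how sensitive the May PDF is to halving and
# doubling that bandwidth (assuming may, mayeval and maypdf from above):

plot(mayeval, maypdf, type = "l", xlab = "Flow Rate", ylab = "PDF", 
    main = "Bandwidth Sensitivity - May Flows")
for (h in c(0.5, 2) * hnorm(may)) {
    # re-estimate the kernel PDF with the alternative bandwidth h
    d = sm.density(may, h = h, eval.points = mayeval, display = "none")
    lines(mayeval, d$estimate, lty = 2)
}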

# Problem 6. Parametric Monte Carlo Simulation of Old Faithful Data

data = read.csv("C:/Users/Katita/Downloads/Old_faithful_data.csv", header = T)
xdata = data[, 1]

# The historical data are in the vector 'xdata'
N = length(xdata)  #number of data points.

# For Monte Carlo...
nsim = 500  #number of simulations, each of length N

# points at which to estimate the PDF from the simulations and the
# observed

xeval = seq(min(xdata) - sd(xdata), max(xdata) + sd(xdata), length = 100)
neval = length(xeval)
xsimpdf = matrix(0, nrow = nsim, ncol = neval)

# Vectors for the statistics
meansim = 1:nsim
mediansim = 1:nsim
sdsim = 1:nsim
skewsim = 1:nsim
iqrsim = 1:nsim

# H0: Sample data are consistent with the model distribution H1: Sample
# data are *not* consistent with the model distribution Alpha = 0.05 =
# significance level

# norm K-S TEST
zznorm = ks.test(xdata, "pnorm", mean = mean(xdata), sd = sd(xdata))
## Warning: ties should not be present for the Kolmogorov-Smirnov test
zznorm$p.value
## [1] 0.0001216
# p-value = 0.00012 < 0.05, so we reject the null hypothesis

# log-norm K-S TEST
zzlnorm = ks.test(xdata, "plnorm", mean = mean(log(xdata)), sd = sd(log(xdata)))
## Warning: ties should not be present for the Kolmogorov-Smirnov test
zzlnorm$p.value
## [1] 3.8e-07
# p-value = 3.8e-07 < 0.05, so we reject the null hypothesis

# gamma K-S TEST
zgamma = fitdistr(xdata, dgamma, list(shape = 1, rate = 0.1), lower = 0.01)
zgamma
##     shape       rate  
##   25.16272    0.34809 
##  ( 2.04734) ( 0.02861)
zzgamma = ks.test(xdata, "pgamma", shape = zgamma$estimate[1], scale = 1/zgamma$estimate[2])
## Warning: ties should not be present for the Kolmogorov-Smirnov test
zzgamma
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  xdata
## D = 0.1503, p-value = 2.861e-06
## alternative hypothesis: two-sided
# p-value = 2.9e-06 < 0.05, so we reject the null hypothesis

# weibull K-S TEST
zweibull = fitdistr(xdata, "weibull", list(shape = 1, scale = 0.1), lower = 0.001)
zzweib = ks.test(xdata, "pweibull", shape = zweibull$estimate[1], scale = zweibull$estimate[2])
## Warning: ties should not be present for the Kolmogorov-Smirnov test
zzweib
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  xdata
## D = 0.0991, p-value = 0.005747
## alternative hypothesis: two-sided

# p-value = 0.006 < 0.05, so we reject the null hypothesis.  However, this
# p-value is the largest of the four tests; as such, the Weibull is the
# distribution I will choose to fit.

# Model Estimation

## Kernel PDF
xdensitykernel = sm.density(xdata, eval.points = xeval, display = "none")$estimate

# Weibull
xdensityorig = dweibull(xeval, shape = zweibull$estimate[1], scale = zweibull$estimate[2])

######## Simulation ##########

for (i in 1:nsim) {
    # simulate from Weibull..
    xsim = rweibull(N, shape = zweibull$estimate[1], scale = zweibull$estimate[2])


    # compute the statistics from the simulation
    meansim[i] = mean(xsim)
    sdsim[i] = sd(xsim)
    # sdsim[i]=sd(log(xsim))
    skewsim[i] = skewness(xsim)
    iqrsim[i] = diff(quantile(xsim, c(0.25, 0.75)))
    mediansim[i] = quantile(xsim, c(0.5))

    # estimate the PDF at the evaluation points from the simulated data,
    # using the chosen distribution

    # Weibull..
    zz = fitdistr(xsim, "weibull")
    xsimpdf[i, ] = dweibull(xeval, shape = zz$estimate[1], scale = zz$estimate[2])
}

# Boxplots of statistics from the simulations, with the corresponding
# value from the original data overlaid as a red point.

par(mfrow = c(1, 5))

zz = boxplot(meansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Mean", cex = 1.25)
points(z1, mean(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "Mean")

zz = boxplot(mediansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, quantile(xdata, c(0.5)), col = "red", cex = 1.25, pch = 16)
title(main = "Median")

zz = boxplot(sdsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "SD", cex = 1.25)
# points(z1,sd(log(xdata)),col='red',cex=1.25)
points(z1, sd(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "SD")

zz = boxplot(skewsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Skew", cex = 1.25)
points(z1, skewness(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "Skew")

zz = boxplot(iqrsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "IQR", cex = 1.25, ylim = range(c(iqrsim, diff(quantile(xdata, 
    c(0.25, 0.75))))))
points(z1, diff(quantile(xdata, c(0.25, 0.75))), col = "red", cex = 1.25, pch = 16)
title(main = "IQR")

[Figure: boxplots of simulated mean, median, SD, skew and IQR with observed values in red]


# par(ask=TRUE)

# boxplot of PDFs from the simulations along with that of the original
# data

par(mfrow = c(1, 1))
xs = 1:neval

# For R version 2.4.1
zz = boxplot(split(t(xsimpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xsimpdf, xdensityorig), xlab = "", ylab = "", cex = 1.25)

npts = 10  #number of points to plot on the x-axis..
n2 = round(neval * (1:npts)/npts)

z2 = z1[n2]
n1 = xeval[n2]


n1 = round(n1, dig = 2)
n1 = as.character(n1)

axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, col = "red")

lines(z1, xdensitykernel, col = "blue", lwd = 2)

title(main = "PDFs from the simulations and the historical data")

[Figure: boxplots of simulated PDFs with the fitted Weibull PDF (red) and kernel PDF (blue)]


# Problem 7. Non-Parametric Approach to Old Faithful Modelling

# For Monte Carlo...
nsim = 500  #number of simulations, each of length N

# Points at which to estimate the PDF from the simulations and the
# observed

xeval = seq(min(xdata) - sd(xdata), max(xdata) + sd(xdata), length = 100)
neval = length(xeval)
xsimpdf = matrix(0, nrow = nsim, ncol = neval)

# Vectors for Statistics
meansim = 1:nsim
mediansim = 1:nsim
sdsim = 1:nsim
skewsim = 1:nsim
iqrsim = 1:nsim

# Evaluate the Kernel density PDF at the evaluation points based on the
# data

xdensityorig = sm.density(xdata, eval.points = xeval, display = "none")$estimate

# Bandwidth of the data
band = hnorm(xdata)
######## Simulation ##########

for (i in 1:nsim) {

    xsim = sample(xdata, replace = T)
    meansim[i] = mean(xsim)
    sdsim[i] = sd(xsim)
    # sdsim[i]=sd(log(xsim))
    skewsim[i] = skewness(xsim)
    iqrsim[i] = diff(quantile(xsim, c(0.25, 0.75)))
    mediansim[i] = quantile(xsim, c(0.5))

    xsimpdf[i, ] = sm.density(xsim, eval.points = xeval, display = "none")$estimate

}
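
# Note that band (the normal-reference bandwidth) is computed above but
# never used by this plain bootstrap.  A smoothed-bootstrap variant would
# jitter each resample with kernel noise; a minimal sketch:

# Smoothed bootstrap: resampled values are perturbed with normal noise of
# standard deviation 'band', so simulations are not limited to observed values
xsim = sample(xdata, replace = T) + rnorm(N, mean = 0, sd = band)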

# Boxplot of statistics from the simulations, overlaid with the
# corresponding values from the original data.

par(mfrow = c(1, 5))

zz = boxplot(meansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Mean", cex = 1.25)
points(z1, mean(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "Mean")

zz = boxplot(mediansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, quantile(xdata, c(0.5)), col = "red", cex = 1.25, pch = 16)
title(main = "Median")

zz = boxplot(sdsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "SD", cex = 1.25)
# points(z1,sd(log(xdata)),col='red',cex=1.25)
points(z1, sd(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "SD")

zz = boxplot(skewsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Skew", cex = 1.25)
points(z1, skewness(xdata), col = "red", cex = 1.25, pch = 16)
title(main = "Skew")

zz = boxplot(iqrsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "IQR", cex = 1.25)
points(z1, diff(quantile(xdata, c(0.25, 0.75))), col = "red", cex = 1.25, pch = 16)
title(main = "IQR")

[Figure: boxplots of bootstrapped mean, median, SD, skew and IQR with observed values in red]


par(ask = TRUE)

# Boxplot of PDFs from MC simulations and Original Data

par(mfrow = c(1, 1))
xs = 1:neval

# For R version 2.4.1
zz = boxplot(split(t(xsimpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xsimpdf, xdensityorig), xlab = "", ylab = "", cex = 1.25)

npts = 10  #number of points to plot on the x-axis..
n2 = round(neval * (1:npts)/npts)

z2 = z1[n2]
n1 = xeval[n2]


n1 = round(n1, dig = 2)
n1 = as.character(n1)

axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, col = "red", lwd = 2)

title(main = "PDFs from MC Simulations and Sample Data")

[Figure: boxplots of bootstrapped PDFs with the kernel PDF of the data (red)]


# Problem 8. Investigating the relationship between inter-eruption time
# and the duration of eruptions at Old Faithful in Yellowstone National
# Park: Bivariate Histograms and PDFs

# 8.a) Bivariate Histograms: parametric and nonparametric models and
# empirical Histograms
library(gplots)
## Loading required package: gtools
## Attaching package: 'gtools'
## The following object is masked from 'package:e1071':
## 
## permutations
## Loading required package: gdata
## gdata: Unable to locate valid perl interpreter gdata: gdata: read.xls()
## will be unable to read Excel XLS and XLSX files gdata: unless the 'perl='
## argument is used to specify the location gdata: of a valid perl
## intrpreter. gdata: gdata: (To avoid display of this message in the future,
## please gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls() gdata: to support
## 'XLS' (Excel 97-2004) files.
## gdata: Unable to load perl libaries needed by read.xls() gdata: to support
## 'XLSX' (Excel 2007+) files.

## gdata: Run the function 'installXLSXsupport()' gdata: to automatically
## download and install the perl gdata: libaries needed to support Excel XLS
## and XLSX formats.
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
## 
## nobs
## The following object is masked from 'package:utils':
## 
## object.size
## Loading required package: caTools
## Loading required package: grid
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
## lowess
library(akima)
X = data[, 1]
Y = data[, 2]

h2d = hist2d(X, Y, show = FALSE)
persp(h2d$x, h2d$y, h2d$counts, ticktype = "detailed", theta = 30, phi = 30, 
    expand = 0.5, shade = 0.1, col = "cyan", ltheta = -30, xlab = "Intereruption Time", 
    ylab = "Duration (min.)", zlab = "Count")

[Figure: 3-D bivariate histogram of inter-eruption time vs duration]


data = cbind(X, Y)

N = length(X)

xm = mean(X)
sigx = sd(X)
ym = mean(Y)
sigy = sd(Y)
rho = cor(X, Y)

nx = 60
ny = 50

nxy = nx * ny
xeval = seq(min(X), max(X), length = nx)
yeval = seq(min(Y), max(Y), length = ny)

jpdf = 1:nxy
xyeval = matrix(0, nrow = nxy, ncol = 2)

k = 0
for (j in 1:nx) {
    for (i in 1:ny) {
        k = k + 1
        xyeval[k, 1] = xeval[j]
        xyeval[k, 2] = yeval[i]
        nfact = 1/(2 * pi * sigx * sigy * sqrt(1 - rho^2))
        xs = (xyeval[k, 1] - xm)/sigx
        ys = (xyeval[k, 2] - ym)/sigy

        jpdf[k] = exp(-(xs * xs + ys * ys - 2 * rho * xs * ys)/(2 * (1 - rho^2)))
        jpdf[k] = jpdf[k] * nfact
    }
}
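
# The same joint density can be evaluated without the double loop; a
# minimal sketch, assuming the moments (xm, sigx, ym, sigy, rho) and the
# grids (xeval, yeval) defined above:

# Vectorized bivariate-normal density over the xeval-by-yeval grid
xs = (xeval - xm)/sigx
ys = (yeval - ym)/sigy
q = outer(xs^2, ys^2, "+") - 2 * rho * outer(xs, ys)  # quadratic form
jgrid = exp(-q/(2 * (1 - rho^2)))/(2 * pi * sigx * sigy * sqrt(1 - rho^2))
# jgrid[j, i] equals jpdf evaluated at (xeval[j], yeval[i])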

zz = interp(xyeval[, 1], xyeval[, 2], jpdf)

# I tried this: persp(zz, ticktype='detailed', theta=30, phi=30,
# expand=0.5, shade=0.5, col='cyan', ltheta=-30), but after consulting with
# others I ended up using this:

contour(zz, xlab = "Intereruption Time (min)", ylab = "Duration (min)")
points(X, Y)

[Figure: bivariate normal PDF contours with the data points overlaid]


# Nonparametric Two-Dimensional PDF
xm = mean(X)
sigx = sd(X)
ym = mean(Y)
sigy = sd(Y)
rho = cor(X, Y)

nx = 50
ny = 50

nxy = nx * ny
xeval = seq(min(X), max(X), length = nx)
yeval = seq(min(Y), max(Y), length = ny)

xyeval = expand.grid(x = xeval, y = yeval)

data = cbind(X, Y)
jpdf = sm.density(data, eval.points = xyeval, eval.grid = FALSE, display = "none")
## Loading required package: rgl
## Warning: there is no package called 'rgl'
## Loading required package: rpanel
## Warning: there is no package called 'rpanel'

zz = interp(xyeval[, 1], xyeval[, 2], jpdf$estimate)
contour(zz, xlab = "Intereruption Time (min)", ylab = "Duration (min)")
points(X, Y)

[Figure: kernel-density contours with the data points overlaid]


## Inter-eruption time and duration appear fairly strongly linearly
## correlated, with clusters of points at (short duration, short
## inter-eruption time) and (long duration, long inter-eruption time).
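
# The strength of that relationship can be quantified directly; a minimal
# sketch using the X and Y vectors above:

cor(X, Y)       # Pearson correlation coefficient
cor.test(X, Y)  # with a test of significance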

# 8. b) Joint Data: Normal and Nonparametric Conditional PDFs of the Data

data = cbind(Y, X)
colnames(data) = c("X", "Y")

Yc = 57
X1 = sort(Y)
xyc = cbind(X1, rep(Yc, N))
zz = sm.density(data, eval.points = xyc, display = "none")
## Loading required package: rgl
## Warning: there is no package called 'rgl'
## Loading required package: rpanel
## Warning: there is no package called 'rpanel'
fxc = sm.density(X, eval.points = Yc, display = "none")
conpdf = diag(zz$estimate)/fxc$estimate
plot(X1, conpdf, type = "l", xlab = "Duration (min)", ylab = "Conditional PDF")
title(main = c("Conditional PDF - at ", Yc))

[Figure: conditional PDF of duration at inter-eruption time 57 min]


Yc = 69
X1 = sort(Y)
xyc = cbind(X1, rep(Yc, N))
zz = sm.density(data, eval.points = xyc, display = "none")
## Loading required package: rgl
## Warning: there is no package called 'rgl'
## Loading required package: rpanel
## Warning: there is no package called 'rpanel'
fxc = sm.density(X, eval.points = Yc, display = "none")
conpdf = diag(zz$estimate)/fxc$estimate
plot(X1, conpdf, type = "l", xlab = "Duration (min)", ylab = "Conditional PDF")
title(main = c("Conditional PDF - at ", Yc))

[Figure: conditional PDF of duration at inter-eruption time 69 min]


Yc = 85
X1 = sort(Y)
xyc = cbind(X1, rep(Yc, N))
zz = sm.density(data, eval.points = xyc, display = "none")
## Loading required package: rgl
## Warning: there is no package called 'rgl'
## Loading required package: rpanel
## Warning: there is no package called 'rpanel'
fxc = sm.density(X, eval.points = Yc, display = "none")
conpdf = diag(zz$estimate)/fxc$estimate
plot(X1, conpdf, type = "l", xlab = "Duration (min)", ylab = "Conditional PDF")
title(main = c("Conditional PDF - at ", Yc))

[Figure: conditional PDF of duration at inter-eruption time 85 min]


## As one might expect, the conditional PDFs put more probability on
## shorter durations for the low inter-eruption category (<65 min) and on
## longer durations for the high category (>75 min). Interestingly, the
## intermediate category shows a bimodal distribution of probability over
## eruption duration, re-confirming the clustering we saw in the contour
## plots above.

# 8. c) Boxplots of the Created Models and Empirical Data: Visual Check

X = data[, 1]
Y = data[, 2]

cat1 = Y <= 65
cat2 = Y < 75 & Y > 65
cat3 = Y >= 75

X1 = X[cat1]
X2 = X[cat2]
X3 = X[cat3]

par(mfrow = c(1, 3))
boxplot(X1, main = "Intereruption <= 65 min")
boxplot(X2, main = "65 < Intereruption < 75 min")
boxplot(X3, main = "Intereruption >= 75 min")

[Figure: boxplots of duration for the three inter-eruption-time categories]

par(mfrow = c(1, 1))

# Problem 9. Linear Regression of Two Variables: Intereruption Time and
# Duration

X = data[, 1]
Y = data[, 2]

zz = lm(Y ~ X)

# plot the fitted line in x-order so it does not zigzag
ox = order(X)
plot(X[ox], zz$fit[ox], type = "l")
points(X, Y)

[Figure: fitted regression line with the data points]


par(mfrow = c(2, 2))
plot(zz)

[Figure: standard lm diagnostic plots]


### First, the residuals show a pattern of clustering at high and low
### values of y-hat.  Therefore, although the QQ plot shows a good fit to
### normality, the model does not predict very well.  The best way to
### correct this might be to fit two separate linear regressions, one for
### each cluster of inter-eruption times and eruption durations.  Then one
### could compare their PDFs on a common scale and run further correlation
### tests.

# Problem 10. Simulations of Old Faithful Intereruption and Durations
# Times

# 10. a) Bootstrap simulation of intereruption time and conditional
# simulation of eruption duration using the linear regression from Problem
# 9; repeat to generate series of the same length as the observed data

source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew.r")
X = data[, 1]
Y = data[, 2]

## Bootstrap Simulations

N = length(X)
zmod = lm(Y ~ X)
Xboot = matrix(NA, nrow = N, ncol = 500)
Yboot = matrix(NA, nrow = N, ncol = 500)
meansim = 1:500
mediansim = 1:500
sdsim = 1:500
skewsim = 1:500
iqrsim = 1:500
for (i in 1:500) {

    # resample the predictor with replacement
    Xsim = sample(X, N, replace = T)
    # predict at the resampled points; newdata must carry the model's
    # predictor name, X
    yy = predict(zmod, newdata = data.frame(X = Xsim), se.fit = T)
    # add normal noise scaled by the residual standard error of the fit
    Yboot[, i] = yy$fit + rnorm(N, mean = 0, sd = yy$residual.scale)
    Xboot[, i] = Xsim

    # compute the statistics from the simulation
    meansim[i] = mean(Yboot[, i])
    mediansim[i] = median(Yboot[, i])
    sdsim[i] = sd(Yboot[, i])
    skewsim[i] = skew(Yboot[, i])
    iqrsim[i] = diff(quantile(Yboot[, i], c(0.25, 0.75)))
}

## Diagnostics:

## Boxplots of Simulated Statistics
par(mfrow = c(1, 5))

zz = boxplot(meansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Mean", cex = 1.25)
points(z1, mean(Y), col = "red", cex = 1.25, pch = 16)
title(main = "Mean")

zz = boxplot(mediansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25, ylim = range(c(mediansim, 
    median(Y))))
points(z1, quantile(Y, c(0.5)), col = "red", cex = 1.25, pch = 16)
title(main = "Median")

zz = boxplot(sdsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "SD", cex = 1.25, ylim = range(c(sdsim, sd(Y))))

points(z1, sd(Y), col = "red", cex = 1.25, pch = 16)
title(main = "SD")

zz = boxplot(skewsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Skew", cex = 1.25)
points(z1, skew(Y), col = "red", cex = 1.25, pch = 16)
title(main = "Skew")

zz = boxplot(iqrsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "IQR", cex = 1.25, ylim = range(c(iqrsim, diff(quantile(Y, 
    c(0.25, 0.75))))))
points(z1, diff(quantile(Y, c(0.25, 0.75))), col = "red", cex = 1.25, pch = 16)
title(main = "IQR")

[Figure: boxplots of simulated statistics with observed values in red]



# PDF of Simulated Data
par(mfrow = c(1, 1))
plot(density(Y), type = "l", ylim = range(density(Y)$y))
for (i in 1:500) lines(density(Yboot[, i]))
lines(density(Y), col = "red", lwd = 2)

[Figure: PDFs of the simulated series with the PDF of the observed data in red]



# 10. b) Nonparametric Simulation of Data using Monte Carlo Analysis

N = length(X)  # number of data points

# Monte Carlo...
nsim = 500

xeval = seq(min(X) - sd(X), max(X) + sd(X), length = 100)
neval = length(xeval)
xsimpdf = matrix(0, nrow = nsim, ncol = neval)

meansim = 1:nsim
mediansim = 1:nsim
sdsim = 1:nsim
skewsim = 1:nsim
iqrsim = 1:nsim
xdensityorig = sm.density(X, eval.points = xeval, display = "none")$estimate

band = hnorm(X)

######## Simulation ##########

Xsim = matrix(NA, nrow = N, ncol = 500)
Ysim = matrix(NA, nrow = N, ncol = 500)

for (i in 1:nsim) {
    xsim = sample(X, replace = T)

    meansim[i] = mean(xsim)
    sdsim[i] = sd(xsim)
    skewsim[i] = skew(xsim)
    iqrsim[i] = diff(quantile(xsim, c(0.25, 0.75)))
    mediansim[i] = quantile(xsim, c(0.5))

    xsimpdf[i, ] = sm.density(xsim, eval.points = xeval, display = "none")$estimate
    Xsim[, i] = xsim
}


par(mfrow = c(1, 5))

zz = boxplot(meansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Mean", cex = 1.25)
points(z1, mean(X), col = "red", cex = 1.25, pch = 16)
title(main = "Mean")

zz = boxplot(mediansim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Median", cex = 1.25)
points(z1, quantile(X, c(0.5)), col = "red", cex = 1.25, pch = 16)
title(main = "Median")

zz = boxplot(sdsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "SD", cex = 1.25)
points(z1, sd(X), col = "red", cex = 1.25, pch = 16)
title(main = "SD")

zz = boxplot(skewsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "Skew", cex = 1.25)
points(z1, skew(X), col = "red", cex = 1.25, pch = 16)
title(main = "Skew")

zz = boxplot(iqrsim, plot = F)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, xlab = "", ylab = "IQR", cex = 1.25)
points(z1, diff(quantile(X, c(0.25, 0.75))), col = "red", cex = 1.25, pch = 16)
title(main = "IQR")

[Figure: boxplots of bootstrapped statistics with observed values in red]


par(ask = TRUE)

# Boxplots of the simulated PDFs and the PDF of the original data

par(mfrow = c(1, 1))
xs = 1:neval

zz = boxplot(split(t(xsimpdf), xs), plot = F, cex = 1)
zz$names = rep("", length(zz$names))
z1 = bxp(zz, ylim = range(xsimpdf, xdensityorig), xlab = "", ylab = "", cex = 1.25)

# Number of Points to Plot on Axis

npts = 10
n2 = round(neval * (1:npts)/npts)

z2 = z1[n2]
n1 = xeval[n2]

n1 = round(n1, dig = 2)
n1 = as.character(n1)

axis(1, at = z2, labels = n1, cex = 1)
lines(z1, xdensityorig, col = "red", lwd = 2)

title(main = "PDFs from the simulations and the historical data")

[Figure: boxplots of bootstrapped PDFs with the kernel PDF of the data (red)]


# Problem 11. Linear Regression Modeling of the Upper Blue Nile Basin

# 11. a) Best Model Selection using Linear Regression

# Initializing the Vectors

y = as.matrix(scan("http://civil.colorado.edu/~balajir/CVEN5454/r-project-data/UBN_precip.txt"))
X = read.table("http://civil.colorado.edu/~balajir/CVEN5454/r-project-data/UBN_predictors.txt")
x1 = X[, 1]
x2 = X[, 2]
x3 = X[, 3]
x4 = X[, 4]
x5 = X[, 5]
x6 = X[, 6]
x7 = X[, 7]
xfull = cbind(x1, x2, x3, x4, x5, x6, x7)
x = xfull

N = length(y)

library(leaps)
library(MPV)
## Error: there is no package called 'MPV'
combs = leaps(x, y, nbest = N)
combos = combs$which
ncombos = length(combos[, 1])
xpress = 1:ncombos
xmse = 1:ncombos
for (i in 1:ncombos) {
    zz = glm(y ~ x[, combos[i, ]])
    xpress[i] = PRESS(zz)
    xmse[i] = sum((zz$res)^2)/(N - length(zz$coef))
}
## Error: could not find function "PRESS"
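
# Since MPV failed to load, PRESS() is undefined when the loop above runs.
# The statistic can be computed directly from the standard leave-one-out
# identity for least squares; a minimal sketch:

# PRESS via leverages: e_i/(1 - h_ii) are the leave-one-out residuals
PRESS <- function(model) {
    r <- residuals(model)
    h <- hatvalues(model)
    sum((r/(1 - h))^2)
}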

bestpress = order(xpress)[1]
bestmodelp = lsfit(x[, combos[bestpress, ]], y)
summary(bestmodelp)
##              Length Class  Mode   
## coefficients  2     -none- numeric
## residuals    40     -none- numeric
## intercept     1     -none- logical
## qr            6     qr     list
combos[bestpress, ]
##     1     2     3     4     5     6     7 
##  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
summary(zz)
## 
## Call:
## glm(formula = y ~ x[, combos[i, ]])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -168.51   -62.03    -1.84    41.01   150.30  
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        429.64     110.75    3.88    4e-04 ***
## x[, combos[i, ]]    41.47       8.97    4.63  4.3e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 6841)
## 
##     Null deviance: 406296  on 39  degrees of freedom
## Residual deviance: 259955  on 38  degrees of freedom
## AIC: 470.7
## 
## Number of Fisher Scoring iterations: 2

## From our PRESS analysis, we take as our best model: y-hat = 9194 +
## 20.63*x1 - 1.46*x4 + 29.14*x5 + 9.50*x6 (coefficients from the
## full-model summary above)
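
# Note that leaps() also returns Mallows' Cp, which gives an alternative
# ranking of the same candidate models without needing MPV; a sketch using
# the combs object above:

# Rank the candidate subsets by Mallows' Cp (smaller is better)
best.cp = order(combs$Cp)[1]
combs$which[best.cp, ]  # predictors retained by the Cp-best model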

## Model Diagnostics ##

# Normality of errors
x11 = resid(bestmodelp)
qqnorm(x11)
qqline(x11)

[Figure: normal QQ plot of the model residuals]


# The residuals fall close to the QQ line, so the assumption of normally
# distributed errors for the chosen best-fit model is reasonable.

## One-sample Kolmogorov-Smirnov test on the residuals ##

ks.test(x11, "pnorm", mean(x11), sd(x11))
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  x11
## D = 0.0799, p-value = 0.9425
## alternative hypothesis: two-sided

# Since p-value = 0.94 > 0.05, we conclude that normality of the errors is
# reasonable.

# Lags on Residuals

acf(x11)

[Figure: ACF of the model residuals]


# The ACF shows no significant autocorrelation at any lag, so treating the
# errors as independent is reasonable.

### Homoskedasticity ###

yhat = y - x11  # fitted values: observed y minus residuals
plot(yhat, x11, xlab = "Yhat", ylab = "residuals")

# The residuals are randomly scattered with respect to the predicted values,
# so our homoskedastic assumption is reasonable.

### R2
summary(lm(y ~ x))
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -127.80  -45.23   -7.35   42.65  126.09 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 9194.000   4836.175    1.90   0.0663 . 
## xx1           20.625     13.126    1.57   0.1259   
## xx2           22.154     23.053    0.96   0.3438   
## xx3            0.249      1.906    0.13   0.8968   
## xx4           -1.456      0.878   -1.66   0.1069   
## xx5           29.137     15.041    1.94   0.0616 . 
## xx6            9.502      2.926    3.25   0.0027 **
## xx7           -3.785      4.339   -0.87   0.3896   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69 on 32 degrees of freedom
## Multiple R-squared:  0.625,  Adjusted R-squared:  0.543 
## F-statistic: 7.62 on 7 and 32 DF,  p-value: 2.06e-05

# From our model diagnostics, above, we conclude that it is a reasonable
# model with which to continue.

# 11. b) Estimating the Accuracy and Precision of the Model using Dropped
# Points.

### These are the matrices of variables from the Best Model, above
X = as.matrix(cbind(x1, x4, x5, x6))
Y = as.matrix(y)

Np = round(0.1 * N)
index = 1:N
nsim = 500
rmse = c()
for (i in 1:nsim) {

    indexdrop = sample(index, Np)  # select a random 10% of points
    indexkeep = setdiff(index, indexdrop)

    # data frames so that lm() and predict() share the column names
    keep = data.frame(Y = Y[indexkeep, ], X[indexkeep, ])
    drop = data.frame(X[indexdrop, , drop = FALSE])

    zz = lm(Y ~ ., data = keep)

    yp = predict(zz, newdata = drop)
    rmse1 = sqrt(mean((Y[indexdrop, ] - yp)^2))

    rmse = c(rmse, rmse1)

}

## I was not able to run this simulation correctly, so I cannot comment on
## the skill of this model as compared to Rajagopalan and Block (2007).

# Problem 12. Upper Blue Nile Basin Logistic Regression Model

### Testing the Model using Logistic Regression on the 75th percentile.

N = length(y)
y = as.matrix(y)
x = as.matrix(xfull[, 1:7])
qy = quantile(y, 0.75)

### Instead of automating, I created the 0/1 response vector by hand; a
### sketch of an automated construction follows below.

ybin = read.csv("C:/Users/Katita/Downloads/ybin.csv")
y = as.matrix(ybin)
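
# For reference, the binary response can also be built directly in R; a
# minimal sketch, re-reading the precipitation series since y is
# overwritten above:

# 1 if annual precipitation exceeds its 75th percentile, else 0
yprec = scan("http://civil.colorado.edu/~balajir/CVEN5454/r-project-data/UBN_precip.txt")
ybin2 = as.numeric(yprec > quantile(yprec, 0.75))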

## Running the Logistic Regression
x = X
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
## 
## Call:
## glm(formula = y ~ x, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5711  -0.4905  -0.2916   0.0139   2.3140  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) 338.1060   176.9859    1.91    0.056 .
## xx1           0.2675     0.3833    0.70    0.485  
## xx4          -0.0588     0.0304   -1.93    0.053 .
## xx5           1.1591     0.7137    1.62    0.104  
## xx6           0.3262     0.1353    2.41    0.016 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 44.987  on 39  degrees of freedom
## Residual deviance: 27.721  on 35  degrees of freedom
## AIC: 37.72
## 
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 37.72

## In the logistic regression, with the 75th percentile of precipitation as
## the threshold for the dependent variable, the model loses some of its
## strength in variables x1 and x5, as their p-values show.  In my view,
## the logistic regression is not as useful here as the multiple regression
## in Problem 11, where the fitting and diagnostics identified the most
## influential variables.
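
# To use the fitted logistic model for prediction, the linear predictor can
# be converted to exceedance probabilities; a minimal sketch (the 0.5
# threshold is an arbitrary choice for illustration):

# Fitted probabilities of exceeding the 75th-percentile threshold
phat = predict(logistic, type = "response")
# Rough classification check against the observed 0/1 response
table(observed = as.vector(y), predicted = as.numeric(phat > 0.5))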

# Problem 13. Bayesian Regression to Model the Blue Nile Basin

# The WinBUGS model:

winmodel = function() {
    for (i in 1:n) {
        y[i] ~ dnorm(mu[i], taue)
        mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i] + beta3 * x3[i] + beta4 * 
            x4[i] + beta5 * x5[i] + beta6 * x6[i] + beta7 * x7[i]
    }

    taue ~ dgamma(0.01, 0.1)
    alpha ~ dnorm(0, 0.01)
    beta1 ~ dnorm(0, 0.01)
    beta2 ~ dnorm(0, 0.01)
    beta3 ~ dnorm(0, 0.01)
    beta4 ~ dnorm(0, 0.01)
    beta5 ~ dnorm(0, 0.01)
    beta6 ~ dnorm(0, 0.01)
    beta7 ~ dnorm(0, 0.01)
}
library(arm)
## Loading required package: Matrix
## Loading required package: lme4
## Attaching package: 'lme4'
## The following object is masked from 'package:stats':
## 
## AIC, BIC
## Loading required package: R2WinBUGS
## Loading required package: coda
## Attaching package: 'coda'
## The following object is masked from 'package:lme4':
## 
## HPDinterval
## Loading required package: boot
## Attaching package: 'boot'
## The following object is masked from 'package:gtools':
## 
## inv.logit, logit
## The following object is masked from 'package:lattice':
## 
## melanoma
## Loading required package: abind
## Loading required package: foreign
## arm (Version 1.6-05, built: 2013-3-8)
## Working directory is C:/Users/Katita/Downloads
## Attaching package: 'arm'
## The following object is masked from 'package:boot':
## 
## logit
## The following object is masked from 'package:coda':
## 
## traceplot
## The following object is masked from 'package:gtools':
## 
## logit
library(R2WinBUGS)

filename = file.path(tempdir(), "winmodel.bug")
write.model(winmodel, filename)

y = scan("http://civil.colorado.edu/~balajir/CVEN5454/r-project-data/UBN_precip.txt")
x = read.table("http://civil.colorado.edu/~balajir/CVEN5454/r-project-data/UBN_predictors.txt")
test = cbind(x, y)
colnames(test) = c("x1", "x2", "x3", "x4", "x5", "x6", "x7", "y")

N = dim(test)[1]
data = list(n = N, x1 = test[, 1], x2 = test[, 2], x3 = test[, 3], x4 = test[, 
    4], x5 = test[, 5], x6 = test[, 6], x7 = test[, 7], y = test[, 8])
inits = list(alpha = 0, beta1 = 1, beta2 = 1, beta3 = 1, beta4 = 1, beta5 = 1, 
    beta6 = 1, beta7 = 1)
parameters = c("alpha", "beta1", "beta2", "beta3", "beta4", "beta5", "beta6", 
    "beta7", "taue")
# Note: running this requires a local WinBUGS installation.
linreg.sim = bugs(data, inits = NULL, parameters.to.save = parameters, 
    model.file = filename, n.chains = 3, n.iter = 10000)
attach.bugs(linreg.sim)
hist(alpha)
hist(beta1)

### The posterior distributions are comparable to the estimates found in
### Problem 11.  However, Bayesian updating is a more powerful tool for
### predicting values of the dependent variable, and I would look carefully
### at this model when comparing the diagnostics from each.