In HW2, you will simulate and analyze various bivariate random vectors (X, Y), as discussed in Chapter 6 of the textbook. Let Z1, Z2, Z3, . . . be a sequence of independent random variables, each following a standard normal N(0, 1) distribution. Each pair (Xi, Yi), i = 1, 2, . . . , 100, is generated as follows:
Question A. X = 2Z1 + Z2 + 3Z3 + 2 and Y = Z1 + 2Z2 + Z3 + 2.
n<-100
data1 <- matrix(data = 0, ncol = 2, nrow = n)
for (i in 1:n)
{
z1 <- rnorm(1)
z2 <- rnorm(1)
z3 <- rnorm(1)
x <- 2*z1 + z2 + 3*z3 + 2
y <- z1 + 2*z2 + z3 + 2
data1[i,1] <- x
data1[i,2] <- y
}
plot(data1[,1], data1[,2])
library(pander)
pander(colMeans(data1))
The sample means are 1.809 (x) and 1.833 (y).
pander(cov(data1))
|    | V1    | V2    |
|----|-------|-------|
| V1 | 10.48 | 4.621 |
| V2 | 4.621 | 4.313 |
pander(cor(data1))
|    | V1     | V2     |
|----|--------|--------|
| V1 | 1      | 0.6874 |
| V2 | 0.6874 | 1      |
pander(sapply(as.data.frame(data1), sd))
| V1 | V2 |
|---|---|
| 3.237 | 2.077 |
pander(var(data1))
|    | V1    | V2    |
|----|-------|-------|
| V1 | 10.48 | 4.621 |
| V2 | 4.621 | 4.313 |
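Because X and Y are fixed linear combinations of Z1, Z2, Z3 (plus the constant 2), their theoretical moments are known and can be compared with the sample values above. The short check below is added for illustration; A simply collects the coefficients from Question A.
A <- matrix(c(2, 1, 3,    # coefficients of Z1, Z2, Z3 in X
              1, 2, 1),   # coefficients of Z1, Z2, Z3 in Y
            nrow = 2, byrow = TRUE)
A %*% t(A)            # theoretical covariance: Var(X) = 14, Var(Y) = 6, Cov(X, Y) = 7
cov2cor(A %*% t(A))   # theoretical correlation, about 0.76
# The theoretical mean vector is (2, 2), close to the sample means reported above.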
library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
biv <- data1[,1:2] # Extract bivariate data
### pdf(file = "data1.pdf")
op <- par(mfrow=c(2, 3), mar = c(4, 4, 4, 0.25), cex.lab = 1.75)
plot(biv[, 1], biv[, 2], pch = 16, # Naive scatterplot
cex = 1.5, col = "blue", cex.lab = 1.75,
xlab = "x",
ylab = "y")
bvbox(biv, xlab = "x", yaxt = "n", ylab = "", cex.lab = 1.75,
pch = 16, cex = 1.5, col = "blue", method = "NOT")
library(aplpack)
nlev <- 5
colors <- heat.colors(9)[3 : (nlev + 2)]
plothulls(biv, n.hull = 5, col.hull = colors, cex.lab = 1.75,
xlab = "x", ylab = " ", yaxt = "n",
lty.hull = 1 : nlev, density = NA, col = 0, main = " ")
## [[1]]
## x.hull y.hull
## [1,] -1.2398702 -3.655009
## [2,] -6.4184525 -4.362746
## [3,] -8.0823277 -1.050031
## [4,] -0.9975848 3.970809
## [5,] 6.6096121 7.515555
## [6,] 9.6240905 5.772245
## [7,] 7.1943715 2.723820
##
## [[2]]
## x.hull y.hull
## [1,] 4.3555773 0.7799300
## [2,] 3.4282674 -0.1134456
## [3,] 0.1046710 -2.2226077
## [4,] -3.8913673 -1.6749052
## [5,] -4.6739457 0.5839700
## [6,] 0.2567084 4.3028639
## [7,] 3.3720914 5.5555957
## [8,] 8.7833806 5.8795864
## [9,] 6.0484350 2.4575742
##
## [[3]]
## x.hull y.hull
## [1,] 4.6647967 1.13247289
## [2,] 3.2765751 -0.04012134
## [3,] -2.0282040 -1.39281654
## [4,] -3.8256624 0.04403021
## [5,] -3.6063294 1.00672297
## [6,] -0.4575111 3.40653670
## [7,] 2.2622914 4.76117342
## [8,] 4.3252933 5.19425796
## [9,] 5.9447874 5.31265417
## [10,] 7.5239554 4.77036993
## [11,] 7.6659315 4.66161907
##
## [[4]]
## x.hull y.hull
## [1,] 4.5983689 1.30851850
## [2,] 3.3493447 0.05223692
## [3,] 0.2671006 -0.49615966
## [4,] -0.7779849 -0.65223201
## [5,] -2.2875770 -0.74117036
## [6,] -2.8899206 -0.12464422
## [7,] -3.2915567 0.45768691
## [8,] -2.4058426 1.43032118
## [9,] -0.2097324 2.92192406
## [10,] 4.4534722 4.91423341
## [11,] 6.2821192 4.95755806
## [12,] 6.3550803 4.24846664
## [13,] 6.3848104 3.91028877
##
## [[5]]
## x.hull y.hull
## [1,] 3.0740976 0.32263268
## [2,] 2.6873746 0.13000064
## [3,] 2.3383717 0.06150962
## [4,] 0.1058051 -0.32856968
## [5,] -0.7792003 -0.29537180
## [6,] -1.9766330 0.80547850
## [7,] -2.1136445 1.22509279
## [8,] 0.2968950 2.56314297
## [9,] 2.6903067 3.80296506
## [10,] 3.4974408 4.13303755
## [11,] 5.1247679 4.56175176
## [12,] 6.0899759 4.37867315
## [13,] 5.0852300 2.03049774
points(biv, pch = 16, cex = 1.5, col = "blue")
par(op)
It is clear that the simulated data plotted above exhibit a positive correlation. Correlations close to either −1 or +1 produce thin ellipses, corresponding to an almost perfect linear relationship between the two variables.
Figure 3: Bivariate normal contours
corcon <- function(x, y, correl)
{
nx <- length(x)
ny <- length(y)
z <- matrix(rep(0, nx * ny), nx, ny)
for (i in 1 : nx)
{
for (j in 1 : ny)
{
z[i, j] <- dmvnorm(c(x[i], y[j]), c(0, 0),
matrix(c(1, correl, correl, 1), 2, 2))
}
}
z
}
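The double loop in corcon can be slow on a fine grid. The vectorized version sketched below (added for illustration; the name corcon_fast is not part of the assignment) evaluates dmvnorm on all grid points at once and returns the same matrix layout:
library(mvtnorm)
corcon_fast <- function(x, y, correl)
{
  grid <- as.matrix(expand.grid(x, y))   # all (x[i], y[j]) pairs, with x varying fastest
  dens <- dmvnorm(grid, mean = c(0, 0),
                  sigma = matrix(c(1, correl, correl, 1), 2, 2))
  matrix(dens, nrow = length(x), ncol = length(y))   # element [i, j] is the density at (x[i], y[j])
}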
library(MVA)
library(mvtnorm)
del <- .05 # how fine the grid
lim <- 3.25 # std normals plotted on +/- lim
### pdf(file = "bivnormal.pdf")
par(mfrow = c(2, 4), mar = c(5, 0, 5, 0), cex.lab = 1.5) # Four plots across
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del),
correl = -.5), col = "blue",
xlab = expression(rho == -.5), cex.lab = 1.5,
drawlabels = FALSE, axes = FALSE, frame = TRUE)
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), 0),
xlab = expression(rho == 0), cex.lab = 1.5, col = "blue",
drawlabels = FALSE, axes = FALSE, frame = TRUE)
par(op)
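As a short numerical note on the thin-ellipse remark above (added for illustration): the contours of a bivariate normal density with correlation rho have axes proportional to the square roots of the eigenvalues 1 + rho and 1 − rho of the correlation matrix, so the long-to-short axis ratio grows without bound as rho approaches ±1.
rho <- c(0, .5, .73, .9, .99)
round(sqrt((1 + rho) / (1 - rho)), 2)   # ratio of the long to the short contour axis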
Two other graphical methods for depicting the bivariate normal density function are shown in Fig. 4: a heat image drawn with image and a perspective plot drawn with persp, both from the graphics package, here with correlation 0.73.
Figure 4: Heat map and perspective plot
library(MASS)
library(mvtnorm)
library(graphics)
layout(t(matrix(c(1 : 2, rep(0, 2)), 2, 2)), widths=c(1, 1))
del <- .0125 # how fine the grid
lim <- 1.25 # std normals plotted on +/- lim
image(corcon(seq( -lim, lim, del), seq(-lim, lim, del),
correl = 0.73), axes = FALSE)
del <- .1 # how fine the grid
lim <- 2.7 # std normals plotted on +/- lim
persp(corcon(seq(-lim, lim, del), seq( -lim, lim, del),
correl = 0.73),
axes = FALSE, xlab = " ", ylab = " ", box = FALSE,
col = "lightblue", shade = .05)
par(op)
Figure 5: Bivariate confidence ellipse
library(MASS)
library(MVA)
biv <- data1[, 1: 2] # Extract bivariate data
bivCI <- function(s, xbar, n, alpha, m)
# returns m (x,y) coordinates of 1-alpha joint confidence ellipse of mean
{
x <- sin( 2* pi * (0 : (m - 1) )/ (m - 1)) # m points on a unit circle
y <- cos( 2* pi * (0 : (m - 1)) / (m - 1))
cv <- qchisq(1 - alpha, 2) # chisquared critical value
cv <- cv / n # value of quadratic form
for (i in 1 : m)
{
pair <- c(x[i], y[i]) # ith (x,y) pair
q <- pair %*% solve(s, pair) # quadratic form
x[i] <- x[i] * sqrt(cv / q) + xbar[1]
y[i] <- y[i] * sqrt(cv / q) + xbar[2]
}
return(cbind(x, y))
}
#
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 100), type = "l",
col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 100),
type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p",
lwd = 1)
The joint confidence contours are the values of μ for which n(x̄ − μ)′S⁻¹(x̄ − μ) ≤ χ², where χ² is the upper 1 − α quantile of a chi-squared variate with 2 df. An example of this appears in Fig. 5, drawn in R using the bivCI code above for the confidence ellipse of the bivariate data.
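As a quick numerical illustration of this criterion (added here, not part of the assigned code), we can check whether the true mean (2, 2) from Question A lies inside the 95 % ellipse by evaluating the quadratic form directly:
mu0 <- c(2, 2)          # true mean implied by Question A
xbar <- colMeans(biv)
S <- var(biv)
qform <- nrow(biv) * t(xbar - mu0) %*% solve(S, xbar - mu0)   # n (xbar - mu0)' S^{-1} (xbar - mu0)
c(qform, qchisq(.95, 2))   # mu0 lies inside the 95 % ellipse when qform is the smaller value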
The bivCI function returns m points on a 1 − α confidence ellipse for the mean. The plot call displays the data, the two lines calls draw the 99 % and 95 % confidence ellipses, and the final lines statement places a "+" sign at the center of the figure.
Figure 6: Plot data and identify extremes
plot(data1[, 1], data1[, 2], xlab = "x",
ylab = "y", pch = 19, col= "red", cex = 1.5)
ext <- c(3, 9, 29) # identified extremes
text(data1[ext, 1], data1[ext, 2],
labels = ext, pos = c(3, 2, 3), col = "blue")   # data1 has no row names, so label points by index
rc <- glm(data1[, 2] ~ data1[, 1])$coefficients # regression coefficients
xb <- range(data1[,1]) * c( .9, 1.1)
yb <- rc[1] + xb * rc[2]
lines(xb, yb, col = "green")
text(12, 8, labels = "regression line", col = "green")
lines(xb, xb, col = "blue") # diagonal line
text(11, 9, labels = "x = y line", col = "blue")
Figure 6 plots the bivariate data. Three extreme observations are identified in this bivariate plot, yet none of them is extreme in X or Y alone: multivariate outliers can stand out jointly without being excessive in any of their marginal components. Other than these points, there is nothing to indicate a lack of fit to the bivariate normal distribution. Figure 6 also contains a 45° (x = y) line for comparing the x and y values directly with the fitted regression line. The observed correlation between x and y (about 0.69) is high.
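The sketch below (added for illustration) shows one way such multivariate outliers can be flagged numerically: under bivariate normality, the squared Mahalanobis distances of the observations are approximately chi-squared with 2 df.
d2 <- mahalanobis(data1, center = colMeans(data1), cov = cov(data1))   # squared Mahalanobis distances
which(d2 > qchisq(.975, 2))   # observations beyond the 97.5 % chi-squared cutoff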
library(mvtnorm) # library with dmvnorm function
biv5 <- function(par) # all five parameters, natural parameterization
{
cov <- par[5]* sqrt(par[3] * par[4])
biv5 <- sum(
-dmvnorm(data1, mean = c(par[1], par[2]),
sigma = matrix(c(par[3], cov, cov, par[4]),2, 2),
log = TRUE) )
print(c(par, biv5))
return(biv5)
}
nlm(biv5, c(45, 45, 100, 100, .8), hessian = TRUE) # fits with warnings
## Warning in nlm(biv5, c(45, 45, 100, 100, 0.8), hessian = TRUE): NA/Inf replaced
## by maximum positive value
## Warning in sqrt(par[3] * par[4]): NaNs produced
## ... (these two warnings repeat many times; the remaining repetitions are omitted)
nlm.out <- nlm(biv5, c(45, 45, 100, 100, .8), hessian = TRUE)
## Warning in nlm(biv5, c(45, 45, 100, 100, 0.8), hessian = TRUE): NA/Inf replaced
## by maximum positive value
## Warning in sqrt(par[3] * par[4]): NaNs produced
## ... (the same two warnings repeat many times during this second fit; repetitions omitted)
nlm.out$estimate # parameter estimates
nlm.out$hessian # estimated Hessian matrix
solve(nlm.out$hessian) # invert the Hessian matrix
diag(solve(nlm.out$hessian)) # diagonal elements
sqrt(diag(solve(nlm.out$hessian))) # estimated se's
se <- sqrt(diag(solve(nlm.out$hessian)))
q <- qnorm(.975) # 95% interval quantile
upper <- nlm.out$estimate + q * se # upper 95% CI intervals
lower <- nlm.out$estimate - q * se # lower 95% CI intervals
summary <- data.frame(
cbind(nlm.out$estimate, se, lower, upper),
row.names = c("mean1", "mean2", "var1", "var2", "rho"))
colnames(summary)[1] <- "estimate"
print(summary, digits = 3)
## estimate se lower upper
## mean1 1.809 0.3221 1.178 2.440
## mean2 1.833 0.2066 1.428 2.238
## var1 10.373 1.4673 7.497 13.249
## var2 4.270 0.6040 3.086 5.453
## rho 0.687 0.0527 0.584 0.791
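As a consistency check (added for illustration), the fitted variances should equal the sample variances from cov(data1) shrunk by the factor (n − 1)/n, because the maximum likelihood estimator divides by n rather than n − 1:
(n - 1) / n * diag(cov(data1))   # about 10.37 and 4.27, matching var1 and var2 above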
The parameter estimates are approximately normally distributed with these standard errors, so confidence intervals are easily constructed, as shown in the summary above.
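The repeated warnings from nlm arise because the unconstrained search occasionally proposes negative variances, where sqrt(par[3] * par[4]) and the normal density are undefined. As a sketch added for illustration (not part of the assigned code; the name biv5t is ours), the fit can be reparameterized in terms of log-variances and atanh of the correlation so that every candidate parameter vector is valid:
library(mvtnorm)
biv5t <- function(tpar)   # illustrative reparameterized negative log-likelihood
{
  mu <- tpar[1:2]
  v1 <- exp(tpar[3])    # variances forced to be positive
  v2 <- exp(tpar[4])
  rho <- tanh(tpar[5])  # correlation forced into (-1, 1)
  cv <- rho * sqrt(v1 * v2)
  -sum(dmvnorm(data1, mean = mu,
               sigma = matrix(c(v1, cv, cv, v2), 2, 2), log = TRUE))
}
fit <- nlm(biv5t, c(0, 0, log(10), log(4), atanh(.7)), hessian = TRUE)
c(fit$estimate[1:2], exp(fit$estimate[3:4]), tanh(fit$estimate[5]))   # back-transformed estimates
# Note: fit$hessian is on the transformed scale, so standard errors for the original
# parameters would require the delta method.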