In HW2, you will simulate and analyze various bivariate random vectors (X, Y), as discussed in Chapter 6 of the textbook. Let Z1, Z2, Z3, . . . be a sequence of independent random variables, each following a standard normal N(0, 1) distribution. Each pair (Xi, Yi), i = 1, 2, . . . , 100, is generated as follows:
Question A. X = 2Z1 + Z2 + 3Z3 + 2 and Y = Z1 + 2Z2 + Z3 + 2.
n<-100
data1 <- matrix(data = 0, ncol = 2, nrow = n)
for (i in 1:n)
{
z1 <- rnorm(1)
z2 <- rnorm(1)
z3 <- rnorm(1)
x <- 2*z1 + z2 + 3*z3 + 2
y <- z1 + 2*z2 + z3 + 2
data1[i,1] <- x
data1[i,2] <- y
}
plot(data1[,1], data1[,2])
library(pander)
pander(colMeans(data1))
The sample means are 1.809 (x) and 1.833 (y).
pander(cov(data1))
|    | V1    | V2    |
|----|-------|-------|
| V1 | 10.48 | 4.621 |
| V2 | 4.621 | 4.313 |
pander(cor(data1))
|    | V1     | V2     |
|----|--------|--------|
| V1 | 1      | 0.6874 |
| V2 | 0.6874 | 1      |
pander(sapply(as.data.frame(data1), sd))
| V1 | V2 |
|---|---|
| 3.237 | 2.077 |
pander(var(data1))
|    | V1    | V2    |
|----|-------|-------|
| V1 | 10.48 | 4.621 |
| V2 | 4.621 | 4.313 |
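Because X and Y are fixed linear combinations of Z1, Z2, Z3 (plus the constant 2), their theoretical moments are known and can be compared with the sample values above. The short check below is added for illustration; A simply collects the coefficients from Question A.
A <- matrix(c(2, 1, 3,    # coefficients of Z1, Z2, Z3 in X
              1, 2, 1),   # coefficients of Z1, Z2, Z3 in Y
            nrow = 2, byrow = TRUE)
A %*% t(A)            # theoretical covariance: Var(X) = 14, Var(Y) = 6, Cov(X, Y) = 7
cov2cor(A %*% t(A))   # theoretical correlation, about 0.76
# The theoretical mean vector is (2, 2), close to the sample means reported above.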
library(MVA)
## Loading required package: HSAUR2
## Loading required package: tools
biv <- data1[,1:2] # Extract bivariate data
### pdf(file = "data1.pdf")
op <- par(mfrow=c(2, 3), mar = c(4, 4, 4, 0.25), cex.lab = 1.75)
plot(biv[, 1], biv[, 2], pch = 16, # Naive scatterplot
cex = 1.5, col = "blue", cex.lab = 1.75,
xlab = "x",
ylab = "y")
bvbox(biv, xlab = "x", yaxt = "n", ylab = "", cex.lab = 1.75,
pch = 16, cex = 1.5, col = "blue", method = "NOT")
library(aplpack)
nlev <- 5
colors <- heat.colors(9)[3 : (nlev + 2)]
plothulls(biv, n.hull = 5, col.hull = colors, cex.lab = 1.75,
xlab = "x", ylab = " ", yaxt = "n",
lty.hull = 1 : nlev, density = NA, col = 0, main = " ")
## [[1]]
## x.hull y.hull
## [1,] -1.2398702 -3.655009
## [2,] -6.4184525 -4.362746
## [3,] -8.0823277 -1.050031
## [4,] -0.9975848 3.970809
## [5,] 6.6096121 7.515555
## [6,] 9.6240905 5.772245
## [7,] 7.1943715 2.723820
##
## [[2]]
## x.hull y.hull
## [1,] 4.3555773 0.7799300
## [2,] 3.4282674 -0.1134456
## [3,] 0.1046710 -2.2226077
## [4,] -3.8913673 -1.6749052
## [5,] -4.6739457 0.5839700
## [6,] 0.2567084 4.3028639
## [7,] 3.3720914 5.5555957
## [8,] 8.7833806 5.8795864
## [9,] 6.0484350 2.4575742
##
## [[3]]
## x.hull y.hull
## [1,] 4.6647967 1.13247289
## [2,] 3.2765751 -0.04012134
## [3,] -2.0282040 -1.39281654
## [4,] -3.8256624 0.04403021
## [5,] -3.6063294 1.00672297
## [6,] -0.4575111 3.40653670
## [7,] 2.2622914 4.76117342
## [8,] 4.3252933 5.19425796
## [9,] 5.9447874 5.31265417
## [10,] 7.5239554 4.77036993
## [11,] 7.6659315 4.66161907
##
## [[4]]
## x.hull y.hull
## [1,] 4.5983689 1.30851850
## [2,] 3.3493447 0.05223692
## [3,] 0.2671006 -0.49615966
## [4,] -0.7779849 -0.65223201
## [5,] -2.2875770 -0.74117036
## [6,] -2.8899206 -0.12464422
## [7,] -3.2915567 0.45768691
## [8,] -2.4058426 1.43032118
## [9,] -0.2097324 2.92192406
## [10,] 4.4534722 4.91423341
## [11,] 6.2821192 4.95755806
## [12,] 6.3550803 4.24846664
## [13,] 6.3848104 3.91028877
##
## [[5]]
## x.hull y.hull
## [1,] 3.0740976 0.32263268
## [2,] 2.6873746 0.13000064
## [3,] 2.3383717 0.06150962
## [4,] 0.1058051 -0.32856968
## [5,] -0.7792003 -0.29537180
## [6,] -1.9766330 0.80547850
## [7,] -2.1136445 1.22509279
## [8,] 0.2968950 2.56314297
## [9,] 2.6903067 3.80296506
## [10,] 3.4974408 4.13303755
## [11,] 5.1247679 4.56175176
## [12,] 6.0899759 4.37867315
## [13,] 5.0852300 2.03049774
points(biv, pch = 16, cex = 1.5, col = "blue")
par(op)
It is clear that the simulated data plotted above exhibit a positive correlation. Correlations close to either −1 or +1 produce thin ellipses, corresponding to an almost perfect linear relationship between the two variables.
Figure 3: Bivariate normal contours
corcon <- function(x, y, correl)
{
nx <- length(x)
ny <- length(y)
z <- matrix(rep(0, nx * ny), nx, ny)
for (i in 1 : nx)
{
for (j in 1 : ny)
{
z[i, j] <- dmvnorm(c(x[i], y[j]), c(0, 0),
matrix(c(1, correl, correl, 1), 2, 2))
}
}
z
}
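The double loop in corcon can be slow on a fine grid. The vectorized version sketched below (added for illustration; the name corcon_fast is not part of the assignment) evaluates dmvnorm on all grid points at once and returns the same matrix layout:
library(mvtnorm)
corcon_fast <- function(x, y, correl)
{
  grid <- as.matrix(expand.grid(x, y))   # all (x[i], y[j]) pairs, with x varying fastest
  dens <- dmvnorm(grid, mean = c(0, 0),
                  sigma = matrix(c(1, correl, correl, 1), 2, 2))
  matrix(dens, nrow = length(x), ncol = length(y))   # element [i, j] is the density at (x[i], y[j])
}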
library(MVA)
library(mvtnorm)
del <- .05 # how fine the grid
lim <- 3.25 # std normals plotted on +/- lim
### pdf(file = "bivnormal.pdf")
par(mfrow = c(2, 4), mar = c(5, 0, 5, 0), cex.lab = 1.5) # Four plots across
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del),
correl = -.5), col = "blue",
xlab = expression(rho == -.5), cex.lab = 1.5,
drawlabels = FALSE, axes = FALSE, frame = TRUE)
contour(corcon(seq(-lim, lim, del), seq(-lim, lim, del), 0),
xlab = expression(rho == 0), cex.lab = 1.5, col = "blue",
drawlabels = FALSE, axes = FALSE, frame = TRUE)
par(op)
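As a short numerical note on the thin-ellipse remark above (added for illustration): the contours of a bivariate normal density with correlation rho have axes proportional to the square roots of the eigenvalues 1 + rho and 1 − rho of the correlation matrix, so the long-to-short axis ratio grows without bound as rho approaches ±1.
rho <- c(0, .5, .73, .9, .99)
round(sqrt((1 + rho) / (1 - rho)), 2)   # ratio of the long to the short contour axis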
Two other graphical methods for depicting the bivariate normal density function are shown in Fig. 4: a heat image drawn with image and a perspective plot drawn with persp, both from the graphics package, here with correlation 0.73.
Figure 4: Heat map and perspective plot
library(MASS)
library(mvtnorm)
library(graphics)
layout(t(matrix(c(1 : 2, rep(0, 2)), 2, 2)), widths=c(1, 1))
del <- .0125 # how fine the grid
lim <- 1.25 # std normals plotted on +/- lim
image(corcon(seq( -lim, lim, del), seq(-lim, lim, del),
correl = 0.73), axes = FALSE)
del <- .1 # how fine the grid
lim <- 2.7 # std normals plotted on +/- lim
persp(corcon(seq(-lim, lim, del), seq( -lim, lim, del),
correl = 0.73),
axes = FALSE, xlab = " ", ylab = " ", box = FALSE,
col = "lightblue", shade = .05)
par(op)
Figure 5: Bivariate confidence ellipse
library(MASS)
library(MVA)
biv <- data1[, 1: 2] # Extract bivariate data
bivCI <- function(s, xbar, n, alpha, m)
# returns m (x,y) coordinates of 1-alpha joint confidence ellipse of mean
{
x <- sin( 2* pi * (0 : (m - 1) )/ (m - 1)) # m points on a unit circle
y <- cos( 2* pi * (0 : (m - 1)) / (m - 1))
cv <- qchisq(1 - alpha, 2) # chisquared critical value
cv <- cv / n # value of quadratic form
for (i in 1 : m)
{
pair <- c(x[i], y[i]) # ith (x,y) pair
q <- pair %*% solve(s, pair) # quadratic form
x[i] <- x[i] * sqrt(cv / q) + xbar[1]
y[i] <- y[i] * sqrt(cv / q) + xbar[2]
}
return(cbind(x, y))
}
#
plot(biv, col = "red", pch = 16, cex.lab = 1.5)
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .01, 100), type = "l",
col = "blue")
lines(bivCI(var(biv), colMeans(biv), dim(biv)[1], .05, 100),
type = "l", col = "green", lwd = 1)
lines(colMeans(biv)[1], colMeans(biv)[2], pch = 3, cex = .8, type = "p",
lwd = 1)
The joint confidence contours are the values of μ for which n(x̄ − μ)′S⁻¹(x̄ − μ) ≤ χ², where χ² is the upper 1 − α quantile of a chi-squared variate with 2 df. An example of this appears in Fig. 5, drawn in R using the bivCI code above for the confidence ellipse of the bivariate data.
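As a quick numerical illustration of this criterion (added here, not part of the assigned code), we can check whether the true mean (2, 2) from Question A lies inside the 95 % ellipse by evaluating the quadratic form directly:
mu0 <- c(2, 2)          # true mean implied by Question A
xbar <- colMeans(biv)
S <- var(biv)
qform <- nrow(biv) * t(xbar - mu0) %*% solve(S, xbar - mu0)   # n (xbar - mu0)' S^{-1} (xbar - mu0)
c(qform, qchisq(.95, 2))   # mu0 lies inside the 95 % ellipse when qform is the smaller value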
The bivCI function returns m points on a 1 − α confidence ellipse for the mean. The plot call displays the data, the two lines calls draw the 99 % and 95 % confidence ellipses, and the final lines statement places a "+" sign at the center of the figure.
Figure 6: Plot data and identify extremes
plot(data1[, 1], data1[, 2], xlab = "x",
ylab = "y", pch = 19, col= "red", cex = 1.5)
ext <- c(3, 9, 29) # identified extremes
text(data1[ext, 1], data1[ext, 2],
labels = ext, pos = c(3, 2, 3), col = "blue")   # data1 has no row names, so label points by index
rc <- glm(data1[, 2] ~ data1[, 1])$coefficients # regression coefficients
xb <- range(data1[,1]) * c( .9, 1.1)
yb <- rc[1] + xb * rc[2]
lines(xb, yb, col = "green")
text(12, 8, labels = "regression line", col = "green")
lines(xb, xb, col = "blue") # diagonal line
text(11, 9, labels = "x = y line", col = "blue")
Figure 6 plots the bivariate data. Three extreme observations are identified in this bivariate plot, yet none of them is extreme in X or Y alone: multivariate outliers can stand out jointly without being excessive in any of their marginal components. Other than these points, there is nothing to indicate a lack of fit to the bivariate normal distribution. Figure 6 also contains a 45° (x = y) line for comparing the x and y values directly with the fitted regression line. The observed correlation between x and y (about 0.69) is high.
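The sketch below (added for illustration) shows one way such multivariate outliers can be flagged numerically: under bivariate normality, the squared Mahalanobis distances of the observations are approximately chi-squared with 2 df.
d2 <- mahalanobis(data1, center = colMeans(data1), cov = cov(data1))   # squared Mahalanobis distances
which(d2 > qchisq(.975, 2))   # observations beyond the 97.5 % chi-squared cutoff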
library(mvtnorm) # library with dmvnorm function
biv5 <- function(par) # all five parameters, natural parameterization
{
cov <- par[5]* sqrt(par[3] * par[4])
biv5 <- sum(
-dmvnorm(data1, mean = c(par[1], par[2]),
sigma = matrix(c(par[3], cov, cov, par[4]),2, 2),
log = TRUE) )
print(c(par, biv5))
return(biv5)
}
nlm(biv5, c(45, 45, 100, 100, .8), hessian = TRUE) # fits with warnings
## Warning in nlm(biv5, c(45, 45, 100, 100, 0.8), hessian = TRUE): NA/Inf replaced
## by maximum positive value
## Warning in sqrt(par[3] * par[4]): NaNs produced
## ... (these two warnings repeat many times; the remaining repetitions are omitted)
nlm.out <- nlm(biv5, c(45, 45, 100, 100, .8), hessian = TRUE)
## Warning in nlm(biv5, c(45, 45, 100, 100, 0.8), hessian = TRUE): NA/Inf replaced
## by maximum positive value
## Warning in sqrt(par[3] * par[4]): NaNs produced
## ... (the same two warnings repeat many times during this second fit; repetitions omitted)
nlm.out$estimate # parameter estimates
nlm.out$hessian # estimated Hessian matrix
solve(nlm.out$hessian) # invert the Hessian matrix
diag(solve(nlm.out$hessian)) # diagonal elements
sqrt(diag(solve(nlm.out$hessian))) # estimated se's
se <- sqrt(diag(solve(nlm.out$hessian)))
q <- qnorm(.975) # 95% interval quantile
upper <- nlm.out$estimate + q * se # upper 95% CI intervals
lower <- nlm.out$estimate - q * se # lower 95% CI intervals
summary <- data.frame(
cbind(nlm.out$estimate, se, lower, upper),
row.names = c("mean1", "mean2", "var1", "var2", "rho"))
colnames(summary)[1] <- "estimate"
print(summary, digits = 3)
## estimate se lower upper
## mean1 1.809 0.3221 1.178 2.440
## mean2 1.833 0.2066 1.428 2.238
## var1 10.373 1.4673 7.497 13.249
## var2 4.270 0.6040 3.086 5.453
## rho 0.687 0.0527 0.584 0.791
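As a consistency check (added for illustration), the fitted variances should equal the sample variances from cov(data1) shrunk by the factor (n − 1)/n, because the maximum likelihood estimator divides by n rather than n − 1:
(n - 1) / n * diag(cov(data1))   # about 10.37 and 4.27, matching var1 and var2 above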
The parameter estimates are approximately normally distributed with these standard errors, so confidence intervals are easily constructed, as shown in the summary above.
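The repeated warnings from nlm arise because the unconstrained search occasionally proposes negative variances, where sqrt(par[3] * par[4]) and the normal density are undefined. As a sketch added for illustration (not part of the assigned code; the name biv5t is ours), the fit can be reparameterized in terms of log-variances and atanh of the correlation so that every candidate parameter vector is valid:
library(mvtnorm)
biv5t <- function(tpar)   # illustrative reparameterized negative log-likelihood
{
  mu <- tpar[1:2]
  v1 <- exp(tpar[3])    # variances forced to be positive
  v2 <- exp(tpar[4])
  rho <- tanh(tpar[5])  # correlation forced into (-1, 1)
  cv <- rho * sqrt(v1 * v2)
  -sum(dmvnorm(data1, mean = mu,
               sigma = matrix(c(v1, cv, cv, v2), 2, 2), log = TRUE))
}
fit <- nlm(biv5t, c(0, 0, log(10), log(4), atanh(.7)), hessian = TRUE)
c(fit$estimate[1:2], exp(fit$estimate[3:4]), tanh(fit$estimate[5]))   # back-transformed estimates
# Note: fit$hessian is on the transformed scale, so standard errors for the original
# parameters would require the delta method.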